From 718ddbaa07a190b638817fa71de3c76ae04f8ab1 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <alsepkow@microsoft.com>
Date: Wed, 4 Mar 2026 12:27:28 -0800
Subject: [PATCH 1/9] NFC: Extract HLSLBool_t and HLSLHalf_t into shared
 HLSLTestDataTypes.h

Move shared HLSL test type wrappers out of LongVectorTestData.h into a
common header that both LongVector and LinearAlgebra tests can include.
Also adds HLSLNorm_t, F8E4M3_t, and F8E5M2_t wrappers for full SM 6.10
linalg ComponentType coverage.

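LongVectorTestData.h retains backward-compatible using declarations. The
getInputSet specializations generated by BEGIN_INPUT_SETS are also marked
inline, which allows the header to be included from more than one
translation unit.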

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../unittests/HLSLExec/HLSLTestDataTypes.h    | 511 ++++++++++++++++++
 .../unittests/HLSLExec/LongVectorTestData.h   | 232 +-------
 2 files changed, 517 insertions(+), 226 deletions(-)
 create mode 100644 tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
diff --git a/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
new file mode 100644
index 0000000000..f69b68e5a7
--- /dev/null
+++ b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
@@ -0,0 +1,511 @@
+#ifndef HLSLTESTDATATYPES_H
+#define HLSLTESTDATATYPES_H
+
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <ostream>
+
+#include <DirectXMath.h>
+#include <DirectXPackedVector.h>
+
+#include "dxc/Support/Global.h"
+
+// Shared HLSL type wrappers for use in execution tests.
+// These types bridge the gap between C++ and HLSL type representations.
+namespace HLSLTestDataTypes {
+
+// HLSL bools occupy 4 bytes while C++ bools occupy 1, so the value is stored
+// as an int32_t. Equality (==, !=) compares the bool interpretation, whereas
+// the ordering and arithmetic operators act on the raw int32_t; operator/ and
+// operator% do not guard against a zero right-hand operand.
+struct HLSLBool_t {
+  HLSLBool_t() : Val(0) {}
+  HLSLBool_t(int32_t Val) : Val(Val) {}
+  HLSLBool_t(bool Val) : Val(Val) {}
+
+  bool operator==(const HLSLBool_t &Other) const {
+    return static_cast<bool>(Val) == static_cast<bool>(Other.Val);
+  }
+
+  bool operator!=(const HLSLBool_t &Other) const {
+    return static_cast<bool>(Val) != static_cast<bool>(Other.Val);
+  }
+
+  bool operator<(const HLSLBool_t &Other) const { return Val < Other.Val; }
+
+  bool operator>(const HLSLBool_t &Other) const { return Val > Other.Val; }
+
+  bool operator<=(const HLSLBool_t &Other) const { return Val <= Other.Val; }
+
+  bool operator>=(const HLSLBool_t &Other) const { return Val >= Other.Val; }
+
+  HLSLBool_t operator*(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val * Other.Val);
+  }
+
+  HLSLBool_t operator+(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val + Other.Val);
+  }
+
+  HLSLBool_t operator-(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val - Other.Val);
+  }
+
+  HLSLBool_t operator/(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val / Other.Val);
+  }
+
+  HLSLBool_t operator%(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val % Other.Val);
+  }
+
+  HLSLBool_t operator&&(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val && Other.Val);
+  }
+
+  HLSLBool_t operator||(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val || Other.Val);
+  }
+
+  bool AsBool() const { return static_cast<bool>(Val); }
+
+  operator bool() const { return AsBool(); }
+  operator int16_t() const { return (int16_t)(AsBool()); }
+  operator int32_t() const { return (int32_t)(AsBool()); }
+  operator int64_t() const { return (int64_t)(AsBool()); }
+  operator uint16_t() const { return (uint16_t)(AsBool()); }
+  operator uint32_t() const { return (uint32_t)(AsBool()); }
+  operator uint64_t() const { return (uint64_t)(AsBool()); }
+  operator float() const { return (float)(AsBool()); }
+  operator double() const { return (double)(AsBool()); }
+
+  // Wide-stream insertion; writes the bool interpretation of Val.
+  friend std::wostream &operator<<(std::wostream &Os, const HLSLBool_t &Obj) {
+    Os << static_cast<bool>(Obj.Val);
+    return Os;
+  }
+
+  // Narrow-stream insertion; writes the bool interpretation of Val.
+  friend std::ostream &operator<<(std::ostream &Os, const HLSLBool_t &Obj) {
+    Os << static_cast<bool>(Obj.Val);
+    return Os;
+  }
+
+  int32_t Val = 0;
+};
+
+// C++ has no native float16 type before C++23, so the value is stored as the
+// uint16_t bit pattern (DirectX HALF); arithmetic and comparisons use float.
+struct HLSLHalf_t {
+  HLSLHalf_t() : Val(0) {}
+  HLSLHalf_t(const float F) {
+    Val = DirectX::PackedVector::XMConvertFloatToHalf(F);
+  }
+  HLSLHalf_t(const double D) {
+    float F;
+    if (D >= std::numeric_limits<double>::max())
+      F = std::numeric_limits<float>::max();
+    else if (D <= std::numeric_limits<double>::lowest())
+      F = std::numeric_limits<float>::lowest();
+    else
+      F = static_cast<float>(D);
+
+    Val = DirectX::PackedVector::XMConvertFloatToHalf(F);
+  }
+  HLSLHalf_t(const uint32_t U) {
+    float F = static_cast<float>(U);
+    Val = DirectX::PackedVector::XMConvertFloatToHalf(F);
+  }
+
+  // PackedVector::HALF is a uint16. Make sure we don't ever accidentally
+  // convert one of these to a HLSLHalf_t by arithmetically converting it to a
+  // float.
+  HLSLHalf_t(DirectX::PackedVector::HALF) = delete;
+  // Distance from A to the value whose bit pattern is A.Val + 1 (as a double).
+  static double GetULP(HLSLHalf_t A) {
+    DXASSERT(!std::isnan(A) && !std::isinf(A),
+             "ULP of NaN or infinity is undefined");
+
+    HLSLHalf_t Next = A;
+    ++Next.Val;
+
+    double NextD = Next;
+    double AD = A;
+    return NextD - AD;
+  }
+
+  static HLSLHalf_t FromHALF(DirectX::PackedVector::HALF Half) {
+    HLSLHalf_t H;
+    H.Val = Half;
+    return H;
+  }
+
+  // Implicit conversion to float for use with things like std::acos, std::tan,
+  // etc
+  operator float() const {
+    return DirectX::PackedVector::XMConvertHalfToFloat(Val);
+  }
+
+  bool operator==(const HLSLHalf_t &Other) const {
+    // Convert to floats to properly handle the '0 == -0' case which must
+    // compare to true but have different uint16_t values.
+    // That is, 0 == -0 is true. We store Val as a uint16_t.
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+    return A == B;
+  }
+
+  bool operator<(const HLSLHalf_t &Other) const {
+    return DirectX::PackedVector::XMConvertHalfToFloat(Val) <
+           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+  }
+
+  bool operator>(const HLSLHalf_t &Other) const {
+    return DirectX::PackedVector::XMConvertHalfToFloat(Val) >
+           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+  }
+
+  // Used by tolerance checks in the tests.
+  bool operator>(float F) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    return A > F;
+  }
+
+  bool operator<(float F) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    return A < F;
+  }
+
+  bool operator<=(const HLSLHalf_t &Other) const {
+    return DirectX::PackedVector::XMConvertHalfToFloat(Val) <=
+           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+  }
+
+  bool operator>=(const HLSLHalf_t &Other) const {
+    return DirectX::PackedVector::XMConvertHalfToFloat(Val) >=
+           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+  }
+  // Defined via operator== so that 0 != -0 is false and NaN != NaN is true.
+  bool operator!=(const HLSLHalf_t &Other) const { return !(*this == Other); }
+
+  HLSLHalf_t operator*(const HLSLHalf_t &Other) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A * B));
+  }
+
+  HLSLHalf_t operator+(const HLSLHalf_t &Other) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A + B));
+  }
+
+  HLSLHalf_t operator-(const HLSLHalf_t &Other) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A - B));
+  }
+
+  HLSLHalf_t operator/(const HLSLHalf_t &Other) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A / B));
+  }
+
+  HLSLHalf_t operator%(const HLSLHalf_t &Other) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+    const float C = std::fmod(A, B);
+    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(C));
+  }
+
+  // So we can construct std::wstrings using std::wostream
+  friend std::wostream &operator<<(std::wostream &Os, const HLSLHalf_t &Obj) {
+    Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val);
+    return Os;
+  }
+
+  // So we can construct std::strings using std::ostream
+  friend std::ostream &operator<<(std::ostream &Os, const HLSLHalf_t &Obj) {
+    Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val);
+    return Os;
+  }
+
+  // HALF is an alias to uint16_t
+  DirectX::PackedVector::HALF Val = 0;
+};
+
+// SNorm ([-1,1]) and UNorm ([0,1]) flavored wrappers around a floating-point
+// base type. They exist so that each interpretation is a distinct C++ type and
+// can own its own input sets; Signed selects SNorm (true) or UNorm (false).
+template <typename BaseT, bool Signed> struct HLSLNorm_t {
+  BaseT Val;
+
+  HLSLNorm_t() : Val{} {}
+  HLSLNorm_t(BaseT Init) : Val(Init) {}
+
+  operator BaseT() const { return this->Val; }
+
+  HLSLNorm_t operator*(const HLSLNorm_t &Rhs) const {
+    return HLSLNorm_t(this->Val * Rhs.Val);
+  }
+  HLSLNorm_t operator+(const HLSLNorm_t &Rhs) const {
+    return HLSLNorm_t(this->Val + Rhs.Val);
+  }
+  HLSLNorm_t operator-(const HLSLNorm_t &Rhs) const {
+    return HLSLNorm_t(this->Val - Rhs.Val);
+  }
+
+  bool operator==(const HLSLNorm_t &Rhs) const { return Val == Rhs.Val; }
+  bool operator!=(const HLSLNorm_t &Rhs) const { return !(*this == Rhs); }
+  bool operator<(const HLSLNorm_t &Rhs) const { return Val < Rhs.Val; }
+  bool operator>(const HLSLNorm_t &Rhs) const { return Val > Rhs.Val; }
+  bool operator<=(const HLSLNorm_t &Rhs) const { return Val <= Rhs.Val; }
+  bool operator>=(const HLSLNorm_t &Rhs) const { return Val >= Rhs.Val; }
+
+  friend std::ostream &operator<<(std::ostream &Out, const HLSLNorm_t &Arg) {
+    // Formatting is delegated to BaseT's own stream insertion.
+    return Out << Arg.Val;
+  }
+  friend std::wostream &operator<<(std::wostream &Out, const HLSLNorm_t &Arg) {
+    // Formatting is delegated to BaseT's own stream insertion.
+    return Out << Arg.Val;
+  }
+};
+
+using SNormF16_t = HLSLNorm_t<HLSLHalf_t, true>;  // SNorm over HLSLHalf_t
+using UNormF16_t = HLSLNorm_t<HLSLHalf_t, false>; // UNorm over HLSLHalf_t
+using SNormF32_t = HLSLNorm_t<float, true>;       // SNorm over float
+using UNormF32_t = HLSLNorm_t<float, false>;      // UNorm over float
+using SNormF64_t = HLSLNorm_t<double, true>;      // SNorm over double
+using UNormF64_t = HLSLNorm_t<double, false>;     // UNorm over double
+
+// FP8 E4M3 type wrapper (1 sign, 4 exponent, 3 mantissa, bias 7).
+// Range: [-448, 448]. No Inf; only NaN (0x7F/0xFF).
+struct F8E4M3_t {
+  uint8_t Val; // Raw E4M3 bit pattern.
+
+  F8E4M3_t() : Val(0) {}
+  F8E4M3_t(float F) { Val = FloatToF8E4M3(F); }
+
+  operator float() const { return F8E4M3ToFloat(Val); }
+  // Arithmetic is carried out in float and the result re-encoded as E4M3.
+  F8E4M3_t operator*(const F8E4M3_t &O) const {
+    return F8E4M3_t(float(*this) * float(O));
+  }
+  F8E4M3_t operator+(const F8E4M3_t &O) const {
+    return F8E4M3_t(float(*this) + float(O));
+  }
+  F8E4M3_t operator-(const F8E4M3_t &O) const {
+    return F8E4M3_t(float(*this) - float(O));
+  }
+  // Equality compares raw bits; ordering compares the decoded floats.
+  bool operator==(const F8E4M3_t &O) const { return Val == O.Val; }
+  bool operator!=(const F8E4M3_t &O) const { return Val != O.Val; }
+  bool operator<(const F8E4M3_t &O) const {
+    return float(*this) < float(O);
+  }
+  bool operator>(const F8E4M3_t &O) const {
+    return float(*this) > float(O);
+  }
+  bool operator<=(const F8E4M3_t &O) const {
+    return float(*this) <= float(O);
+  }
+  bool operator>=(const F8E4M3_t &O) const {
+    return float(*this) >= float(O);
+  }
+
+  friend std::ostream &operator<<(std::ostream &Os, const F8E4M3_t &Obj) {
+    Os << float(Obj);
+    return Os;
+  }
+  friend std::wostream &operator<<(std::wostream &Os, const F8E4M3_t &Obj) {
+    Os << float(Obj);
+    return Os;
+  }
+
+private:
+  static float F8E4M3ToFloat(uint8_t V) {
+    uint8_t Sign = (V >> 7) & 1;
+    uint8_t Exp = (V >> 3) & 0xF;
+    uint8_t Mant = V & 0x7;
+
+    if (Exp == 0xF && Mant == 0x7)
+      return std::numeric_limits<float>::quiet_NaN();
+    // Exp == 0 is subnormal: Mant * 2^-9; otherwise (1 + Mant/8) * 2^(Exp-7).
+    float Result;
+    if (Exp == 0)
+      Result = std::ldexp(static_cast<float>(Mant), -9);
+    else
+      Result = std::ldexp(1.0f + static_cast<float>(Mant) / 8.0f, Exp - 7);
+
+    return Sign ? -Result : Result;
+  }
+  // Encodes F with saturation: magnitudes >= 448 map to +/-448, NaN to 0x7F.
+  static uint8_t FloatToF8E4M3(float F) {
+    if (std::isnan(F))
+      return 0x7F;
+
+    uint8_t Sign = 0;
+    if (F < 0.0f) {
+      Sign = 1;
+      F = -F;
+    }
+
+    if (F == 0.0f)
+      return Sign << 7;
+
+    // Clamp to max representable (E=15, M=6 → 448).
+    if (F >= 448.0f)
+      return (Sign << 7) | (0xF << 3) | 0x6;
+
+    int Exp;
+    float Frac = std::frexp(F, &Exp);
+    int BiasedExp = Exp + 6;
+
+    if (BiasedExp <= 0) {
+      // Subnormal range: 0 < F < 2^-6, so the rounded mantissa lies in [0, 8].
+      // A result of 8 is deliberately not clamped to 7: raw bits 0x08 encode
+      // the smallest normal (E=1, M=0 = 2^-6), which is the correctly rounded
+      // value; clamping would round such inputs in the wrong direction.
+      int Mant = static_cast<int>(std::round(F * 512.0f));
+      return (Sign << 7) | static_cast<uint8_t>(Mant);
+    }
+
+    float Significand = 2.0f * Frac;
+    int Mant = static_cast<int>(std::round((Significand - 1.0f) * 8.0f));
+
+    if (Mant >= 8) {
+      Mant = 0;
+      BiasedExp++;
+    }
+
+    if (BiasedExp >= 15) {
+      if (BiasedExp > 15 || Mant > 6)
+        return (Sign << 7) | (0xF << 3) | 0x6;
+    }
+
+    return (Sign << 7) | (static_cast<uint8_t>(BiasedExp) << 3) |
+           static_cast<uint8_t>(Mant);
+  }
+};
+
+// FP8 E5M2 type wrapper (1 sign, 5 exponent, 2 mantissa, bias 15).
+// Range: [-57344, 57344]. Has Inf and NaN (like IEEE 754).
+struct F8E5M2_t {
+  uint8_t Val; // Raw E5M2 bit pattern.
+
+  F8E5M2_t() : Val(0) {}
+  F8E5M2_t(float F) { Val = FloatToF8E5M2(F); }
+
+  operator float() const { return F8E5M2ToFloat(Val); }
+  // Arithmetic is carried out in float and the result re-encoded as E5M2.
+  F8E5M2_t operator*(const F8E5M2_t &O) const {
+    return F8E5M2_t(float(*this) * float(O));
+  }
+  F8E5M2_t operator+(const F8E5M2_t &O) const {
+    return F8E5M2_t(float(*this) + float(O));
+  }
+  F8E5M2_t operator-(const F8E5M2_t &O) const {
+    return F8E5M2_t(float(*this) - float(O));
+  }
+  // Equality compares raw bits; ordering compares the decoded floats.
+  bool operator==(const F8E5M2_t &O) const { return Val == O.Val; }
+  bool operator!=(const F8E5M2_t &O) const { return Val != O.Val; }
+  bool operator<(const F8E5M2_t &O) const {
+    return float(*this) < float(O);
+  }
+  bool operator>(const F8E5M2_t &O) const {
+    return float(*this) > float(O);
+  }
+  bool operator<=(const F8E5M2_t &O) const {
+    return float(*this) <= float(O);
+  }
+  bool operator>=(const F8E5M2_t &O) const {
+    return float(*this) >= float(O);
+  }
+
+  friend std::ostream &operator<<(std::ostream &Os, const F8E5M2_t &Obj) {
+    Os << float(Obj);
+    return Os;
+  }
+  friend std::wostream &operator<<(std::wostream &Os, const F8E5M2_t &Obj) {
+    Os << float(Obj);
+    return Os;
+  }
+
+private:
+  static float F8E5M2ToFloat(uint8_t V) {
+    uint8_t Sign = (V >> 7) & 1;
+    uint8_t Exp = (V >> 2) & 0x1F;
+    uint8_t Mant = V & 0x3;
+
+    if (Exp == 0x1F) {
+      if (Mant == 0)
+        return Sign ? -std::numeric_limits<float>::infinity()
+                    : std::numeric_limits<float>::infinity();
+      return std::numeric_limits<float>::quiet_NaN();
+    }
+    // Exp == 0 is subnormal: Mant * 2^-16; otherwise (1 + Mant/4) * 2^(Exp-15).
+    float Result;
+    if (Exp == 0)
+      Result = std::ldexp(static_cast<float>(Mant), -16);
+    else
+      Result = std::ldexp(1.0f + static_cast<float>(Mant) / 4.0f, Exp - 15);
+
+    return Sign ? -Result : Result;
+  }
+  // Encodes F: NaN -> 0x7F, +/-Inf kept, finite magnitudes >= 57344 saturate.
+  static uint8_t FloatToF8E5M2(float F) {
+    if (std::isnan(F))
+      return 0x7F;
+
+    uint8_t Sign = 0;
+    if (F < 0.0f) {
+      Sign = 1;
+      F = -F;
+    }
+
+    if (std::isinf(F))
+      return (Sign << 7) | (0x1F << 2);
+
+    if (F == 0.0f)
+      return Sign << 7;
+
+    // Clamp to max representable (E=30, M=3 → 57344).
+    if (F >= 57344.0f)
+      return (Sign << 7) | (0x1E << 2) | 0x3;
+
+    int Exp;
+    float Frac = std::frexp(F, &Exp);
+    int BiasedExp = Exp + 14;
+
+    if (BiasedExp <= 0) {
+      // Subnormal range: 0 < F < 2^-14, so the rounded mantissa lies in
+      // [0, 4]. A result of 4 is deliberately not clamped to 3: raw bits 0x04
+      // encode the smallest normal (E=1, M=0 = 2^-14), which is the correctly
+      // rounded value; clamping would round such inputs the wrong way.
+      int Mant = static_cast<int>(std::round(F * 65536.0f));
+      return (Sign << 7) | static_cast<uint8_t>(Mant);
+    }
+
+    float Significand = 2.0f * Frac;
+    int Mant = static_cast<int>(std::round((Significand - 1.0f) * 4.0f));
+
+    if (Mant >= 4) {
+      Mant = 0;
+      BiasedExp++;
+    }
+
+    if (BiasedExp >= 31)
+      return (Sign << 7) | (0x1F << 2);
+
+    return (Sign << 7) | (static_cast<uint8_t>(BiasedExp) << 2) |
+           static_cast<uint8_t>(Mant);
+  }
+};
+
+} // namespace HLSLTestDataTypes
+
+#endif // HLSLTESTDATATYPES_H
diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
index 4126d861ac..cd58e05814 100644
--- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h
+++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
@@ -5,237 +5,16 @@
 
 #include <limits>
 #include <map>
-#include <ostream>
 #include <string>
 #include <vector>
 
-#include <DirectXMath.h>
-#include <DirectXPackedVector.h>
-
-#include "dxc/Support/Global.h"
+#include "HLSLTestDataTypes.h"
 
 namespace LongVector {
 
-// A helper struct because C++ bools are 1 byte and HLSL bools are 4 bytes.
-// Take int32_t as a constuctor argument and convert it to bool when needed.
-// Comparisons cast to a bool because we only care if the bool representation is
-// true or false.
-struct HLSLBool_t {
-  HLSLBool_t() : Val(0) {}
-  HLSLBool_t(int32_t Val) : Val(Val) {}
-  HLSLBool_t(bool Val) : Val(Val) {}
-
-  bool operator==(const HLSLBool_t &Other) const {
-    return static_cast<bool>(Val) == static_cast<bool>(Other.Val);
-  }
-
-  bool operator!=(const HLSLBool_t &Other) const {
-    return static_cast<bool>(Val) != static_cast<bool>(Other.Val);
-  }
-
-  bool operator<(const HLSLBool_t &Other) const { return Val < Other.Val; }
-
-  bool operator>(const HLSLBool_t &Other) const { return Val > Other.Val; }
-
-  bool operator<=(const HLSLBool_t &Other) const { return Val <= Other.Val; }
-
-  bool operator>=(const HLSLBool_t &Other) const { return Val >= Other.Val; }
-
-  HLSLBool_t operator*(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val * Other.Val);
-  }
-
-  HLSLBool_t operator+(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val + Other.Val);
-  }
-
-  HLSLBool_t operator-(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val - Other.Val);
-  }
-
-  HLSLBool_t operator/(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val / Other.Val);
-  }
-
-  HLSLBool_t operator%(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val % Other.Val);
-  }
-
-  HLSLBool_t operator&&(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val && Other.Val);
-  }
-
-  HLSLBool_t operator||(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val || Other.Val);
-  }
-
-  bool AsBool() const { return static_cast<bool>(Val); }
-
-  operator bool() const { return AsBool(); }
-  operator int16_t() const { return (int16_t)(AsBool()); }
-  operator int32_t() const { return (int32_t)(AsBool()); }
-  operator int64_t() const { return (int64_t)(AsBool()); }
-  operator uint16_t() const { return (uint16_t)(AsBool()); }
-  operator uint32_t() const { return (uint32_t)(AsBool()); }
-  operator uint64_t() const { return (uint64_t)(AsBool()); }
-  operator float() const { return (float)(AsBool()); }
-  operator double() const { return (double)(AsBool()); }
-
-  // So we can construct std::wstrings using std::wostream
-  friend std::wostream &operator<<(std::wostream &Os, const HLSLBool_t &Obj) {
-    Os << static_cast<bool>(Obj.Val);
-    return Os;
-  }
-
-  // So we can construct std::strings using std::ostream
-  friend std::ostream &operator<<(std::ostream &Os, const HLSLBool_t &Obj) {
-    Os << static_cast<bool>(Obj.Val);
-    return Os;
-  }
-
-  int32_t Val = 0;
-};
-
-//  No native float16 type in C++ until C++23 . So we use uint16_t to represent
-//  it. Simple little wrapping struct to help handle the right behavior.
-struct HLSLHalf_t {
-  HLSLHalf_t() : Val(0) {}
-  HLSLHalf_t(const float F) {
-    Val = DirectX::PackedVector::XMConvertFloatToHalf(F);
-  }
-  HLSLHalf_t(const double D) {
-    float F;
-    if (D >= std::numeric_limits<double>::max())
-      F = std::numeric_limits<float>::max();
-    else if (D <= std::numeric_limits<double>::lowest())
-      F = std::numeric_limits<float>::lowest();
-    else
-      F = static_cast<float>(D);
-
-    Val = DirectX::PackedVector::XMConvertFloatToHalf(F);
-  }
-  HLSLHalf_t(const uint32_t U) {
-    float F = static_cast<float>(U);
-    Val = DirectX::PackedVector::XMConvertFloatToHalf(F);
-  }
-
-  // PackedVector::HALF is a uint16. Make sure we don't ever accidentally
-  // convert one of these to a HLSLHalf_t by arithmetically converting it to a
-  // float.
-  HLSLHalf_t(DirectX::PackedVector::HALF) = delete;
-
-  static double GetULP(HLSLHalf_t A) {
-    DXASSERT(!std::isnan(A) && !std::isinf(A),
-             "ULP of NaN or infinity is undefined");
-
-    HLSLHalf_t Next = A;
-    ++Next.Val;
-
-    double NextD = Next;
-    double AD = A;
-    return NextD - AD;
-  }
-
-  static HLSLHalf_t FromHALF(DirectX::PackedVector::HALF Half) {
-    HLSLHalf_t H;
-    H.Val = Half;
-    return H;
-  }
-
-  // Implicit conversion to float for use with things like std::acos, std::tan,
-  // etc
-  operator float() const {
-    return DirectX::PackedVector::XMConvertHalfToFloat(Val);
-  }
-
-  bool operator==(const HLSLHalf_t &Other) const {
-    // Convert to floats to properly handle the '0 == -0' case which must
-    // compare to true but have different uint16_t values.
-    // That is, 0 == -0 is true. We store Val as a uint16_t.
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-    return A == B;
-  }
-
-  bool operator<(const HLSLHalf_t &Other) const {
-    return DirectX::PackedVector::XMConvertHalfToFloat(Val) <
-           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-  }
-
-  bool operator>(const HLSLHalf_t &Other) const {
-    return DirectX::PackedVector::XMConvertHalfToFloat(Val) >
-           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-  }
-
-  // Used by tolerance checks in the tests.
-  bool operator>(float F) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    return A > F;
-  }
-
-  bool operator<(float F) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    return A < F;
-  }
-
-  bool operator<=(const HLSLHalf_t &Other) const {
-    return DirectX::PackedVector::XMConvertHalfToFloat(Val) <=
-           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-  }
-
-  bool operator>=(const HLSLHalf_t &Other) const {
-    return DirectX::PackedVector::XMConvertHalfToFloat(Val) >=
-           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-  }
-
-  bool operator!=(const HLSLHalf_t &Other) const { return Val != Other.Val; }
-
-  HLSLHalf_t operator*(const HLSLHalf_t &Other) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A * B));
-  }
-
-  HLSLHalf_t operator+(const HLSLHalf_t &Other) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-    return FromHALF((DirectX::PackedVector::XMConvertFloatToHalf(A + B)));
-  }
-
-  HLSLHalf_t operator-(const HLSLHalf_t &Other) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A - B));
-  }
-
-  HLSLHalf_t operator/(const HLSLHalf_t &Other) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A / B));
-  }
-
-  HLSLHalf_t operator%(const HLSLHalf_t &Other) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-    const float C = std::fmod(A, B);
-    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(C));
-  }
-
-  // So we can construct std::wstrings using std::wostream
-  friend std::wostream &operator<<(std::wostream &Os, const HLSLHalf_t &Obj) {
-    Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val);
-    return Os;
-  }
-
-  // So we can construct std::wstrings using std::wostream
-  friend std::ostream &operator<<(std::ostream &Os, const HLSLHalf_t &Obj) {
-    Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val);
-    return Os;
-  }
-
-  // HALF is an alias to uint16_t
-  DirectX::PackedVector::HALF Val = 0;
-};
+// Import shared HLSL type wrappers into LongVector namespace.
+using HLSLTestDataTypes::HLSLBool_t;
+using HLSLTestDataTypes::HLSLHalf_t;
 
 enum class InputSet {
 #define INPUT_SET(SYMBOL) SYMBOL,
@@ -247,7 +26,8 @@ template <typename T> const std::vector<T> &getInputSet(InputSet InputSet) {
 }
 
 #define BEGIN_INPUT_SETS(TYPE)                                                 \
-  template <> const std::vector<TYPE> &getInputSet<TYPE>(InputSet InputSet) {  \
+  template <>                                                                  \
+  inline const std::vector<TYPE> &getInputSet<TYPE>(InputSet InputSet) {       \
     using T = TYPE;                                                            \
     switch (InputSet) {
 

From 6940b2fca1b8b2923b34994a2f3548469eb33e7f Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <alsepkow@microsoft.com>
Date: Wed, 4 Mar 2026 12:27:38 -0800
Subject: [PATCH 2/9] Add LinearAlgebra TAEF execution tests for SM 6.10 linalg
 operations

Adds a test scaffold for linear algebra matrix operations, following the
same architectural patterns as the LongVector tests:
- Op/ExpectedBuilder structs with ValidationConfig
- dispatchTest/runAndVerify separation
- Macro-driven InputSet data tables for all SM 6.10 ComponentTypes
- Multiple matrix dimension coverage (2x2, 4x4, 4x8, 8x4, 8x8)

Initial ops: FillMatrix, MatrixStore, MatrixAccumulate, MatrixMul.
Intended as a framework for the senior developer implementing linalg
functionality in DXC to write execution tests against.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tools/clang/unittests/HLSLExec/CMakeLists.txt |   1 +
 .../unittests/HLSLExec/LinearAlgebra.cpp      | 656 ++++++++++++++++++
 .../unittests/HLSLExec/LinearAlgebraOps.def   |  26 +
 .../HLSLExec/LinearAlgebraTestData.h          | 222 ++++++
 .../unittests/HLSLExec/ShaderOpArith.xml      | 145 ++++
 5 files changed, 1050 insertions(+)
 create mode 100644 tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
 create mode 100644 tools/clang/unittests/HLSLExec/LinearAlgebraOps.def
 create mode 100644 tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h

diff --git a/tools/clang/unittests/HLSLExec/CMakeLists.txt b/tools/clang/unittests/HLSLExec/CMakeLists.txt
index 8282fd5282..216799a464 100644
--- a/tools/clang/unittests/HLSLExec/CMakeLists.txt
+++ b/tools/clang/unittests/HLSLExec/CMakeLists.txt
@@ -10,6 +10,7 @@ add_clang_library(ExecHLSLTests SHARED
   ShaderOpTest.cpp
   TableParameterHandler.cpp
   LongVectors.cpp
+  LinearAlgebra.cpp
   HlslExecTestUtils.cpp
   ExecHLSLTests.rc
   )
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
new file mode 100644
index 0000000000..148e454365
--- /dev/null
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
@@ -0,0 +1,656 @@
+#ifndef NOMINMAX
+#define NOMINMAX 1
+#endif
+
+#define INLINE_TEST_METHOD_MARKUP
+#include <WexTestClass.h>
+
+#include "LinearAlgebraTestData.h"
+
+#include "ShaderOpTest.h"
+#include "dxc/Support/Global.h"
+
+#include "HlslTestUtils.h"
+
+#include "HlslExecTestUtils.h"
+
+#include <algorithm>
+#include <iomanip>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+using namespace HLSLTestDataTypes; // For HLSLHalf_t, HLSLBool_t
+
+namespace LinearAlgebra {
+
+//
+// Operation Types
+//
+
+enum class OpType : unsigned {
+#define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2)       \
+  SYMBOL,
+#include "LinearAlgebraOps.def"
+  NumOpTypes // Sentinel after the last OP() entry; not a valid operation.
+};
+
+struct Operation {
+  size_t Arity;           // Number of input matrices the op consumes.
+  const char *Define;     // Macro name passed to the shader compiler via -D.
+  const char *ShaderName; // ShaderOp name passed to RunShaderOpTestAfterParse.
+  InputSet InputSets[2];  // Data set used to build each input matrix.
+  OpType Type;            // Enumerator this table row was generated from.
+};
+
+static constexpr Operation Operations[] = {
+#define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2)       \
+  {ARITY, DEFINE, SHADER_NAME,                                                 \
+   {InputSet::INPUT_SET_1, InputSet::INPUT_SET_2}, OpType::SYMBOL},
+#include "LinearAlgebraOps.def"
+};
+
+constexpr const Operation &getOperation(OpType Op) {
+  if (Op >= OpType::NumOpTypes)
+    std::abort(); // No table row exists for the sentinel or anything above.
+  return Operations[static_cast<unsigned>(Op)];
+}
+
+//
+// Data Types
+//
+
+struct DataType {
+  const char *HLSLTypeString; // Emitted as -DELEM_TYPE and -DOUT_TYPE.
+  const char *CompTypeString; // Emitted as -DCOMP_TYPE.
+  bool Is16Bit;               // Adds -enable-16bit-types when true.
+  size_t HLSLSizeInBytes;     // Per-element size in bytes on the HLSL side.
+};
+
+template <typename T> const DataType &getDataType() { // Unspecialized fallback.
+  static_assert(sizeof(T) == 0, "Unknown data type"); // T-dependent on purpose.
+}
+
+#define DATA_TYPE(TYPE, HLSL_STRING, COMP_TYPE, HLSL_SIZE, IS_16BIT)           \
+  template <> const DataType &getDataType<TYPE>() {                            \
+    static DataType DT{HLSL_STRING, COMP_TYPE, IS_16BIT, HLSL_SIZE};          \
+    return DT;                                                                 \
+  }
+
+DATA_TYPE(HLSLHalf_t, "float16_t", "ComponentType::F16", 2, true)
+DATA_TYPE(float, "float", "ComponentType::F32", 4, false)
+DATA_TYPE(double, "double", "ComponentType::F64", 8, false)
+DATA_TYPE(int32_t, "int", "ComponentType::I32", 4, false)
+DATA_TYPE(uint32_t, "uint", "ComponentType::U32", 4, false)
+
+#undef DATA_TYPE
+
+template <typename T> constexpr bool isFloatingPointType() {
+  return std::is_same_v<T, HLSLHalf_t> || // 16-bit wrapper type
+         std::is_same_v<T, float> || std::is_same_v<T, double>;
+}
+
+//
+// Validation
+//
+
+enum class ValidationType { Epsilon, Ulp };
+
+struct ValidationConfig {
+  double Tolerance = 0.0; // 0.0 makes the generic doValuesMatch compare exactly.
+  ValidationType Type = ValidationType::Epsilon; // How Tolerance is read.
+
+  static ValidationConfig Epsilon(double Tol) { // Tol used as an epsilon.
+    return {Tol, ValidationType::Epsilon};
+  }
+
+  static ValidationConfig Ulp(double Tol) { // Tol used as a ULP count.
+    return {Tol, ValidationType::Ulp};
+  }
+};
+
+// Default validation: 1 ULP for floating point types, exact match otherwise.
+template <typename T> struct DefaultValidation {
+  ValidationConfig ValidationConfig; // Starts as Epsilon with Tolerance 0.0.
+
+  DefaultValidation() {
+    if constexpr (isFloatingPointType<T>())
+      ValidationConfig = ValidationConfig::Ulp(1.0);
+  }
+};
+
+// Strict validation: exact match.
+struct StrictValidation {
+  ValidationConfig ValidationConfig;
+};
+
+//
+// Value comparison overloads following LongVector patterns.
+//
+
+template <typename T>
+bool doValuesMatch(T A, T B, double Tolerance, ValidationType) {
+  // Generic fallback; the ValidationType argument is ignored. Zero tolerance
+  // means exact equality, otherwise larger-minus-smaller is bounded.
+  if (Tolerance != 0.0)
+    return static_cast<T>(A > B ? A - B : B - A) <= Tolerance;
+  return A == B;
+}
+
+bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance,
+                   ValidationType VType) {
+  // Half overload: compares the wrapped Val members with the half-precision
+  // helpers. The tolerance is narrowed to float for both comparison modes.
+  if (VType == ValidationType::Epsilon)
+    return CompareHalfEpsilon(A.Val, B.Val, static_cast<float>(Tolerance));
+
+  if (VType == ValidationType::Ulp)
+    return CompareHalfULP(A.Val, B.Val, static_cast<float>(Tolerance));
+  return false; // Unrecognized validation type: report a mismatch.
+}
+
+bool doValuesMatch(float A, float B, double Tolerance, ValidationType VType) {
+  // Float overload: the epsilon comparison takes a float tolerance, whereas
+  // the ULP comparison takes the tolerance truncated to an int.
+  if (VType == ValidationType::Epsilon)
+    return CompareFloatEpsilon(A, B, static_cast<float>(Tolerance));
+
+  if (VType == ValidationType::Ulp)
+    return CompareFloatULP(A, B, static_cast<int>(Tolerance));
+  return false; // Unrecognized validation type: report a mismatch.
+}
+
+bool doValuesMatch(double A, double B, double Tolerance,
+                   ValidationType VType) {
+  // Double overload: the epsilon comparison uses Tolerance as is, whereas the
+  // ULP comparison takes the tolerance truncated to an int64_t.
+  if (VType == ValidationType::Epsilon)
+    return CompareDoubleEpsilon(A, B, Tolerance);
+
+  if (VType == ValidationType::Ulp)
+    return CompareDoubleULP(A, B, static_cast<int64_t>(Tolerance));
+  return false; // Unrecognized validation type: report a mismatch.
+}
+
+template <typename T>
+bool doVectorsMatch(const std::vector<T> &Actual,
+                    const std::vector<T> &Expected,
+                    const ValidationConfig &Config, bool VerboseLogging) {
+  // Compares Actual with Expected element by element under Config and logs
+  // each mismatch. Returns true only when every element matches. A size
+  // mismatch is reported as a failure instead of only being asserted, so a
+  // build where the assert is compiled out cannot read past the shorter one.
+  if (Actual.size() != Expected.size()) {
+    hlsl_test::LogErrorFmt(L"Size mismatch. Actual: %zu Expected: %zu",
+                           Actual.size(), Expected.size());
+    return false;
+  }
+
+  if (VerboseLogging)
+    hlsl_test::LogCommentFmt(L"Verifying %zu elements", Actual.size());
+
+  bool AllMatch = true;
+  for (size_t I = 0; I < Actual.size(); I++) {
+    if (doValuesMatch(Actual[I], Expected[I], Config.Tolerance, Config.Type))
+      continue;
+    AllMatch = false;
+    std::wstringstream Wss(L"");
+    Wss << std::setprecision(15) << L"Mismatch at Index: " << I;
+    Wss << L" Actual:" << Actual[I] << L" Expected:" << Expected[I];
+    // Pass the text as an argument so it is never parsed as a format string.
+    hlsl_test::LogErrorFmt(L"%ls", Wss.str().c_str());
+  }
+  return AllMatch;
+}
+
+//
+// Matrix dimensions for test iteration.
+//
+
+struct MatrixDims {
+  size_t Rows;
+  size_t Cols;
+};
+
+std::vector<MatrixDims> getMatrixSizesToTest() {
+  return {{2, 2}, {4, 4}, {4, 8}, {8, 4}, {8, 8}};
+}
+
+//
+// Build compiler options.
+//
+
+std::string
+getCompilerOptionsString(const Operation &Op, const DataType &ElemType,
+                         size_t Rows, size_t Cols, size_t KDim = 0) {
+  std::stringstream Options;
+  // Builds the shader compiler arguments; 16-bit types need an extra flag.
+  if (ElemType.Is16Bit)
+    Options << " -enable-16bit-types";
+  // Op selector first; ELEM_TYPE and OUT_TYPE are always the same type here.
+  Options << " -D" << Op.Define;
+  Options << " -DELEM_TYPE=" << ElemType.HLSLTypeString;
+  Options << " -DOUT_TYPE=" << ElemType.HLSLTypeString;
+  Options << " -DCOMP_TYPE=" << ElemType.CompTypeString;
+  Options << " -DROWS=" << Rows;
+  Options << " -DCOLS=" << Cols;
+  // K_DIM is only defined when the caller supplies a non-zero inner dimension.
+  if (KDim > 0)
+    Options << " -DK_DIM=" << KDim;
+  // Layout is fixed at 0, which the ShaderOp comment documents as RowMajor.
+  Options << " -DMATRIX_LAYOUT=0";
+
+  return Options.str();
+}
+
+//
+// Shader buffer helpers.
+//
+
+template <typename T>
+void fillShaderBuffer(std::vector<BYTE> &ShaderBuffer,
+                      const std::vector<T> &Data) {
+  // Writes Data to the start of ShaderBuffer. The buffer is not resized, so
+  // it must already be at least sizeof(T) * Data.size() bytes long.
+  DXASSERT_NOMSG(ShaderBuffer.size() >= sizeof(T) * Data.size());
+  BYTE *const Raw = ShaderBuffer.data();
+  if constexpr (std::is_same_v<T, HLSLHalf_t>) {
+    // Halves are stored through their Val member as HALF values.
+    auto *Dst = reinterpret_cast<DirectX::PackedVector::HALF *>(Raw);
+    for (const T &Elem : Data)
+      *Dst++ = Elem.Val;
+  } else {
+    auto *Dst = reinterpret_cast<T *>(Raw);
+    for (const T &Elem : Data)
+      *Dst++ = Elem;
+  }
+}
+
+template <typename T>
+void readShaderBuffer(const MappedData &ShaderBuffer, std::vector<T> &OutData,
+                      size_t NumElements) {
+  // Appends NumElements values from the front of ShaderBuffer to OutData.
+  if constexpr (std::is_same_v<T, HLSLHalf_t>) {
+    const auto *Src =
+        static_cast<const DirectX::PackedVector::HALF *>(ShaderBuffer.data());
+    for (size_t Idx = 0; Idx != NumElements; ++Idx)
+      OutData.push_back(HLSLHalf_t::FromHALF(Src[Idx]));
+  } else {
+    const auto *Src = static_cast<const T *>(ShaderBuffer.data());
+    for (size_t Idx = 0; Idx != NumElements; ++Idx)
+      OutData.push_back(Src[Idx]);
+  }
+}
+
+//
+// Input building helpers. Following LongVector::buildTestInput pattern.
+//
+
+template <typename T> using InputSets = std::vector<std::vector<T>>;
+
+template <typename T>
+std::vector<T> buildTestInput(InputSet Set, size_t NumElements) {
+  // Returns NumElements values taken from Set, cycling back to the set's
+  // first value each time its raw data runs out.
+  const std::vector<T> &Source = getInputSet<T>(Set);
+  std::vector<T> Cycled;
+  Cycled.reserve(NumElements);
+  for (size_t Out = 0; Out != NumElements; ++Out)
+    Cycled.push_back(Source[Out % Source.size()]);
+  return Cycled;
+}
+
+// Build an identity matrix of the given dimensions using the Identity InputSet
+// for the diagonal value.
+template <typename T>
+std::vector<T> buildIdentityMatrix(size_t Rows, size_t Cols) {
+  const T One = getInputSet<T>(InputSet::Identity)[0];
+  const T Zero = One - One;
+  // Zero-filled Rows x Cols storage; only the leading diagonal is set.
+  std::vector<T> Matrix(Rows * Cols, Zero);
+  for (size_t Diag = 0; Diag < (Rows < Cols ? Rows : Cols); ++Diag)
+    Matrix[Diag * Cols + Diag] = One;
+  return Matrix;
+}
+
+template <typename T>
+InputSets<T> buildTestInputs(const Operation &Op, size_t Rows, size_t Cols,
+                              size_t KDim) {
+  // Builds Op.Arity inputs. A unary op gets one Rows x Cols matrix. A binary
+  // op gets a Rows x KDim left operand and a KDim x Cols right operand, so
+  // the left operand is sized by KDim rather than by Cols.
+  InputSets<T> Inputs;
+  const size_t LeftCols = (Op.Arity >= 2) ? KDim : Cols;
+  if (Op.Arity >= 1)
+    Inputs.push_back(buildTestInput<T>(Op.InputSets[0], Rows * LeftCols));
+  if (Op.Arity >= 2) {
+    // The right operand is either an identity matrix or cycled set data.
+    if (Op.InputSets[1] == InputSet::Identity)
+      Inputs.push_back(buildIdentityMatrix<T>(KDim, Cols));
+    else
+      Inputs.push_back(buildTestInput<T>(Op.InputSets[1], KDim * Cols));
+  }
+  return Inputs;
+}
+
+//
+// Core GPU test runner. Returns the output buffer (never nullopt today).
+//
+
+template <typename T>
+std::optional<std::vector<T>>
+runLinAlgTest(ID3D12Device *D3DDevice, bool VerboseLogging,
+              const Operation &Op, const InputSets<T> &Inputs, size_t Rows,
+              size_t Cols, size_t KDim, size_t ExpectedOutputSize) {
+  // Runs Op.ShaderName on D3DDevice and returns the OutputMatrix contents.
+  const DataType &ElemType = getDataType<T>();
+
+  std::string CompilerOptions =
+      getCompilerOptionsString(Op, ElemType, Rows, Cols, KDim);
+
+  if (VerboseLogging)
+    hlsl_test::LogCommentFmt(L"Compiler Options: %S",
+                             CompilerOptions.c_str());
+  // Parse the ShaderOp definitions from ShaderOpArith.xml.
+  dxc::SpecificDllLoader DxilDllLoader;
+  CComPtr<IStream> TestXML;
+  readHlslDataIntoNewStream(L"ShaderOpArith.xml", &TestXML, DxilDllLoader);
+  auto ShaderOpSet = std::make_shared<st::ShaderOpSet>();
+  st::ParseShaderOpSetFromStream(TestXML, ShaderOpSet.get());
+  // The callback receives a resource name and the byte buffer to initialize.
+  std::shared_ptr<st::ShaderOpTestResult> TestResult =
+      st::RunShaderOpTestAfterParse(
+          D3DDevice, DxilDllLoader, Op.ShaderName,
+          [&](LPCSTR Name, std::vector<BYTE> &ShaderData,
+              st::ShaderOp *ShaderOp) {
+            if (VerboseLogging)
+              hlsl_test::LogCommentFmt(
+                  L"LinAlg RunShaderOpTest CallBack. Resource Name: %S", Name);
+            // OutputMatrix is left unfilled; its callback sets compiler args.
+            if (_stricmp(Name, "OutputMatrix") == 0) {
+              ShaderOp->Shaders.at(0).Arguments = CompilerOptions.c_str();
+              return;
+            }
+            // Inputs are matched by name: InputMatrix1, then InputMatrix2.
+            for (size_t I = 0; I < 2; ++I) {
+              std::string BufferName = "InputMatrix";
+              BufferName += (char)('1' + I);
+              if (_stricmp(Name, BufferName.c_str()) == 0) {
+                if (I < Inputs.size() && !Inputs[I].empty())
+                  fillShaderBuffer(ShaderData, Inputs[I]);
+                return;
+              }
+            }
+            // Any other resource name is reported as an error.
+            LOG_ERROR_FMT_THROW(
+                L"LinAlg RunShaderOpTest CallBack. Unexpected Resource: %S",
+                Name);
+          },
+          std::move(ShaderOpSet));
+  // Read ExpectedOutputSize elements back from the OutputMatrix resource.
+  MappedData ShaderOutData;
+  TestResult->Test->GetReadBackData("OutputMatrix", &ShaderOutData);
+
+  std::vector<T> OutData;
+  readShaderBuffer(ShaderOutData, OutData, ExpectedOutputSize);
+  // No path in this function returns nullopt; the result is always engaged.
+  return OutData;
+}
+
+//
+// runAndVerify - runs the GPU test and verifies results.
+//
+
+template <typename T>
+void runAndVerify(ID3D12Device *D3DDevice, bool VerboseLogging,
+                  const Operation &Op, const InputSets<T> &Inputs,
+                  const std::vector<T> &Expected,
+                  const ValidationConfig &Config, size_t Rows, size_t Cols,
+                  size_t KDim) {
+  // Runs Op on the device and checks the read-back output against Expected.
+  auto Actual = runLinAlgTest<T>(D3DDevice, VerboseLogging, Op, Inputs, Rows,
+                                 Cols, KDim, Expected.size());
+  // An empty optional is treated as a skipped test rather than a failure.
+  if (!Actual) {
+    hlsl_test::LogCommentFmt(L"Test was skipped.");
+    return;
+  }
+
+  VERIFY_IS_TRUE(doVectorsMatch(*Actual, Expected, Config, VerboseLogging));
+}
+
+//
+// Op definitions. Each op carries a ValidationConfig.
+// Specializations are expected to have a ValidationConfig member.
+//
+
+template <OpType OP, typename T> struct Op;
+
+// ExpectedBuilder - specializations compute expected output from inputs.
+template <OpType OP, typename T> struct ExpectedBuilder;
+
+// FillMatrix: splat a scalar value across the entire matrix.
+template <typename T> struct Op<OpType::FillMatrix, T> : StrictValidation {};
+
+template <typename T> struct ExpectedBuilder<OpType::FillMatrix, T> {
+  static std::vector<T> buildExpected(Op<OpType::FillMatrix, T> &,
+                                      const InputSets<T> &, size_t Rows,
+                                      size_t Cols, size_t) {
+    const T FillVal = getInputSet<T>(InputSet::Fill)[0];
+    return std::vector<T>(Rows * Cols, FillVal);
+  }
+
+  // FillMatrix input is special: just the scalar fill value.
+  static InputSets<T> buildInputs(const Operation &, size_t, size_t, size_t) {
+    return {{getInputSet<T>(InputSet::Fill)[0]}};
+  }
+};
+
+// MatrixStore: load and store round-trip.
+template <typename T>
+struct Op<OpType::MatrixStore, T> : DefaultValidation<T> {};
+
+template <typename T> struct ExpectedBuilder<OpType::MatrixStore, T> {
+  static std::vector<T> buildExpected(Op<OpType::MatrixStore, T> &,
+                                      const InputSets<T> &Inputs, size_t,
+                                      size_t, size_t) {
+    return Inputs[0];
+  }
+};
+
+// MatrixAccumulate: accumulate into zero-initialized output.
+template <typename T>
+struct Op<OpType::MatrixAccumulate, T> : DefaultValidation<T> {};
+
+template <typename T> struct ExpectedBuilder<OpType::MatrixAccumulate, T> {
+  static std::vector<T> buildExpected(Op<OpType::MatrixAccumulate, T> &,
+                                      const InputSets<T> &Inputs, size_t,
+                                      size_t, size_t) {
+    return Inputs[0];
+  }
+};
+
+// MatrixMul: multiply input matrix by identity.
+template <typename T>
+struct Op<OpType::MatrixMul, T> : DefaultValidation<T> {};
+
+template <typename T> struct ExpectedBuilder<OpType::MatrixMul, T> {
+  static std::vector<T> buildExpected(Op<OpType::MatrixMul, T> &,
+                                      const InputSets<T> &Inputs, size_t,
+                                      size_t, size_t) {
+    // Assumes an identity right operand with KDim == Cols: result == Inputs[0].
+    return Inputs[0];
+  }
+};
+
+//
+// dispatchTest - orchestrates building inputs, computing expected results,
+// and running the test across multiple matrix sizes.
+// Follows the same pattern as LongVector::dispatchTest.
+//
+
+template <typename T, OpType OP>
+void dispatchTest(ID3D12Device *D3DDevice, bool VerboseLogging) {
+  // For every matrix size under test: build the inputs, compute the expected
+  // output on the CPU, then run the shader and compare.
+  constexpr const Operation &OpInfo = getOperation(OP);
+  Op<OP, T> OpImpl;
+
+  for (const MatrixDims &Dims : getMatrixSizesToTest()) {
+    // Binary ops reuse the column count as the inner dimension; other ops
+    // pass 0, in which case no K_DIM define is emitted.
+    const size_t KDim = (OpInfo.Arity >= 2) ? Dims.Cols : 0;
+
+    // FillMatrix has special input handling (scalar, not a matrix).
+    InputSets<T> Inputs;
+    if constexpr (OP == OpType::FillMatrix)
+      Inputs = ExpectedBuilder<OP, T>::buildInputs(OpInfo, Dims.Rows,
+                                                   Dims.Cols, KDim);
+    else
+      Inputs = buildTestInputs<T>(OpInfo, Dims.Rows, Dims.Cols, KDim);
+
+    const std::vector<T> Expected = ExpectedBuilder<OP, T>::buildExpected(
+        OpImpl, Inputs, Dims.Rows, Dims.Cols, KDim);
+    runAndVerify(D3DDevice, VerboseLogging, OpInfo, Inputs, Expected,
+                 OpImpl.ValidationConfig, Dims.Rows, Dims.Cols, KDim);
+  }
+}
+
+} // namespace LinearAlgebra
+
+using namespace LinearAlgebra;
+
+//
+// TAEF test entry point macro.
+//
+#define LINALG_TEST(Op, DataType)                                              \
+  TEST_METHOD(Op##_##DataType) { runTest<DataType, OpType::Op>(); }
+
+//
+// Common test class for linear algebra tests.
+// Follows the same pattern as LongVector::TestClassCommon.
+//
+class LinAlgTestClassCommon {
+public:
+  bool setupClass() {
+    WEX::TestExecution::SetVerifyOutput verifySettings(
+        WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+    // The body below runs once; later calls fall through to "return true".
+    if (!Initialized) {
+      Initialized = true;
+
+      D3D12SDK = D3D12SDKSelector();
+
+      WEX::TestExecution::RuntimeParameters::TryGetValue(L"VerboseLogging",
+                                                         VerboseLogging);
+      if (VerboseLogging)
+        hlsl_test::LogCommentFmt(
+            L"Verbose logging is enabled for this test.");
+      else
+        hlsl_test::LogCommentFmt(
+            L"Verbose logging is disabled for this test.");
+      // Build default (true under _HLK_CONF) can be overridden at run time.
+      bool FailIfRequirementsNotMet = false;
+#ifdef _HLK_CONF
+      FailIfRequirementsNotMet = true;
+#endif
+      WEX::TestExecution::RuntimeParameters::TryGetValue(
+          L"FailIfRequirementsNotMet", FailIfRequirementsNotMet);
+      // NOTE(review): class is named SM 6.10 but requests SM 6.9; confirm.
+      const bool SkipUnsupported = !FailIfRequirementsNotMet;
+      // Linear algebra requires at least SM 6.9 device support.
+      if (!D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_9,
+                                  SkipUnsupported)) {
+        if (FailIfRequirementsNotMet)
+          hlsl_test::LogErrorFmt(
+              L"Device Creation failed, resulting in test failure, since "
+              L"FailIfRequirementsNotMet is set.");
+
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  bool setupMethod() {
+    if (D3DDevice && D3DDevice->GetDeviceRemovedReason() != S_OK) {
+      hlsl_test::LogCommentFmt(L"Device was lost!");
+      D3DDevice.Release();
+    }
+    // Create a device whenever none is currently held.
+    if (!D3DDevice) {
+      hlsl_test::LogCommentFmt(L"Creating device");
+
+      const bool SkipUnsupported = false;
+      VERIFY_IS_TRUE(D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_9,
+                                            SkipUnsupported));
+    }
+
+    return true;
+  }
+
+  template <typename T, OpType OP> void runTest() {
+    WEX::TestExecution::SetVerifyOutput verifySettings(
+        WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+
+    dispatchTest<T, OP>(D3DDevice, VerboseLogging);
+  }
+
+protected:
+  CComPtr<ID3D12Device> D3DDevice; // Recreated by setupMethod if lost.
+
+private:
+  bool Initialized = false; // Set on the first setupClass call.
+  std::optional<D3D12SDKSelector> D3D12SDK; // Engaged by setupClass.
+  bool VerboseLogging = false; // From the "VerboseLogging" runtime parameter.
+};
+
+//
+// TAEF Test Class
+//
+class DxilConf_SM610_LinearAlgebra : public LinAlgTestClassCommon {
+public: // NOTE(review): Kits.TestId below looks like a placeholder GUID.
+  BEGIN_TEST_CLASS(DxilConf_SM610_LinearAlgebra)
+  TEST_CLASS_PROPERTY(
+      "Kits.TestName",
+      "D3D12 - Shader Model 6.10 - Linear Algebra Tests")
+  TEST_CLASS_PROPERTY("Kits.TestId", "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
+  TEST_CLASS_PROPERTY(
+      "Kits.Description",
+      "Validates SM 6.10 linear algebra matrix operations")
+  TEST_CLASS_PROPERTY(
+      "Kits.Specification",
+      "Device.Graphics.D3D12.DXILCore.ShaderModel610.CoreRequirement")
+  TEST_METHOD_PROPERTY(L"Priority", L"0")
+  END_TEST_CLASS()
+
+  TEST_CLASS_SETUP(setupClass) {
+    return LinAlgTestClassCommon::setupClass();
+  }
+  TEST_METHOD_SETUP(setupMethod) {
+    return LinAlgTestClassCommon::setupMethod();
+  }
+
+  // FillMatrix (Splat)
+  LINALG_TEST(FillMatrix, float);
+  LINALG_TEST(FillMatrix, HLSLHalf_t);
+  LINALG_TEST(FillMatrix, int32_t);
+  LINALG_TEST(FillMatrix, uint32_t);
+
+  // MatrixStore (Load + Store round-trip)
+  LINALG_TEST(MatrixStore, float);
+  LINALG_TEST(MatrixStore, HLSLHalf_t);
+  LINALG_TEST(MatrixStore, int32_t);
+  LINALG_TEST(MatrixStore, uint32_t);
+
+  // MatrixAccumulate (InterlockedAccumulate)
+  LINALG_TEST(MatrixAccumulate, float);
+  LINALG_TEST(MatrixAccumulate, HLSLHalf_t);
+
+  // MatrixMul (Multiply)
+  LINALG_TEST(MatrixMul, float);
+  LINALG_TEST(MatrixMul, HLSLHalf_t);
+};
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def b/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def
new file mode 100644
index 0000000000..077ca27d33
--- /dev/null
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def
@@ -0,0 +1,26 @@
+//
+// LinearAlgebraOps.def - X-Macro definitions for linear algebra operations
+//
+// This file defines the operations tested by the LinearAlgebra execution tests.
+// Each OP entry maps an operation symbol to its preprocessor define, arity
+// (number of input matrices), the ShaderOp name, and which InputSets to use.
+//
+// OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2)
+//   SYMBOL      - C++ enum name for the operation
+//   ARITY       - Number of input matrices required (0 = scalar-only, 1, 2)
+//   DEFINE      - Preprocessor define passed to the shader to select the op
+//   SHADER_NAME - Name of the ShaderOp in ShaderOpArith.xml
+//   INPUT_SET_1 - InputSet for first matrix (or Fill for scalar ops)
+//   INPUT_SET_2 - InputSet for second matrix (or unused placeholder)
+//
+
+#ifndef OP
+#define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2)
+#endif
+
+OP(FillMatrix,        0, "FUNC_FILL_MATRIX",        "LinAlgOp", Fill, Fill)
+OP(MatrixStore,       1, "FUNC_MATRIX_STORE",        "LinAlgOp", Seed, Seed)
+OP(MatrixAccumulate,  1, "FUNC_MATRIX_ACCUMULATE",   "LinAlgOp", Seed, Seed)
+OP(MatrixMul,         2, "FUNC_MATRIX_MUL",          "LinAlgOp", Seed, Identity)
+
+#undef OP
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h b/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h
new file mode 100644
index 0000000000..0bac431b7a
--- /dev/null
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h
@@ -0,0 +1,222 @@
+#ifndef LINEARALGEBRATESTDATA_H
+#define LINEARALGEBRATESTDATA_H
+
+#include <vector>
+
+#include <Verify.h>
+
+#include "HLSLTestDataTypes.h"
+
+namespace LinearAlgebra {
+
+//
+// Input data sets for matrix operations.
+// Follows the same InputSet / BEGIN_INPUT_SETS pattern as LongVectorTestData.h.
+//
+
+enum class InputSet { Seed, Fill, Identity };
+
+template <typename T>
+const std::vector<T> &getInputSet(InputSet InputSet) {
+  static_assert(sizeof(T) == 0, "No InputSet for this type");
+}
+
+#define BEGIN_INPUT_SETS(TYPE)                                                  \
+  template <>                                                                   \
+  inline const std::vector<TYPE> &getInputSet<TYPE>(InputSet InputSet) {        \
+    using T = TYPE;                                                             \
+    switch (InputSet) {
+
+#define INPUT_SET(SET, ...)                                                     \
+  case SET: {                                                                   \
+    static std::vector<T> Data = {__VA_ARGS__};                                 \
+    return Data;                                                                \
+  }
+
+#define END_INPUT_SETS()                                                        \
+  default:                                                                      \
+    break;                                                                      \
+    }                                                                           \
+    VERIFY_FAIL("Missing input set");                                           \
+    std::abort();                                                               \
+    }
+
+using HLSLTestDataTypes::HLSLHalf_t;
+using HLSLTestDataTypes::SNormF16_t;
+using HLSLTestDataTypes::UNormF16_t;
+using HLSLTestDataTypes::SNormF32_t;
+using HLSLTestDataTypes::UNormF32_t;
+using HLSLTestDataTypes::SNormF64_t;
+using HLSLTestDataTypes::UNormF64_t;
+using HLSLTestDataTypes::F8E4M3_t;
+using HLSLTestDataTypes::F8E5M2_t;
+
+BEGIN_INPUT_SETS(HLSLHalf_t)
+INPUT_SET(InputSet::Seed, HLSLHalf_t(1.0f), HLSLHalf_t(2.0f),
+          HLSLHalf_t(3.0f), HLSLHalf_t(4.0f), HLSLHalf_t(5.0f),
+          HLSLHalf_t(6.0f), HLSLHalf_t(7.0f), HLSLHalf_t(8.0f),
+          HLSLHalf_t(9.0f), HLSLHalf_t(10.0f), HLSLHalf_t(11.0f),
+          HLSLHalf_t(12.0f), HLSLHalf_t(13.0f), HLSLHalf_t(14.0f))
+INPUT_SET(InputSet::Fill, HLSLHalf_t(42.0f))
+INPUT_SET(InputSet::Identity, HLSLHalf_t(1.0f))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(float)
+INPUT_SET(InputSet::Seed, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+          9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f)
+INPUT_SET(InputSet::Fill, 42.0f)
+INPUT_SET(InputSet::Identity, 1.0f)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(double)
+INPUT_SET(InputSet::Seed, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+          11.0, 12.0, 13.0, 14.0)
+INPUT_SET(InputSet::Fill, 42.0)
+INPUT_SET(InputSet::Identity, 1.0)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(int32_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(uint32_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+// --- Additional scalar types ---
+
+BEGIN_INPUT_SETS(int8_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(uint8_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(int16_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(uint16_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(int64_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(uint64_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+// --- Normalized types (SNorm [-1,1], UNorm [0,1]) ---
+
+BEGIN_INPUT_SETS(SNormF16_t)
+INPUT_SET(InputSet::Seed, SNormF16_t(HLSLHalf_t(-0.9f)),
+          SNormF16_t(HLSLHalf_t(-0.7f)), SNormF16_t(HLSLHalf_t(-0.5f)),
+          SNormF16_t(HLSLHalf_t(-0.3f)), SNormF16_t(HLSLHalf_t(-0.1f)),
+          SNormF16_t(HLSLHalf_t(0.1f)), SNormF16_t(HLSLHalf_t(0.2f)),
+          SNormF16_t(HLSLHalf_t(0.3f)), SNormF16_t(HLSLHalf_t(0.4f)),
+          SNormF16_t(HLSLHalf_t(0.5f)), SNormF16_t(HLSLHalf_t(0.6f)),
+          SNormF16_t(HLSLHalf_t(0.7f)), SNormF16_t(HLSLHalf_t(0.8f)),
+          SNormF16_t(HLSLHalf_t(0.9f)))
+INPUT_SET(InputSet::Fill, SNormF16_t(HLSLHalf_t(0.5f)))
+INPUT_SET(InputSet::Identity, SNormF16_t(HLSLHalf_t(1.0f)))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(UNormF16_t)
+INPUT_SET(InputSet::Seed, UNormF16_t(HLSLHalf_t(0.05f)),
+          UNormF16_t(HLSLHalf_t(0.1f)), UNormF16_t(HLSLHalf_t(0.15f)),
+          UNormF16_t(HLSLHalf_t(0.2f)), UNormF16_t(HLSLHalf_t(0.25f)),
+          UNormF16_t(HLSLHalf_t(0.3f)), UNormF16_t(HLSLHalf_t(0.35f)),
+          UNormF16_t(HLSLHalf_t(0.4f)), UNormF16_t(HLSLHalf_t(0.45f)),
+          UNormF16_t(HLSLHalf_t(0.5f)), UNormF16_t(HLSLHalf_t(0.55f)),
+          UNormF16_t(HLSLHalf_t(0.6f)), UNormF16_t(HLSLHalf_t(0.7f)),
+          UNormF16_t(HLSLHalf_t(0.8f)))
+INPUT_SET(InputSet::Fill, UNormF16_t(HLSLHalf_t(0.5f)))
+INPUT_SET(InputSet::Identity, UNormF16_t(HLSLHalf_t(1.0f)))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(SNormF32_t)
+INPUT_SET(InputSet::Seed, SNormF32_t(-0.9f), SNormF32_t(-0.7f),
+          SNormF32_t(-0.5f), SNormF32_t(-0.3f), SNormF32_t(-0.1f),
+          SNormF32_t(0.1f), SNormF32_t(0.2f), SNormF32_t(0.3f),
+          SNormF32_t(0.4f), SNormF32_t(0.5f), SNormF32_t(0.6f),
+          SNormF32_t(0.7f), SNormF32_t(0.8f), SNormF32_t(0.9f))
+INPUT_SET(InputSet::Fill, SNormF32_t(0.5f))
+INPUT_SET(InputSet::Identity, SNormF32_t(1.0f))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(UNormF32_t)
+INPUT_SET(InputSet::Seed, UNormF32_t(0.05f), UNormF32_t(0.1f),
+          UNormF32_t(0.15f), UNormF32_t(0.2f), UNormF32_t(0.25f),
+          UNormF32_t(0.3f), UNormF32_t(0.35f), UNormF32_t(0.4f),
+          UNormF32_t(0.45f), UNormF32_t(0.5f), UNormF32_t(0.55f),
+          UNormF32_t(0.6f), UNormF32_t(0.7f), UNormF32_t(0.8f))
+INPUT_SET(InputSet::Fill, UNormF32_t(0.5f))
+INPUT_SET(InputSet::Identity, UNormF32_t(1.0f))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(SNormF64_t)
+INPUT_SET(InputSet::Seed, SNormF64_t(-0.9), SNormF64_t(-0.7),
+          SNormF64_t(-0.5), SNormF64_t(-0.3), SNormF64_t(-0.1),
+          SNormF64_t(0.1), SNormF64_t(0.2), SNormF64_t(0.3), SNormF64_t(0.4),
+          SNormF64_t(0.5), SNormF64_t(0.6), SNormF64_t(0.7), SNormF64_t(0.8),
+          SNormF64_t(0.9))
+INPUT_SET(InputSet::Fill, SNormF64_t(0.5))
+INPUT_SET(InputSet::Identity, SNormF64_t(1.0))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(UNormF64_t)
+INPUT_SET(InputSet::Seed, UNormF64_t(0.05), UNormF64_t(0.1),
+          UNormF64_t(0.15), UNormF64_t(0.2), UNormF64_t(0.25),
+          UNormF64_t(0.3), UNormF64_t(0.35), UNormF64_t(0.4),
+          UNormF64_t(0.45), UNormF64_t(0.5), UNormF64_t(0.55),
+          UNormF64_t(0.6), UNormF64_t(0.7), UNormF64_t(0.8))
+INPUT_SET(InputSet::Fill, UNormF64_t(0.5))
+INPUT_SET(InputSet::Identity, UNormF64_t(1.0))
+END_INPUT_SETS()
+
+// --- FP8 types (packed 4 elements per scalar in HLSL) ---
+
+BEGIN_INPUT_SETS(F8E4M3_t)
+INPUT_SET(InputSet::Seed, F8E4M3_t(1.0f), F8E4M3_t(1.5f), F8E4M3_t(2.0f),
+          F8E4M3_t(2.5f), F8E4M3_t(3.0f), F8E4M3_t(4.0f), F8E4M3_t(5.0f),
+          F8E4M3_t(6.0f), F8E4M3_t(7.0f), F8E4M3_t(8.0f), F8E4M3_t(0.5f),
+          F8E4M3_t(0.25f), F8E4M3_t(0.75f), F8E4M3_t(10.0f))
+INPUT_SET(InputSet::Fill, F8E4M3_t(2.0f))
+INPUT_SET(InputSet::Identity, F8E4M3_t(1.0f))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(F8E5M2_t)
+INPUT_SET(InputSet::Seed, F8E5M2_t(1.0f), F8E5M2_t(1.5f), F8E5M2_t(2.0f),
+          F8E5M2_t(3.0f), F8E5M2_t(4.0f), F8E5M2_t(5.0f), F8E5M2_t(6.0f),
+          F8E5M2_t(7.0f), F8E5M2_t(8.0f), F8E5M2_t(0.5f), F8E5M2_t(0.25f),
+          F8E5M2_t(0.75f), F8E5M2_t(10.0f), F8E5M2_t(12.0f))
+INPUT_SET(InputSet::Fill, F8E5M2_t(2.0f))
+INPUT_SET(InputSet::Identity, F8E5M2_t(1.0f))
+END_INPUT_SETS()
+
+#undef BEGIN_INPUT_SETS
+#undef INPUT_SET
+#undef END_INPUT_SETS
+
+} // namespace LinearAlgebra
+
+#endif // LINEARALGEBRATESTDATA_H
diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml
index b7edba9561..ebad1bb790 100644
--- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml
+++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml
@@ -4615,4 +4615,149 @@ void MSMain(uint GID : SV_GroupIndex,
       ]]>
     </Shader>
   </ShaderOp>
+
+  <!-- ================================================================== -->
+  <!-- LinAlgOp: Generic ShaderOp for SM 6.10 linear algebra operations.  -->
+  <!-- Uses preprocessor defines to select the operation under test.      -->
+  <!--                                                                    -->
+  <!-- Defines passed as compiler arguments:                              -->
+  <!--   ELEM_TYPE  : Matrix element type (e.g. float, float16_t)         -->
+  <!--   OUT_TYPE   : Output element type                                 -->
+  <!--   COMP_TYPE  : ComponentType passed to dx::linalg::Matrix          -->
+  <!--   ROWS, COLS : Matrix row (M) and column (N) counts                -->
+  <!--   K_DIM      : Inner dimension for multiply (K dimension)          -->
+  <!--   MATRIX_LAYOUT : 0=RowMajor, 1=ColMajor                          -->
+  <!--   FUNC_*     : Operation selector define                           -->
+  <!-- ================================================================== -->
+  <ShaderOp Name="LinAlgOp" CS="CS">
+    <RootSignature>UAV(u0), UAV(u1), UAV(u2)</RootSignature>
+    <!-- Width="2048" bytes: enough for 16x16 matrix of 64-bit elements -->
+    <Resource Name="InputMatrix1" Dimension="BUFFER" Width="2048"
+      Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
+      TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
+    <Resource Name="InputMatrix2" Dimension="BUFFER" Width="2048"
+      Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
+      TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
+    <Resource Name="OutputMatrix" Dimension="BUFFER" Width="2048"
+      Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
+      TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
+    <RootValues>
+      <RootValue Index="0" ResName="InputMatrix1" />
+      <RootValue Index="1" ResName="InputMatrix2" />
+      <RootValue Index="2" ResName="OutputMatrix" />
+    </RootValues>
+    <Shader Name="CS" Target="cs_6_10" EntryPoint="main">
+      <![CDATA[
+        RWByteAddressBuffer g_InputMatrix1 : register(u0);
+        RWByteAddressBuffer g_InputMatrix2 : register(u1);
+        RWByteAddressBuffer g_OutputMatrix : register(u2);
+
+        // Matrix stride in bytes for row-major layout.
+        static const uint ByteStride = COLS * sizeof(ELEM_TYPE);
+
+        #ifdef FUNC_FILL_MATRIX
+        // Test FillMatrix (Splat): Create a matrix filled with a scalar value
+        // read from InputMatrix1[0], then store the result to OutputMatrix.
+        void TestFillMatrix() {
+          ELEM_TYPE FillVal = g_InputMatrix1.Load<ELEM_TYPE>(0);
+
+          dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                             dx::linalg::MatrixUse::Accumulator,
+                             dx::linalg::MatrixScope::Wave> Mat =
+              dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                                 dx::linalg::MatrixUse::Accumulator,
+                                 dx::linalg::MatrixScope::Wave>::Splat(FillVal);
+
+          Mat.Store(g_OutputMatrix, 0, ByteStride,
+                    dx::linalg::MatrixLayout::RowMajor);
+        }
+        #endif
+
+        #ifdef FUNC_MATRIX_STORE
+        // Test MatrixStore: Load a matrix from InputMatrix1 and store it
+        // to OutputMatrix. Verifies the load-store round trip.
+        void TestMatrixStore() {
+          dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                             dx::linalg::MatrixUse::Accumulator,
+                             dx::linalg::MatrixScope::Wave> Mat =
+              dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                                 dx::linalg::MatrixUse::Accumulator,
+                                 dx::linalg::MatrixScope::Wave>::Load(
+                  g_InputMatrix1, 0, ByteStride,
+                  dx::linalg::MatrixLayout::RowMajor);
+
+          Mat.Store(g_OutputMatrix, 0, ByteStride,
+                    dx::linalg::MatrixLayout::RowMajor);
+        }
+        #endif
+
+        #ifdef FUNC_MATRIX_ACCUMULATE
+        // Test MatrixAccumulate: Load a matrix from InputMatrix1, then
+        // InterlockedAccumulate it to OutputMatrix (which is pre-initialized).
+        void TestMatrixAccumulate() {
+          dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                             dx::linalg::MatrixUse::Accumulator,
+                             dx::linalg::MatrixScope::Wave> Mat =
+              dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                                 dx::linalg::MatrixUse::Accumulator,
+                                 dx::linalg::MatrixScope::Wave>::Load(
+                  g_InputMatrix1, 0, ByteStride,
+                  dx::linalg::MatrixLayout::RowMajor);
+
+          Mat.InterlockedAccumulate(g_OutputMatrix, 0, ByteStride,
+                                   dx::linalg::MatrixLayout::RowMajor);
+        }
+        #endif
+
+        #ifdef FUNC_MATRIX_MUL
+        // Test MatrixMul: Load two matrices and multiply them.
+        // MatA is ROWS x K_DIM (Use::A), MatB is K_DIM x COLS (Use::B).
+        // Result accumulator is ROWS x COLS.
+        void TestMatrixMul() {
+          static const uint StrideA = K_DIM * sizeof(ELEM_TYPE);
+          static const uint StrideB = COLS * sizeof(ELEM_TYPE);
+
+          dx::linalg::Matrix<COMP_TYPE, ROWS, K_DIM,
+                             dx::linalg::MatrixUse::A,
+                             dx::linalg::MatrixScope::Wave> MatA =
+              dx::linalg::Matrix<COMP_TYPE, ROWS, K_DIM,
+                                 dx::linalg::MatrixUse::A,
+                                 dx::linalg::MatrixScope::Wave>::Load(
+                  g_InputMatrix1, 0, StrideA,
+                  dx::linalg::MatrixLayout::RowMajor);
+
+          dx::linalg::Matrix<COMP_TYPE, K_DIM, COLS,
+                             dx::linalg::MatrixUse::B,
+                             dx::linalg::MatrixScope::Wave> MatB =
+              dx::linalg::Matrix<COMP_TYPE, K_DIM, COLS,
+                                 dx::linalg::MatrixUse::B,
+                                 dx::linalg::MatrixScope::Wave>::Load(
+                  g_InputMatrix2, 0, StrideB,
+                  dx::linalg::MatrixLayout::RowMajor);
+
+          dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                             dx::linalg::MatrixUse::Accumulator,
+                             dx::linalg::MatrixScope::Wave> Result =
+              dx::linalg::Multiply(MatA, MatB);
+
+          Result.Store(g_OutputMatrix, 0, ByteStride,
+                       dx::linalg::MatrixLayout::RowMajor);
+        }
+        #endif
+
+        [numthreads(1, 1, 1)]
+        void main(uint GI : SV_GroupIndex) {
+          #ifdef FUNC_FILL_MATRIX
+            TestFillMatrix();
+          #elif defined(FUNC_MATRIX_STORE)
+            TestMatrixStore();
+          #elif defined(FUNC_MATRIX_ACCUMULATE)
+            TestMatrixAccumulate();
+          #elif defined(FUNC_MATRIX_MUL)
+            TestMatrixMul();
+          #endif
+        };
+      ]]>
+    </Shader>
+  </ShaderOp>
 </ShaderOpSet>

From 17506cba475d437747b54d5a2a92b28e403e200d Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Wed, 4 Mar 2026 22:38:45 +0000
Subject: [PATCH 3/9] chore: autopublish 2026-03-04T22:38:44Z

---
 .../unittests/HLSLExec/HLSLTestDataTypes.h    | 32 +++------
 .../unittests/HLSLExec/LinearAlgebra.cpp      | 59 +++++++---------
 .../HLSLExec/LinearAlgebraTestData.h          | 69 +++++++++----------
 3 files changed, 66 insertions(+), 94 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
index f69b68e5a7..9fa9274e1c 100644
--- a/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
+++ b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
@@ -303,18 +303,10 @@ struct F8E4M3_t {
 
   bool operator==(const F8E4M3_t &O) const { return Val == O.Val; }
   bool operator!=(const F8E4M3_t &O) const { return Val != O.Val; }
-  bool operator<(const F8E4M3_t &O) const {
-    return float(*this) < float(O);
-  }
-  bool operator>(const F8E4M3_t &O) const {
-    return float(*this) > float(O);
-  }
-  bool operator<=(const F8E4M3_t &O) const {
-    return float(*this) <= float(O);
-  }
-  bool operator>=(const F8E4M3_t &O) const {
-    return float(*this) >= float(O);
-  }
+  bool operator<(const F8E4M3_t &O) const { return float(*this) < float(O); }
+  bool operator>(const F8E4M3_t &O) const { return float(*this) > float(O); }
+  bool operator<=(const F8E4M3_t &O) const { return float(*this) <= float(O); }
+  bool operator>=(const F8E4M3_t &O) const { return float(*this) >= float(O); }
 
   friend std::ostream &operator<<(std::ostream &Os, const F8E4M3_t &Obj) {
     Os << float(Obj);
@@ -413,18 +405,10 @@ struct F8E5M2_t {
 
   bool operator==(const F8E5M2_t &O) const { return Val == O.Val; }
   bool operator!=(const F8E5M2_t &O) const { return Val != O.Val; }
-  bool operator<(const F8E5M2_t &O) const {
-    return float(*this) < float(O);
-  }
-  bool operator>(const F8E5M2_t &O) const {
-    return float(*this) > float(O);
-  }
-  bool operator<=(const F8E5M2_t &O) const {
-    return float(*this) <= float(O);
-  }
-  bool operator>=(const F8E5M2_t &O) const {
-    return float(*this) >= float(O);
-  }
+  bool operator<(const F8E5M2_t &O) const { return float(*this) < float(O); }
+  bool operator>(const F8E5M2_t &O) const { return float(*this) > float(O); }
+  bool operator<=(const F8E5M2_t &O) const { return float(*this) <= float(O); }
+  bool operator>=(const F8E5M2_t &O) const { return float(*this) >= float(O); }
 
   friend std::ostream &operator<<(std::ostream &Os, const F8E5M2_t &Obj) {
     Os << float(Obj);
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
index 148e454365..522f8e91d7 100644
--- a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
@@ -31,8 +31,7 @@ namespace LinearAlgebra {
 //
 
 enum class OpType : unsigned {
-#define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2)       \
-  SYMBOL,
+#define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2) SYMBOL,
 #include "LinearAlgebraOps.def"
   NumOpTypes
 };
@@ -47,8 +46,11 @@ struct Operation {
 
 static constexpr Operation Operations[] = {
 #define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2)       \
-  {ARITY, DEFINE, SHADER_NAME,                                                 \
-   {InputSet::INPUT_SET_1, InputSet::INPUT_SET_2}, OpType::SYMBOL},
+  {ARITY,                                                                      \
+   DEFINE,                                                                     \
+   SHADER_NAME,                                                                \
+   {InputSet::INPUT_SET_1, InputSet::INPUT_SET_2},                             \
+   OpType::SYMBOL},
 #include "LinearAlgebraOps.def"
 };
 
@@ -75,7 +77,7 @@ template <typename T> const DataType &getDataType() {
 
 #define DATA_TYPE(TYPE, HLSL_STRING, COMP_TYPE, HLSL_SIZE, IS_16BIT)           \
   template <> const DataType &getDataType<TYPE>() {                            \
-    static DataType DT{HLSL_STRING, COMP_TYPE, IS_16BIT, HLSL_SIZE};          \
+    static DataType DT{HLSL_STRING, COMP_TYPE, IS_16BIT, HLSL_SIZE};           \
     return DT;                                                                 \
   }
 
@@ -106,9 +108,7 @@ struct ValidationConfig {
     return {Tol, ValidationType::Epsilon};
   }
 
-  static ValidationConfig Ulp(double Tol) {
-    return {Tol, ValidationType::Ulp};
-  }
+  static ValidationConfig Ulp(double Tol) { return {Tol, ValidationType::Ulp}; }
 };
 
 // Default validation: ULP for floating point, exact for integers.
@@ -162,8 +162,7 @@ bool doValuesMatch(float A, float B, double Tolerance, ValidationType VType) {
   }
 }
 
-bool doValuesMatch(double A, double B, double Tolerance,
-                   ValidationType VType) {
+bool doValuesMatch(double A, double B, double Tolerance, ValidationType VType) {
   switch (VType) {
   case ValidationType::Epsilon:
     return CompareDoubleEpsilon(A, B, Tolerance);
@@ -222,9 +221,9 @@ std::vector<MatrixDims> getMatrixSizesToTest() {
 // Build compiler options.
 //
 
-std::string
-getCompilerOptionsString(const Operation &Op, const DataType &ElemType,
-                         size_t Rows, size_t Cols, size_t KDim = 0) {
+std::string getCompilerOptionsString(const Operation &Op,
+                                     const DataType &ElemType, size_t Rows,
+                                     size_t Cols, size_t KDim = 0) {
   std::stringstream Options;
 
   if (ElemType.Is16Bit)
@@ -317,7 +316,7 @@ std::vector<T> buildIdentityMatrix(size_t Rows, size_t Cols) {
 
 template <typename T>
 InputSets<T> buildTestInputs(const Operation &Op, size_t Rows, size_t Cols,
-                              size_t KDim) {
+                             size_t KDim) {
   InputSets<T> Inputs;
   const size_t NumElements = Rows * Cols;
 
@@ -341,9 +340,9 @@ InputSets<T> buildTestInputs(const Operation &Op, size_t Rows, size_t Cols,
 
 template <typename T>
 std::optional<std::vector<T>>
-runLinAlgTest(ID3D12Device *D3DDevice, bool VerboseLogging,
-              const Operation &Op, const InputSets<T> &Inputs, size_t Rows,
-              size_t Cols, size_t KDim, size_t ExpectedOutputSize) {
+runLinAlgTest(ID3D12Device *D3DDevice, bool VerboseLogging, const Operation &Op,
+              const InputSets<T> &Inputs, size_t Rows, size_t Cols, size_t KDim,
+              size_t ExpectedOutputSize) {
 
   const DataType &ElemType = getDataType<T>();
 
@@ -351,8 +350,7 @@ runLinAlgTest(ID3D12Device *D3DDevice, bool VerboseLogging,
       getCompilerOptionsString(Op, ElemType, Rows, Cols, KDim);
 
   if (VerboseLogging)
-    hlsl_test::LogCommentFmt(L"Compiler Options: %S",
-                             CompilerOptions.c_str());
+    hlsl_test::LogCommentFmt(L"Compiler Options: %S", CompilerOptions.c_str());
 
   dxc::SpecificDllLoader DxilDllLoader;
   CComPtr<IStream> TestXML;
@@ -473,8 +471,7 @@ template <typename T> struct ExpectedBuilder<OpType::MatrixAccumulate, T> {
 };
 
 // MatrixMul: multiply input matrix by identity.
-template <typename T>
-struct Op<OpType::MatrixMul, T> : DefaultValidation<T> {};
+template <typename T> struct Op<OpType::MatrixMul, T> : DefaultValidation<T> {};
 
 template <typename T> struct ExpectedBuilder<OpType::MatrixMul, T> {
   static std::vector<T> buildExpected(Op<OpType::MatrixMul, T> &,
@@ -546,11 +543,9 @@ class LinAlgTestClassCommon {
       WEX::TestExecution::RuntimeParameters::TryGetValue(L"VerboseLogging",
                                                          VerboseLogging);
       if (VerboseLogging)
-        hlsl_test::LogCommentFmt(
-            L"Verbose logging is enabled for this test.");
+        hlsl_test::LogCommentFmt(L"Verbose logging is enabled for this test.");
       else
-        hlsl_test::LogCommentFmt(
-            L"Verbose logging is disabled for this test.");
+        hlsl_test::LogCommentFmt(L"Verbose logging is disabled for this test.");
 
       bool FailIfRequirementsNotMet = false;
 #ifdef _HLK_CONF
@@ -614,22 +609,18 @@ class LinAlgTestClassCommon {
 class DxilConf_SM610_LinearAlgebra : public LinAlgTestClassCommon {
 public:
   BEGIN_TEST_CLASS(DxilConf_SM610_LinearAlgebra)
-  TEST_CLASS_PROPERTY(
-      "Kits.TestName",
-      "D3D12 - Shader Model 6.10 - Linear Algebra Tests")
+  TEST_CLASS_PROPERTY("Kits.TestName",
+                      "D3D12 - Shader Model 6.10 - Linear Algebra Tests")
   TEST_CLASS_PROPERTY("Kits.TestId", "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
-  TEST_CLASS_PROPERTY(
-      "Kits.Description",
-      "Validates SM 6.10 linear algebra matrix operations")
+  TEST_CLASS_PROPERTY("Kits.Description",
+                      "Validates SM 6.10 linear algebra matrix operations")
   TEST_CLASS_PROPERTY(
       "Kits.Specification",
       "Device.Graphics.D3D12.DXILCore.ShaderModel610.CoreRequirement")
   TEST_METHOD_PROPERTY(L"Priority", L"0")
   END_TEST_CLASS()
 
-  TEST_CLASS_SETUP(setupClass) {
-    return LinAlgTestClassCommon::setupClass();
-  }
+  TEST_CLASS_SETUP(setupClass) { return LinAlgTestClassCommon::setupClass(); }
   TEST_METHOD_SETUP(setupMethod) {
     return LinAlgTestClassCommon::setupMethod();
   }
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h b/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h
index 0bac431b7a..37d453ec7f 100644
--- a/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h
@@ -16,54 +16,53 @@ namespace LinearAlgebra {
 
 enum class InputSet { Seed, Fill, Identity };
 
-template <typename T>
-const std::vector<T> &getInputSet(InputSet InputSet) {
+template <typename T> const std::vector<T> &getInputSet(InputSet InputSet) {
   static_assert(sizeof(T) == 0, "No InputSet for this type");
 }
 
-#define BEGIN_INPUT_SETS(TYPE)                                                  \
-  template <>                                                                   \
-  inline const std::vector<TYPE> &getInputSet<TYPE>(InputSet InputSet) {        \
-    using T = TYPE;                                                             \
+#define BEGIN_INPUT_SETS(TYPE)                                                 \
+  template <>                                                                  \
+  inline const std::vector<TYPE> &getInputSet<TYPE>(InputSet InputSet) {       \
+    using T = TYPE;                                                            \
     switch (InputSet) {
 
-#define INPUT_SET(SET, ...)                                                     \
-  case SET: {                                                                   \
-    static std::vector<T> Data = {__VA_ARGS__};                                 \
-    return Data;                                                                \
+#define INPUT_SET(SET, ...)                                                    \
+  case SET: {                                                                  \
+    static std::vector<T> Data = {__VA_ARGS__};                                \
+    return Data;                                                               \
   }
 
-#define END_INPUT_SETS()                                                        \
-  default:                                                                      \
-    break;                                                                      \
-    }                                                                           \
-    VERIFY_FAIL("Missing input set");                                           \
-    std::abort();                                                               \
+#define END_INPUT_SETS()                                                       \
+  default:                                                                     \
+    break;                                                                     \
+    }                                                                          \
+    VERIFY_FAIL("Missing input set");                                          \
+    std::abort();                                                              \
     }
 
+using HLSLTestDataTypes::F8E4M3_t;
+using HLSLTestDataTypes::F8E5M2_t;
 using HLSLTestDataTypes::HLSLHalf_t;
 using HLSLTestDataTypes::SNormF16_t;
-using HLSLTestDataTypes::UNormF16_t;
 using HLSLTestDataTypes::SNormF32_t;
-using HLSLTestDataTypes::UNormF32_t;
 using HLSLTestDataTypes::SNormF64_t;
+using HLSLTestDataTypes::UNormF16_t;
+using HLSLTestDataTypes::UNormF32_t;
 using HLSLTestDataTypes::UNormF64_t;
-using HLSLTestDataTypes::F8E4M3_t;
-using HLSLTestDataTypes::F8E5M2_t;
 
 BEGIN_INPUT_SETS(HLSLHalf_t)
-INPUT_SET(InputSet::Seed, HLSLHalf_t(1.0f), HLSLHalf_t(2.0f),
-          HLSLHalf_t(3.0f), HLSLHalf_t(4.0f), HLSLHalf_t(5.0f),
-          HLSLHalf_t(6.0f), HLSLHalf_t(7.0f), HLSLHalf_t(8.0f),
-          HLSLHalf_t(9.0f), HLSLHalf_t(10.0f), HLSLHalf_t(11.0f),
-          HLSLHalf_t(12.0f), HLSLHalf_t(13.0f), HLSLHalf_t(14.0f))
+INPUT_SET(InputSet::Seed, HLSLHalf_t(1.0f), HLSLHalf_t(2.0f), HLSLHalf_t(3.0f),
+          HLSLHalf_t(4.0f), HLSLHalf_t(5.0f), HLSLHalf_t(6.0f),
+          HLSLHalf_t(7.0f), HLSLHalf_t(8.0f), HLSLHalf_t(9.0f),
+          HLSLHalf_t(10.0f), HLSLHalf_t(11.0f), HLSLHalf_t(12.0f),
+          HLSLHalf_t(13.0f), HLSLHalf_t(14.0f))
 INPUT_SET(InputSet::Fill, HLSLHalf_t(42.0f))
 INPUT_SET(InputSet::Identity, HLSLHalf_t(1.0f))
 END_INPUT_SETS()
 
 BEGIN_INPUT_SETS(float)
-INPUT_SET(InputSet::Seed, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
-          9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f)
+INPUT_SET(InputSet::Seed, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+          10.0f, 11.0f, 12.0f, 13.0f, 14.0f)
 INPUT_SET(InputSet::Fill, 42.0f)
 INPUT_SET(InputSet::Identity, 1.0f)
 END_INPUT_SETS()
@@ -174,20 +173,18 @@ INPUT_SET(InputSet::Identity, UNormF32_t(1.0f))
 END_INPUT_SETS()
 
 BEGIN_INPUT_SETS(SNormF64_t)
-INPUT_SET(InputSet::Seed, SNormF64_t(-0.9), SNormF64_t(-0.7),
-          SNormF64_t(-0.5), SNormF64_t(-0.3), SNormF64_t(-0.1),
-          SNormF64_t(0.1), SNormF64_t(0.2), SNormF64_t(0.3), SNormF64_t(0.4),
-          SNormF64_t(0.5), SNormF64_t(0.6), SNormF64_t(0.7), SNormF64_t(0.8),
-          SNormF64_t(0.9))
+INPUT_SET(InputSet::Seed, SNormF64_t(-0.9), SNormF64_t(-0.7), SNormF64_t(-0.5),
+          SNormF64_t(-0.3), SNormF64_t(-0.1), SNormF64_t(0.1), SNormF64_t(0.2),
+          SNormF64_t(0.3), SNormF64_t(0.4), SNormF64_t(0.5), SNormF64_t(0.6),
+          SNormF64_t(0.7), SNormF64_t(0.8), SNormF64_t(0.9))
 INPUT_SET(InputSet::Fill, SNormF64_t(0.5))
 INPUT_SET(InputSet::Identity, SNormF64_t(1.0))
 END_INPUT_SETS()
 
 BEGIN_INPUT_SETS(UNormF64_t)
-INPUT_SET(InputSet::Seed, UNormF64_t(0.05), UNormF64_t(0.1),
-          UNormF64_t(0.15), UNormF64_t(0.2), UNormF64_t(0.25),
-          UNormF64_t(0.3), UNormF64_t(0.35), UNormF64_t(0.4),
-          UNormF64_t(0.45), UNormF64_t(0.5), UNormF64_t(0.55),
+INPUT_SET(InputSet::Seed, UNormF64_t(0.05), UNormF64_t(0.1), UNormF64_t(0.15),
+          UNormF64_t(0.2), UNormF64_t(0.25), UNormF64_t(0.3), UNormF64_t(0.35),
+          UNormF64_t(0.4), UNormF64_t(0.45), UNormF64_t(0.5), UNormF64_t(0.55),
           UNormF64_t(0.6), UNormF64_t(0.7), UNormF64_t(0.8))
 INPUT_SET(InputSet::Fill, UNormF64_t(0.5))
 INPUT_SET(InputSet::Identity, UNormF64_t(1.0))

From 2c3b284bccdd46e66c1444f9a1b09a7e79c3c084 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <alsepkow@microsoft.com>
Date: Wed, 4 Mar 2026 14:46:14 -0800
Subject: [PATCH 4/9] NFC: Move shared validation helpers into
 HLSLTestDataTypes.h

Move isFloatingPointType, ValidationType, ValidationConfig,
DefaultValidation, StrictValidation, and doValuesMatch overloads
from LongVectors.cpp and LinearAlgebra.cpp into the shared header.
Both files now use 'using' declarations to reference them.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../unittests/HLSLExec/HLSLTestDataTypes.h    | 108 ++++++++++++++++++
 .../unittests/HLSLExec/LinearAlgebra.cpp      |  92 ++-------------
 .../clang/unittests/HLSLExec/LongVectors.cpp  | 106 +----------------
 3 files changed, 121 insertions(+), 185 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
index 9fa9274e1c..afdf5f08db 100644
--- a/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
+++ b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
@@ -5,11 +5,13 @@
 #include <cstdint>
 #include <limits>
 #include <ostream>
+#include <type_traits>
 
 #include <DirectXMath.h>
 #include <DirectXPackedVector.h>
 
 #include "dxc/Support/Global.h"
+#include "HlslTestUtils.h"
 
 // Shared HLSL type wrappers for use in execution tests.
 // These types bridge the gap between C++ and HLSL type representations.
@@ -490,6 +492,112 @@ struct F8E5M2_t {
   }
 };
 
+//
+// Shared type traits and validation infrastructure.
+//
+
+template <typename T> constexpr bool isFloatingPointType() {
+  return std::is_same_v<T, float> || std::is_same_v<T, double> ||
+         std::is_same_v<T, HLSLHalf_t>;
+}
+
+enum class ValidationType {
+  Epsilon,
+  Ulp,
+};
+
+struct ValidationConfig {
+  double Tolerance = 0.0;
+  ValidationType Type = ValidationType::Epsilon;
+
+  static ValidationConfig Epsilon(double Tolerance) {
+    return ValidationConfig{Tolerance, ValidationType::Epsilon};
+  }
+
+  static ValidationConfig Ulp(double Tolerance) {
+    return ValidationConfig{Tolerance, ValidationType::Ulp};
+  }
+};
+
+// Default validation: 1 ULP for float, double and HLSLHalf_t; exact otherwise.
+template <typename T> struct DefaultValidation {
+  ValidationConfig ValidationConfig;
+
+  DefaultValidation() {
+    if constexpr (isFloatingPointType<T>())
+      ValidationConfig = ValidationConfig::Ulp(1.0f);
+  }
+};
+
+// Strict validation: exact match by default.
+struct StrictValidation {
+  ValidationConfig ValidationConfig;
+};
+
+//
+// Value comparison overloads used by both LongVector and LinearAlgebra tests.
+//
+
+template <typename T>
+inline bool doValuesMatch(T A, T B, double Tolerance, ValidationType) {
+  if (Tolerance == 0.0)
+    return A == B;
+
+  T Diff = A > B ? A - B : B - A;
+  return Diff <= Tolerance;
+}
+
+inline bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, double,
+                          ValidationType) {
+  return A == B;
+}
+
+inline bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance,
+                          ValidationType VType) {
+  switch (VType) {
+  case ValidationType::Epsilon:
+    return CompareHalfEpsilon(A.Val, B.Val, static_cast<float>(Tolerance));
+  case ValidationType::Ulp:
+    return CompareHalfULP(A.Val, B.Val, static_cast<float>(Tolerance));
+  default:
+    hlsl_test::LogErrorFmt(
+        L"Invalid ValidationType. Expecting Epsilon or ULP.");
+    return false;
+  }
+}
+
+inline bool doValuesMatch(float A, float B, double Tolerance,
+                          ValidationType VType) {
+  switch (VType) {
+  case ValidationType::Epsilon:
+    return CompareFloatEpsilon(A, B, static_cast<float>(Tolerance));
+  case ValidationType::Ulp: {
+    const int IntTolerance = static_cast<int>(Tolerance);
+    return CompareFloatULP(A, B, IntTolerance);
+  }
+  default:
+    hlsl_test::LogErrorFmt(
+        L"Invalid ValidationType. Expecting Epsilon or ULP.");
+    return false;
+  }
+}
+
+inline bool doValuesMatch(double A, double B, double Tolerance,
+                          ValidationType VType) {
+  switch (VType) {
+  case ValidationType::Epsilon:
+    return CompareDoubleEpsilon(A, B, Tolerance);
+  case ValidationType::Ulp: {
+    const int64_t IntTolerance = static_cast<int64_t>(Tolerance);
+    return CompareDoubleULP(A, B, IntTolerance);
+  }
+  default:
+    hlsl_test::LogErrorFmt(
+        L"Invalid ValidationType. Expecting Epsilon or ULP.");
+    return false;
+  }
+}
+
 } // namespace HLSLTestDataTypes
 
 #endif // HLSLTESTDATATYPES_H
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
index 522f8e91d7..8e82764b44 100644
--- a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
@@ -22,8 +22,6 @@
 #include <type_traits>
 #include <vector>
 
-using namespace HLSLTestDataTypes; // For HLSLHalf_t, HLSLBool_t
-
 namespace LinearAlgebra {
 
 //
@@ -89,89 +87,13 @@ DATA_TYPE(uint32_t, "uint", "ComponentType::U32", 4, false)
 
 #undef DATA_TYPE
 
-template <typename T> constexpr bool isFloatingPointType() {
-  return std::is_same_v<T, float> || std::is_same_v<T, double> ||
-         std::is_same_v<T, HLSLHalf_t>;
-}
-
-//
-// Validation
-//
-
-enum class ValidationType { Epsilon, Ulp };
-
-struct ValidationConfig {
-  double Tolerance = 0.0;
-  ValidationType Type = ValidationType::Epsilon;
-
-  static ValidationConfig Epsilon(double Tol) {
-    return {Tol, ValidationType::Epsilon};
-  }
-
-  static ValidationConfig Ulp(double Tol) { return {Tol, ValidationType::Ulp}; }
-};
-
-// Default validation: ULP for floating point, exact for integers.
-template <typename T> struct DefaultValidation {
-  ValidationConfig ValidationConfig;
-
-  DefaultValidation() {
-    if constexpr (isFloatingPointType<T>())
-      ValidationConfig = ValidationConfig::Ulp(1.0);
-  }
-};
-
-// Strict validation: exact match.
-struct StrictValidation {
-  ValidationConfig ValidationConfig;
-};
-
-//
-// Value comparison overloads following LongVector patterns.
-//
-
-template <typename T>
-bool doValuesMatch(T A, T B, double Tolerance, ValidationType) {
-  if (Tolerance == 0.0)
-    return A == B;
-
-  T Diff = A > B ? A - B : B - A;
-  return Diff <= Tolerance;
-}
-
-bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance,
-                   ValidationType VType) {
-  switch (VType) {
-  case ValidationType::Epsilon:
-    return CompareHalfEpsilon(A.Val, B.Val, static_cast<float>(Tolerance));
-  case ValidationType::Ulp:
-    return CompareHalfULP(A.Val, B.Val, static_cast<float>(Tolerance));
-  default:
-    return false;
-  }
-}
-
-bool doValuesMatch(float A, float B, double Tolerance, ValidationType VType) {
-  switch (VType) {
-  case ValidationType::Epsilon:
-    return CompareFloatEpsilon(A, B, static_cast<float>(Tolerance));
-  case ValidationType::Ulp:
-    return CompareFloatULP(A, B, static_cast<int>(Tolerance));
-  default:
-    return false;
-  }
-}
-
-bool doValuesMatch(double A, double B, double Tolerance, ValidationType VType) {
-  switch (VType) {
-  case ValidationType::Epsilon:
-    return CompareDoubleEpsilon(A, B, Tolerance);
-  case ValidationType::Ulp:
-    return CompareDoubleULP(A, B, static_cast<int64_t>(Tolerance));
-  default:
-    return false;
-  }
-}
+using HLSLTestDataTypes::isFloatingPointType;
+using HLSLTestDataTypes::ValidationType;
+using HLSLTestDataTypes::ValidationConfig;
+using HLSLTestDataTypes::DefaultValidation;
+using HLSLTestDataTypes::StrictValidation;
+using HLSLTestDataTypes::doValuesMatch;
+using HLSLTestDataTypes::HLSLHalf_t;
 
 template <typename T>
 bool doVectorsMatch(const std::vector<T> &Actual,
diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index dbb8a8d672..d0941756d4 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -64,10 +64,12 @@ DATA_TYPE(double, "double", 8)
 
 #undef DATA_TYPE
 
-template <typename T> constexpr bool isFloatingPointType() {
-  return std::is_same_v<T, float> || std::is_same_v<T, double> ||
-         std::is_same_v<T, HLSLHalf_t>;
-}
+using HLSLTestDataTypes::isFloatingPointType;
+using HLSLTestDataTypes::ValidationType;
+using HLSLTestDataTypes::ValidationConfig;
+using HLSLTestDataTypes::DefaultValidation;
+using HLSLTestDataTypes::StrictValidation;
+using HLSLTestDataTypes::doValuesMatch;
 
 //
 // Operation Types
@@ -186,72 +188,6 @@ void logLongVector(const std::vector<T> &Values, const std::wstring &Name) {
   hlsl_test::LogCommentFmt(Wss.str().c_str());
 }
 
-enum class ValidationType {
-  Epsilon,
-  Ulp,
-};
-
-template <typename T>
-bool doValuesMatch(T A, T B, double Tolerance, ValidationType) {
-  if (Tolerance == 0.0)
-    return A == B;
-
-  T Diff = A > B ? A - B : B - A;
-  return Diff <= Tolerance;
-}
-
-bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, double, ValidationType) {
-  return A == B;
-}
-
-bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance,
-                   ValidationType ValidationType) {
-  switch (ValidationType) {
-  case ValidationType::Epsilon:
-    return CompareHalfEpsilon(A.Val, B.Val, static_cast<float>(Tolerance));
-  case ValidationType::Ulp:
-    return CompareHalfULP(A.Val, B.Val, static_cast<float>(Tolerance));
-  default:
-    hlsl_test::LogErrorFmt(
-        L"Invalid ValidationType. Expecting Epsilon or ULP.");
-    return false;
-  }
-}
-
-bool doValuesMatch(float A, float B, double Tolerance,
-                   ValidationType ValidationType) {
-  switch (ValidationType) {
-  case ValidationType::Epsilon:
-    return CompareFloatEpsilon(A, B, static_cast<float>(Tolerance));
-  case ValidationType::Ulp: {
-    // Tolerance is in ULPs. Convert to int for the comparison.
-    const int IntTolerance = static_cast<int>(Tolerance);
-    return CompareFloatULP(A, B, IntTolerance);
-  };
-  default:
-    hlsl_test::LogErrorFmt(
-        L"Invalid ValidationType. Expecting Epsilon or ULP.");
-    return false;
-  }
-}
-
-bool doValuesMatch(double A, double B, double Tolerance,
-                   ValidationType ValidationType) {
-  switch (ValidationType) {
-  case ValidationType::Epsilon:
-    return CompareDoubleEpsilon(A, B, Tolerance);
-  case ValidationType::Ulp: {
-    // Tolerance is in ULPs. Convert to int64_t for the comparison.
-    const int64_t IntTolerance = static_cast<int64_t>(Tolerance);
-    return CompareDoubleULP(A, B, IntTolerance);
-  };
-  default:
-    hlsl_test::LogErrorFmt(
-        L"Invalid ValidationType. Expecting Epsilon or ULP.");
-    return false;
-  }
-}
-
 template <typename T>
 bool doVectorsMatch(const std::vector<T> &ActualValues,
                     const std::vector<T> &ExpectedValues, double Tolerance,
@@ -563,19 +499,6 @@ InputSets<T> buildTestInputs(size_t VectorSize, const InputSet OpInputSets[3],
   return Inputs;
 }
 
-struct ValidationConfig {
-  double Tolerance = 0.0;
-  ValidationType Type = ValidationType::Epsilon;
-
-  static ValidationConfig Epsilon(double Tolerance) {
-    return ValidationConfig{Tolerance, ValidationType::Epsilon};
-  }
-
-  static ValidationConfig Ulp(double Tolerance) {
-    return ValidationConfig{Tolerance, ValidationType::Ulp};
-  }
-};
-
 template <typename T, typename OUT_TYPE>
 void runAndVerify(
     ID3D12Device *D3DDevice, bool VerboseLogging, const Operation &Operation,
@@ -614,23 +537,6 @@ template <OpType OP, typename T, size_t Arity> struct Op;
 // member functions.
 template <OpType OP, typename T> struct ExpectedBuilder;
 
-// Default Validation configuration - ULP for floating point types, exact
-// matches for everything else.
-template <typename T> struct DefaultValidation {
-  ValidationConfig ValidationConfig;
-
-  DefaultValidation() {
-    if constexpr (isFloatingPointType<T>())
-      ValidationConfig = ValidationConfig::Ulp(1.0f);
-  }
-};
-
-// Strict Validation - Defaults to exact matches.
-// Tolerance can be set to a non-zero value to allow for a wider range.
-struct StrictValidation {
-  ValidationConfig ValidationConfig;
-};
-
 // Macros to build up common patterns of Op definitions
 
 #define OP_1(OP, VALIDATION, IMPL)                                             \

From e0bb6c5ef90be9a4033484dc231b514f4c1f5d12 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <alsepkow@microsoft.com>
Date: Wed, 4 Mar 2026 14:57:06 -0800
Subject: [PATCH 5/9] Fix clang-format issues

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h  | 5 ++---
 tools/clang/unittests/HLSLExec/LinearAlgebra.cpp    | 8 ++++----
 tools/clang/unittests/HLSLExec/LinearAlgebraOps.def | 8 ++++----
 tools/clang/unittests/HLSLExec/LongVectors.cpp      | 8 ++++----
 4 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
index afdf5f08db..c4a40e0c0f 100644
--- a/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
+++ b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
@@ -10,8 +10,8 @@
 #include <DirectXMath.h>
 #include <DirectXPackedVector.h>
 
-#include "dxc/Support/Global.h"
 #include "HlslTestUtils.h"
+#include "dxc/Support/Global.h"
 
 // Shared HLSL type wrappers for use in execution tests.
 // These types bridge the gap between C++ and HLSL type representations.
@@ -547,8 +547,7 @@ inline bool doValuesMatch(T A, T B, double Tolerance, ValidationType) {
   return Diff <= Tolerance;
 }
 
-inline bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, double,
-                          ValidationType) {
+inline bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, double, ValidationType) {
   return A == B;
 }
 
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
index 8e82764b44..c63d0f171c 100644
--- a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
@@ -87,13 +87,13 @@ DATA_TYPE(uint32_t, "uint", "ComponentType::U32", 4, false)
 
 #undef DATA_TYPE
 
-using HLSLTestDataTypes::isFloatingPointType;
-using HLSLTestDataTypes::ValidationType;
-using HLSLTestDataTypes::ValidationConfig;
 using HLSLTestDataTypes::DefaultValidation;
-using HLSLTestDataTypes::StrictValidation;
 using HLSLTestDataTypes::doValuesMatch;
 using HLSLTestDataTypes::HLSLHalf_t;
+using HLSLTestDataTypes::isFloatingPointType;
+using HLSLTestDataTypes::StrictValidation;
+using HLSLTestDataTypes::ValidationConfig;
+using HLSLTestDataTypes::ValidationType;
 
 template <typename T>
 bool doVectorsMatch(const std::vector<T> &Actual,
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def b/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def
index 077ca27d33..9773eba608 100644
--- a/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def
@@ -18,9 +18,9 @@
 #define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2)
 #endif
 
-OP(FillMatrix,        0, "FUNC_FILL_MATRIX",        "LinAlgOp", Fill, Fill)
-OP(MatrixStore,       1, "FUNC_MATRIX_STORE",        "LinAlgOp", Seed, Seed)
-OP(MatrixAccumulate,  1, "FUNC_MATRIX_ACCUMULATE",   "LinAlgOp", Seed, Seed)
-OP(MatrixMul,         2, "FUNC_MATRIX_MUL",          "LinAlgOp", Seed, Identity)
+OP(FillMatrix, 0, "FUNC_FILL_MATRIX", "LinAlgOp", Fill, Fill)
+OP(MatrixStore, 1, "FUNC_MATRIX_STORE", "LinAlgOp", Seed, Seed)
+OP(MatrixAccumulate, 1, "FUNC_MATRIX_ACCUMULATE", "LinAlgOp", Seed, Seed)
+OP(MatrixMul, 2, "FUNC_MATRIX_MUL", "LinAlgOp", Seed, Identity)
 
 #undef OP
diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index d0941756d4..5f2c7d01ac 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -64,12 +64,12 @@ DATA_TYPE(double, "double", 8)
 
 #undef DATA_TYPE
 
-using HLSLTestDataTypes::isFloatingPointType;
-using HLSLTestDataTypes::ValidationType;
-using HLSLTestDataTypes::ValidationConfig;
 using HLSLTestDataTypes::DefaultValidation;
-using HLSLTestDataTypes::StrictValidation;
 using HLSLTestDataTypes::doValuesMatch;
+using HLSLTestDataTypes::isFloatingPointType;
+using HLSLTestDataTypes::StrictValidation;
+using HLSLTestDataTypes::ValidationConfig;
+using HLSLTestDataTypes::ValidationType;
 
 //
 // Operation Types

From 6b69ff428c83d011c984f2d5b3b213a329a43cc5 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <alsepkow@microsoft.com>
Date: Wed, 4 Mar 2026 15:09:57 -0800
Subject: [PATCH 6/9] Rename doVectorsMatch to doMatricesMatch with row/col
 logging

Renamed to better reflect the matrix data being compared. Added M and N
parameters so mismatch errors now log (row,col) coordinates instead of
flat indices, improving debuggability.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tools/clang/unittests/HLSLExec/LinearAlgebra.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
index c63d0f171c..a943edc129 100644
--- a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
@@ -96,14 +96,15 @@ using HLSLTestDataTypes::ValidationConfig;
 using HLSLTestDataTypes::ValidationType;
 
 template <typename T>
-bool doVectorsMatch(const std::vector<T> &Actual,
-                    const std::vector<T> &Expected,
-                    const ValidationConfig &Config, bool VerboseLogging) {
+bool doMatricesMatch(const std::vector<T> &Actual,
+                     const std::vector<T> &Expected, size_t M, size_t N,
+                     const ValidationConfig &Config, bool VerboseLogging) {
   DXASSERT(Actual.size() == Expected.size(),
            "Actual and Expected must be the same size");
 
   if (VerboseLogging)
-    hlsl_test::LogCommentFmt(L"Verifying %zu elements", Actual.size());
+    hlsl_test::LogCommentFmt(L"Verifying %zux%zu matrix (%zu elements)", M, N,
+                             Actual.size());
 
   std::vector<size_t> MismatchedIndexes;
   for (size_t I = 0; I < Actual.size(); I++) {
@@ -117,7 +118,7 @@ bool doVectorsMatch(const std::vector<T> &Actual,
   for (size_t Index : MismatchedIndexes) {
     std::wstringstream Wss(L"");
     Wss << std::setprecision(15);
-    Wss << L"Mismatch at Index: " << Index;
+    Wss << L"Mismatch at (" << Index / N << L"," << Index % N << L")";
     Wss << L" Actual:" << Actual[Index];
     Wss << L" Expected:" << Expected[Index];
     hlsl_test::LogErrorFmt(Wss.str().c_str());
@@ -338,7 +339,8 @@ void runAndVerify(ID3D12Device *D3DDevice, bool VerboseLogging,
     return;
   }
 
-  VERIFY_IS_TRUE(doVectorsMatch(*Actual, Expected, Config, VerboseLogging));
+  VERIFY_IS_TRUE(
+      doMatricesMatch(*Actual, Expected, Rows, Cols, Config, VerboseLogging));
 }
 
 //

From f360b1489e68ccf2f081e0b55536ebb1cb1800b2 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <alsepkow@microsoft.com>
Date: Wed, 4 Mar 2026 15:26:03 -0800
Subject: [PATCH 7/9] Address code review feedback

- Fix non-portable static_assert(false) to use sizeof(T)==0 idiom
- Replace placeholder TestId GUID with f00df946-9877-4453-8844-b1f4c8977953
- Extend isFloatingPointType to cover SNorm/UNorm/F8 wrapper types
- Rename ValidationConfig member to Validation to avoid type/member shadowing
- Rename local variables in dispatchTest to avoid type shadowing (CurOp, OpConfig)
- Add row-major assumption comment in doMatricesMatch
- Add MATRIX_LAYOUT comment explaining 0=RowMajor, 1=ColMajor
- Add TODO for non-square K dimension test coverage
- Clarify pre-staged input set comments in LinearAlgebraTestData.h
- Fix 'constuctor' typo in HLSLTestDataTypes.h

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../unittests/HLSLExec/HLSLTestDataTypes.h    | 14 ++++++----
 .../unittests/HLSLExec/LinearAlgebra.cpp      | 27 ++++++++++---------
 .../HLSLExec/LinearAlgebraTestData.h          |  7 ++---
 .../clang/unittests/HLSLExec/LongVectors.cpp  |  6 ++---
 4 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
index c4a40e0c0f..37becc8f39 100644
--- a/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
+++ b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
@@ -18,7 +18,7 @@
 namespace HLSLTestDataTypes {
 
 // A helper struct because C++ bools are 1 byte and HLSL bools are 4 bytes.
-// Take int32_t as a constuctor argument and convert it to bool when needed.
+// Take int32_t as a constructor argument and convert it to bool when needed.
 // Comparisons cast to a bool because we only care if the bool representation is
 // true or false.
 struct HLSLBool_t {
@@ -498,7 +498,11 @@ struct F8E5M2_t {
 
 template <typename T> constexpr bool isFloatingPointType() {
   return std::is_same_v<T, float> || std::is_same_v<T, double> ||
-         std::is_same_v<T, HLSLHalf_t>;
+         std::is_same_v<T, HLSLHalf_t> || std::is_same_v<T, SNormF16_t> ||
+         std::is_same_v<T, UNormF16_t> || std::is_same_v<T, SNormF32_t> ||
+         std::is_same_v<T, UNormF32_t> || std::is_same_v<T, SNormF64_t> ||
+         std::is_same_v<T, UNormF64_t> || std::is_same_v<T, F8E4M3_t> ||
+         std::is_same_v<T, F8E5M2_t>;
 }
 
 enum class ValidationType {
@@ -521,17 +525,17 @@ struct ValidationConfig {
 
 // Default validation: ULP for floating point, exact for integers.
 template <typename T> struct DefaultValidation {
-  ValidationConfig ValidationConfig;
+  ValidationConfig Validation;
 
   DefaultValidation() {
     if constexpr (isFloatingPointType<T>())
-      ValidationConfig = ValidationConfig::Ulp(1.0f);
+      Validation = ValidationConfig::Ulp(1.0f);
   }
 };
 
 // Strict validation: exact match by default.
 struct StrictValidation {
-  ValidationConfig ValidationConfig;
+  ValidationConfig Validation;
 };
 
 //
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
index a943edc129..0933d22375 100644
--- a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
@@ -70,7 +70,7 @@ struct DataType {
 };
 
 template <typename T> const DataType &getDataType() {
-  static_assert(false && "Unknown data type");
+  static_assert(sizeof(T) == 0, "Unknown data type");
 }
 
 #define DATA_TYPE(TYPE, HLSL_STRING, COMP_TYPE, HLSL_SIZE, IS_16BIT)           \
@@ -118,6 +118,7 @@ bool doMatricesMatch(const std::vector<T> &Actual,
   for (size_t Index : MismatchedIndexes) {
     std::wstringstream Wss(L"");
     Wss << std::setprecision(15);
+    // Assumes row-major layout for (row,col) decomposition.
     Wss << L"Mismatch at (" << Index / N << L"," << Index % N << L")";
     Wss << L" Actual:" << Actual[Index];
     Wss << L" Expected:" << Expected[Index];
@@ -162,7 +163,7 @@ std::string getCompilerOptionsString(const Operation &Op,
   if (KDim > 0)
     Options << " -DK_DIM=" << KDim;
 
-  Options << " -DMATRIX_LAYOUT=0";
+  Options << " -DMATRIX_LAYOUT=0"; // 0 = RowMajor, 1 = ColMajor
 
   return Options.str();
 }
@@ -416,26 +417,28 @@ template <typename T, OpType OP>
 void dispatchTest(ID3D12Device *D3DDevice, bool VerboseLogging) {
 
   const std::vector<MatrixDims> Sizes = getMatrixSizesToTest();
-  constexpr const Operation &Operation = getOperation(OP);
-  Op<OP, T> Op;
+  constexpr const Operation &CurOp = getOperation(OP);
+  Op<OP, T> OpConfig;
 
   for (const MatrixDims &Dims : Sizes) {
     const size_t Rows = Dims.Rows;
     const size_t Cols = Dims.Cols;
-    const size_t KDim = (Operation.Arity >= 2) ? Cols : 0;
+    // TODO: K dimension currently equals Cols for simplicity (square inner
+    // dimension). Add non-square K sizes for better multiply coverage.
+    const size_t KDim = (CurOp.Arity >= 2) ? Cols : 0;
 
     // FillMatrix has special input handling (scalar, not a matrix).
     InputSets<T> Inputs;
     if constexpr (OP == OpType::FillMatrix)
-      Inputs = ExpectedBuilder<OP, T>::buildInputs(Operation, Rows, Cols, KDim);
+      Inputs = ExpectedBuilder<OP, T>::buildInputs(CurOp, Rows, Cols, KDim);
     else
-      Inputs = buildTestInputs<T>(Operation, Rows, Cols, KDim);
+      Inputs = buildTestInputs<T>(CurOp, Rows, Cols, KDim);
 
-    auto Expected =
-        ExpectedBuilder<OP, T>::buildExpected(Op, Inputs, Rows, Cols, KDim);
+    auto Expected = ExpectedBuilder<OP, T>::buildExpected(OpConfig, Inputs,
+                                                          Rows, Cols, KDim);
 
-    runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected,
-                 Op.ValidationConfig, Rows, Cols, KDim);
+    runAndVerify(D3DDevice, VerboseLogging, CurOp, Inputs, Expected,
+                 OpConfig.Validation, Rows, Cols, KDim);
   }
 }
 
@@ -535,7 +538,7 @@ class DxilConf_SM610_LinearAlgebra : public LinAlgTestClassCommon {
   BEGIN_TEST_CLASS(DxilConf_SM610_LinearAlgebra)
   TEST_CLASS_PROPERTY("Kits.TestName",
                       "D3D12 - Shader Model 6.10 - Linear Algebra Tests")
-  TEST_CLASS_PROPERTY("Kits.TestId", "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
+  TEST_CLASS_PROPERTY("Kits.TestId", "f00df946-9877-4453-8844-b1f4c8977953")
   TEST_CLASS_PROPERTY("Kits.Description",
                       "Validates SM 6.10 linear algebra matrix operations")
   TEST_CLASS_PROPERTY(
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h b/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h
index 37d453ec7f..a4bcdc0097 100644
--- a/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h
@@ -86,7 +86,8 @@ INPUT_SET(InputSet::Fill, 42)
 INPUT_SET(InputSet::Identity, 1)
 END_INPUT_SETS()
 
-// --- Additional scalar types ---
+// --- Additional scalar types ---
+// (pre-staged for upcoming SM 6.10 ComponentTypes)
 
 BEGIN_INPUT_SETS(int8_t)
 INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
@@ -124,7 +125,7 @@ INPUT_SET(InputSet::Fill, 42)
 INPUT_SET(InputSet::Identity, 1)
 END_INPUT_SETS()
 
-// --- Normalized types (SNorm [-1,1], UNorm [0,1]) ---
+// --- Normalized types (pre-staged for SM 6.10 SNorm/UNorm ComponentTypes) ---
 
 BEGIN_INPUT_SETS(SNormF16_t)
 INPUT_SET(InputSet::Seed, SNormF16_t(HLSLHalf_t(-0.9f)),
@@ -190,7 +191,7 @@ INPUT_SET(InputSet::Fill, UNormF64_t(0.5))
 INPUT_SET(InputSet::Identity, UNormF64_t(1.0))
 END_INPUT_SETS()
 
-// --- FP8 types (packed 4 elements per scalar in HLSL) ---
+// --- FP8 types (pre-staged for SM 6.10 packed ComponentTypes) ---
 
 BEGIN_INPUT_SETS(F8E4M3_t)
 INPUT_SET(InputSet::Seed, F8E4M3_t(1.0f), F8E4M3_t(1.5f), F8E4M3_t(2.0f),
diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 5f2c7d01ac..e3e5d8b0fc 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1170,7 +1170,7 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
       AbsoluteEpsilon +=
           computeAbsoluteEpsilon<T>((SumPos + SumNeg), ULPTolerance);
 
-    Op.ValidationConfig = ValidationConfig::Epsilon(AbsoluteEpsilon);
+    Op.Validation = ValidationConfig::Epsilon(AbsoluteEpsilon);
 
     std::vector<T> Expected;
     Expected.push_back(static_cast<T>(DotProduct));
@@ -1683,7 +1683,7 @@ void dispatchTest(ID3D12Device *D3DDevice, bool VerboseLogging,
     auto Expected = ExpectedBuilder<OP, T>::buildExpected(Op, Inputs);
 
     runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected,
-                 Op.ValidationConfig);
+                 Op.Validation);
   }
 }
 
@@ -1708,7 +1708,7 @@ void dispatchWaveOpTest(ID3D12Device *D3DDevice, bool VerboseLogging,
     auto Expected = ExpectedBuilder<OP, T>::buildExpected(Op, Inputs, WaveSize);
 
     runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected,
-                 Op.ValidationConfig, AdditionalCompilerOptions);
+                 Op.Validation, AdditionalCompilerOptions);
   }
 }
 

From c847462f631d5ad4b7878a0472d9b5f74a5087fb Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <alsepkow@microsoft.com>
Date: Wed, 4 Mar 2026 15:31:16 -0800
Subject: [PATCH 8/9] Set LinearAlgebra test priority to 2 (skip in automation)

SM 6.10 linalg operations are not yet supported in test automation.
Test runners (lit, hcttest.cmd) only run tests with Priority < 2,
so these tests will be skipped until support is available.
See eede01664 for prior art on this pattern.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tools/clang/unittests/HLSLExec/LinearAlgebra.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
index 0933d22375..2a7cf7f206 100644
--- a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
@@ -544,7 +544,9 @@ class DxilConf_SM610_LinearAlgebra : public LinAlgTestClassCommon {
   TEST_CLASS_PROPERTY(
       "Kits.Specification",
       "Device.Graphics.D3D12.DXILCore.ShaderModel610.CoreRequirement")
-  TEST_METHOD_PROPERTY(L"Priority", L"0")
+  // Priority 2: SM 6.10 linalg not yet supported in automation. Test runners
+  // (lit, hcttest.cmd) only run tests with Priority < 2. See eede01664.
+  TEST_METHOD_PROPERTY(L"Priority", L"2")
   END_TEST_CLASS()
 
   TEST_CLASS_SETUP(setupClass) { return LinAlgTestClassCommon::setupClass(); }

From ac7b65c1d7cec677e5782a120ef9159900526296 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <alsepkow@microsoft.com>
Date: Wed, 4 Mar 2026 15:42:34 -0800
Subject: [PATCH 9/9] Require Shader Model 6.10 device for LinearAlgebra tests

---
 tools/clang/unittests/HLSLExec/LinearAlgebra.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
index 2a7cf7f206..1178ac4f36 100644
--- a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
@@ -482,8 +482,8 @@ class LinAlgTestClassCommon {
           L"FailIfRequirementsNotMet", FailIfRequirementsNotMet);
 
       const bool SkipUnsupported = !FailIfRequirementsNotMet;
-      // Linear algebra requires at least SM 6.9 device support.
-      if (!D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_9,
+      // Linear algebra requires at least SM 6.10 device support.
+      if (!D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_10,
                                   SkipUnsupported)) {
         if (FailIfRequirementsNotMet)
           hlsl_test::LogErrorFmt(
@@ -507,7 +507,7 @@ class LinAlgTestClassCommon {
       hlsl_test::LogCommentFmt(L"Creating device");
 
       const bool SkipUnsupported = false;
-      VERIFY_IS_TRUE(D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_9,
+      VERIFY_IS_TRUE(D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_10,
                                             SkipUnsupported));
     }