diff --git a/tools/clang/unittests/HLSLExec/CMakeLists.txt b/tools/clang/unittests/HLSLExec/CMakeLists.txt
index 8282fd5282..216799a464 100644
--- a/tools/clang/unittests/HLSLExec/CMakeLists.txt
+++ b/tools/clang/unittests/HLSLExec/CMakeLists.txt
@@ -10,6 +10,7 @@ add_clang_library(ExecHLSLTests SHARED
   ShaderOpTest.cpp
   TableParameterHandler.cpp
   LongVectors.cpp
+  LinearAlgebra.cpp
   HlslExecTestUtils.cpp
   ExecHLSLTests.rc
   )
diff --git a/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
new file mode 100644
index 0000000000..37becc8f39
--- /dev/null
+++ b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h
@@ -0,0 +1,606 @@
+#ifndef HLSLTESTDATATYPES_H
+#define HLSLTESTDATATYPES_H
+
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <ostream>
+#include <type_traits>
+
+#include <DirectXMath.h>
+#include <DirectXPackedVector.h>
+
+#include "HlslTestUtils.h"
+#include "dxc/Support/Global.h"
+
+// Shared HLSL type wrappers for use in execution tests.
+// These types bridge the gap between C++ and HLSL type representations.
+namespace HLSLTestDataTypes {
+
+// A helper struct because C++ bools are 1 byte and HLSL bools are 4 bytes.
+// Take int32_t as a constructor argument and convert it to bool when needed.
+// Comparisons cast to a bool because we only care if the bool representation is
+// true or false.
+struct HLSLBool_t {
+  HLSLBool_t() : Val(0) {}
+  HLSLBool_t(int32_t Val) : Val(Val) {}
+  HLSLBool_t(bool Val) : Val(Val) {}
+
+  bool operator==(const HLSLBool_t &Other) const {
+    return static_cast<bool>(Val) == static_cast<bool>(Other.Val);
+  }
+
+  bool operator!=(const HLSLBool_t &Other) const {
+    return static_cast<bool>(Val) != static_cast<bool>(Other.Val);
+  }
+
+  // Ordering compares the normalized bool values (false < true) so that it
+  // agrees with operator==: any two non-zero payloads are equal and are never
+  // ordered by their raw magnitude.
+  bool operator<(const HLSLBool_t &O) const { return AsBool() < O.AsBool(); }
+  bool operator>(const HLSLBool_t &O) const { return AsBool() > O.AsBool(); }
+  bool operator<=(const HLSLBool_t &O) const { return AsBool() <= O.AsBool(); }
+  bool operator>=(const HLSLBool_t &O) const { return AsBool() >= O.AsBool(); }
+
+  HLSLBool_t operator*(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val * Other.Val);
+  }
+
+  HLSLBool_t operator+(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val + Other.Val);
+  }
+
+  HLSLBool_t operator-(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val - Other.Val);
+  }
+
+  HLSLBool_t operator/(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val / Other.Val);
+  }
+
+  HLSLBool_t operator%(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val % Other.Val);
+  }
+
+  HLSLBool_t operator&&(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val && Other.Val);
+  }
+
+  HLSLBool_t operator||(const HLSLBool_t &Other) const {
+    return HLSLBool_t(Val || Other.Val);
+  }
+
+  bool AsBool() const { return static_cast<bool>(Val); }
+
+  operator bool() const { return AsBool(); }
+  operator int16_t() const { return (int16_t)(AsBool()); }
+  operator int32_t() const { return (int32_t)(AsBool()); }
+  operator int64_t() const { return (int64_t)(AsBool()); }
+  operator uint16_t() const { return (uint16_t)(AsBool()); }
+  operator uint32_t() const { return (uint32_t)(AsBool()); }
+  operator uint64_t() const { return (uint64_t)(AsBool()); }
+  operator float() const { return (float)(AsBool()); }
+  operator double() const { return (double)(AsBool()); }
+
+  // So we can construct std::wstrings using std::wostream
+  friend std::wostream &operator<<(std::wostream &Os, const HLSLBool_t &Obj) {
+    Os << static_cast<bool>(Obj.Val);
+    return Os;
+  }
+
+  // So we can construct std::strings using std::ostream
+  friend std::ostream &operator<<(std::ostream &Os, const HLSLBool_t &Obj) {
+    Os << static_cast<bool>(Obj.Val);
+    return Os;
+  }
+
+  int32_t Val = 0;
+};
+
+// No native float16 type in C++ until C++23, so we use uint16_t to represent
+// it. Simple little wrapping struct to help handle the right behavior.
+struct HLSLHalf_t {
+  HLSLHalf_t() : Val(0) {}
+  HLSLHalf_t(const float F) {
+    Val = DirectX::PackedVector::XMConvertFloatToHalf(F);
+  }
+  HLSLHalf_t(const double D) {
+    // Clamp to float's range; an out-of-range double-to-float cast is UB.
+    float F;
+    if (D >= std::numeric_limits<float>::max())
+      F = std::numeric_limits<float>::max();
+    else if (D <= std::numeric_limits<float>::lowest())
+      F = std::numeric_limits<float>::lowest();
+    else
+      F = static_cast<float>(D);
+    Val = DirectX::PackedVector::XMConvertFloatToHalf(F);
+  }
+  HLSLHalf_t(const uint32_t U) {
+    float F = static_cast<float>(U);
+    Val = DirectX::PackedVector::XMConvertFloatToHalf(F);
+  }
+
+  // PackedVector::HALF is a uint16. Make sure we don't ever accidentally
+  // convert one of these to a HLSLHalf_t by arithmetically converting it to a
+  // float.
+  HLSLHalf_t(DirectX::PackedVector::HALF) = delete;
+
+  static double GetULP(HLSLHalf_t A) {
+    DXASSERT(!std::isnan(A) && !std::isinf(A),
+             "ULP of NaN or infinity is undefined");
+
+    // Bumping the raw bits steps away from zero, so for negative inputs the
+    // signed difference is negative; report the magnitude.
+    HLSLHalf_t Next = A;
+    ++Next.Val;
+
+    return std::fabs(static_cast<double>(Next) - static_cast<double>(A));
+  }
+
+  static HLSLHalf_t FromHALF(DirectX::PackedVector::HALF Half) {
+    HLSLHalf_t H;
+    H.Val = Half;
+    return H;
+  }
+
+  // Implicit conversion to float for use with things like std::acos, std::tan,
+  // etc
+  operator float() const {
+    return DirectX::PackedVector::XMConvertHalfToFloat(Val);
+  }
+
+  bool operator==(const HLSLHalf_t &Other) const {
+    // Convert to floats to properly handle the '0 == -0' case which must
+    // compare to true but have different uint16_t values.
+    // That is, 0 == -0 is true. We store Val as a uint16_t.
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+    return A == B;
+  }
+
+  bool operator<(const HLSLHalf_t &Other) const {
+    return DirectX::PackedVector::XMConvertHalfToFloat(Val) <
+           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+  }
+
+  bool operator>(const HLSLHalf_t &Other) const {
+    return DirectX::PackedVector::XMConvertHalfToFloat(Val) >
+           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+  }
+
+  // Used by tolerance checks in the tests.
+  bool operator>(float F) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    return A > F;
+  }
+
+  bool operator<(float F) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    return A < F;
+  }
+
+  bool operator<=(const HLSLHalf_t &Other) const {
+    return DirectX::PackedVector::XMConvertHalfToFloat(Val) <=
+           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+  }
+
+  bool operator>=(const HLSLHalf_t &Other) const {
+    return DirectX::PackedVector::XMConvertHalfToFloat(Val) >=
+           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+  }
+  // Defined via operator== so that +0/-0 and NaN compare consistently.
+  bool operator!=(const HLSLHalf_t &Other) const { return !(*this == Other); }
+
+  HLSLHalf_t operator*(const HLSLHalf_t &Other) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A * B));
+  }
+
+  HLSLHalf_t operator+(const HLSLHalf_t &Other) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+    return FromHALF((DirectX::PackedVector::XMConvertFloatToHalf(A + B)));
+  }
+
+  HLSLHalf_t operator-(const HLSLHalf_t &Other) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A - B));
+  }
+
+  HLSLHalf_t operator/(const HLSLHalf_t &Other) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A / B));
+  }
+
+  HLSLHalf_t operator%(const HLSLHalf_t &Other) const {
+    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
+    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
+    const float C = std::fmod(A, B);
+    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(C));
+  }
+
+  // So we can construct std::wstrings using std::wostream
+  friend std::wostream &operator<<(std::wostream &Os, const HLSLHalf_t &Obj) {
+    Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val);
+    return Os;
+  }
+
+  // So we can construct std::strings using std::ostream
+  friend std::ostream &operator<<(std::ostream &Os, const HLSLHalf_t &Obj) {
+    Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val);
+    return Os;
+  }
+
+  // HALF is an alias to uint16_t
+  DirectX::PackedVector::HALF Val = 0;
+};
+
+// Normalized type wrappers for SNorm [-1,1] and UNorm [0,1] interpretations.
+// Thin wrappers over floating-point types to enable type-distinct input sets.
+// The Signed parameter distinguishes SNorm (true) from UNorm (false).
+template <typename BaseT, bool Signed> struct HLSLNorm_t {
+  BaseT Val;
+
+  HLSLNorm_t() : Val() {}
+  HLSLNorm_t(BaseT V) : Val(V) {}
+
+  operator BaseT() const { return Val; }
+
+  HLSLNorm_t operator*(const HLSLNorm_t &O) const {
+    return HLSLNorm_t(Val * O.Val);
+  }
+  HLSLNorm_t operator+(const HLSLNorm_t &O) const {
+    return HLSLNorm_t(Val + O.Val);
+  }
+  HLSLNorm_t operator-(const HLSLNorm_t &O) const {
+    return HLSLNorm_t(Val - O.Val);
+  }
+
+  bool operator==(const HLSLNorm_t &O) const { return Val == O.Val; }
+  bool operator!=(const HLSLNorm_t &O) const { return !(Val == O.Val); }
+  bool operator<(const HLSLNorm_t &O) const { return Val < O.Val; }
+  bool operator>(const HLSLNorm_t &O) const { return Val > O.Val; }
+  bool operator<=(const HLSLNorm_t &O) const { return Val <= O.Val; }
+  bool operator>=(const HLSLNorm_t &O) const { return Val >= O.Val; }
+
+  friend std::ostream &operator<<(std::ostream &Os, const HLSLNorm_t &Obj) {
+    Os << Obj.Val;
+    return Os;
+  }
+  friend std::wostream &operator<<(std::wostream &Os, const HLSLNorm_t &Obj) {
+    Os << Obj.Val;
+    return Os;
+  }
+};
+
+using SNormF16_t = HLSLNorm_t<HLSLHalf_t, true>;
+using UNormF16_t = HLSLNorm_t<HLSLHalf_t, false>;
+using SNormF32_t = HLSLNorm_t<float, true>;
+using UNormF32_t = HLSLNorm_t<float, false>;
+using SNormF64_t = HLSLNorm_t<double, true>;
+using UNormF64_t = HLSLNorm_t<double, false>;
+
+// FP8 E4M3 type wrapper (1 sign, 4 exponent, 3 mantissa, bias 7).
+// Range: [-448, 448]. No Inf; only NaN (0x7F/0xFF).
+struct F8E4M3_t {
+  uint8_t Val;
+
+  F8E4M3_t() : Val(0) {}
+  F8E4M3_t(float F) { Val = FloatToF8E4M3(F); }
+
+  operator float() const { return F8E4M3ToFloat(Val); }
+
+  F8E4M3_t operator*(const F8E4M3_t &O) const {
+    return F8E4M3_t(float(*this) * float(O));
+  }
+  F8E4M3_t operator+(const F8E4M3_t &O) const {
+    return F8E4M3_t(float(*this) + float(O));
+  }
+  F8E4M3_t operator-(const F8E4M3_t &O) const {
+    return F8E4M3_t(float(*this) - float(O));
+  }
+
+  bool operator==(const F8E4M3_t &O) const { return Val == O.Val; }
+  bool operator!=(const F8E4M3_t &O) const { return Val != O.Val; }
+  bool operator<(const F8E4M3_t &O) const { return float(*this) < float(O); }
+  bool operator>(const F8E4M3_t &O) const { return float(*this) > float(O); }
+  bool operator<=(const F8E4M3_t &O) const { return float(*this) <= float(O); }
+  bool operator>=(const F8E4M3_t &O) const { return float(*this) >= float(O); }
+
+  friend std::ostream &operator<<(std::ostream &Os, const F8E4M3_t &Obj) {
+    Os << float(Obj);
+    return Os;
+  }
+  friend std::wostream &operator<<(std::wostream &Os, const F8E4M3_t &Obj) {
+    Os << float(Obj);
+    return Os;
+  }
+
+private:
+  static float F8E4M3ToFloat(uint8_t V) {
+    uint8_t Sign = (V >> 7) & 1;
+    uint8_t Exp = (V >> 3) & 0xF;
+    uint8_t Mant = V & 0x7;
+
+    if (Exp == 0xF && Mant == 0x7)
+      return std::numeric_limits<float>::quiet_NaN();
+
+    float Result;
+    if (Exp == 0)
+      Result = std::ldexp(static_cast<float>(Mant), -9);
+    else
+      Result = std::ldexp(1.0f + static_cast<float>(Mant) / 8.0f, Exp - 7);
+
+    return Sign ? -Result : Result;
+  }
+
+  // Encodes F as E4M3, rounding to nearest with ties away from zero
+  // (std::round). NaN maps to 0x7F; magnitudes >= 448, including infinity,
+  // saturate to the largest finite value.
+  static uint8_t FloatToF8E4M3(float F) {
+    if (std::isnan(F))
+      return 0x7F;
+
+    // Use signbit rather than `F < 0` so that -0.0f keeps its sign bit.
+    const uint8_t Sign = std::signbit(F) ? 1 : 0;
+    F = std::fabs(F);
+
+    if (F == 0.0f)
+      return Sign << 7;
+
+    // Clamp to max representable (E=15, M=6 → 448).
+    if (F >= 448.0f)
+      return (Sign << 7) | (0xF << 3) | 0x6;
+
+    int Exp;
+    float Frac = std::frexp(F, &Exp);
+    int BiasedExp = Exp + 6;
+
+    if (BiasedExp <= 0) {
+      // Subnormal: value = Mant * 2^-9 with F * 512 in (0, 8). Rounding up
+      // to 8 is valid: bit pattern 0b1000 is the smallest normal (E=1, M=0),
+      // so the mantissa must not be clamped to 7.
+      const int Mant = static_cast<int>(std::round(F * 512.0f));
+      return (Sign << 7) | static_cast<uint8_t>(Mant);
+    }
+
+    float Significand = 2.0f * Frac;
+    int Mant = static_cast<int>(std::round((Significand - 1.0f) * 8.0f));
+
+    if (Mant >= 8) {
+      Mant = 0;
+      BiasedExp++;
+    }
+
+    if (BiasedExp >= 15) {
+      if (BiasedExp > 15 || Mant > 6)
+        return (Sign << 7) | (0xF << 3) | 0x6;
+    }
+
+    return (Sign << 7) | (static_cast<uint8_t>(BiasedExp) << 3) |
+           static_cast<uint8_t>(Mant);
+  }
+};
+
+// FP8 E5M2 type wrapper (1 sign, 5 exponent, 2 mantissa, bias 15).
+// Range: [-57344, 57344]. Has Inf and NaN (like IEEE 754).
+struct F8E5M2_t {
+  uint8_t Val;
+
+  F8E5M2_t() : Val(0) {}
+  F8E5M2_t(float F) { Val = FloatToF8E5M2(F); }
+
+  operator float() const { return F8E5M2ToFloat(Val); }
+
+  F8E5M2_t operator*(const F8E5M2_t &O) const {
+    return F8E5M2_t(float(*this) * float(O));
+  }
+  F8E5M2_t operator+(const F8E5M2_t &O) const {
+    return F8E5M2_t(float(*this) + float(O));
+  }
+  F8E5M2_t operator-(const F8E5M2_t &O) const {
+    return F8E5M2_t(float(*this) - float(O));
+  }
+
+  bool operator==(const F8E5M2_t &O) const { return Val == O.Val; }
+  bool operator!=(const F8E5M2_t &O) const { return Val != O.Val; }
+  bool operator<(const F8E5M2_t &O) const { return float(*this) < float(O); }
+  bool operator>(const F8E5M2_t &O) const { return float(*this) > float(O); }
+  bool operator<=(const F8E5M2_t &O) const { return float(*this) <= float(O); }
+  bool operator>=(const F8E5M2_t &O) const { return float(*this) >= float(O); }
+
+  friend std::ostream &operator<<(std::ostream &Os, const F8E5M2_t &Obj) {
+    Os << float(Obj);
+    return Os;
+  }
+  friend std::wostream &operator<<(std::wostream &Os, const F8E5M2_t &Obj) {
+    Os << float(Obj);
+    return Os;
+  }
+
+private:
+  static float F8E5M2ToFloat(uint8_t V) {
+    uint8_t Sign = (V >> 7) & 1;
+    uint8_t Exp = (V >> 2) & 0x1F;
+    uint8_t Mant = V & 0x3;
+
+    if (Exp == 0x1F) {
+      if (Mant == 0)
+        return Sign ? -std::numeric_limits<float>::infinity()
+                    : std::numeric_limits<float>::infinity();
+      return std::numeric_limits<float>::quiet_NaN();
+    }
+
+    float Result;
+    if (Exp == 0)
+      Result = std::ldexp(static_cast<float>(Mant), -16);
+    else
+      Result = std::ldexp(1.0f + static_cast<float>(Mant) / 4.0f, Exp - 15);
+
+    return Sign ? -Result : Result;
+  }
+
+  // Encodes F as E5M2, rounding to nearest with ties away from zero
+  // (std::round). NaN maps to 0x7F, infinity is preserved, and finite
+  // magnitudes >= 57344 saturate to the largest finite value.
+  static uint8_t FloatToF8E5M2(float F) {
+    if (std::isnan(F))
+      return 0x7F;
+
+    // Use signbit rather than `F < 0` so that -0.0f keeps its sign bit.
+    const uint8_t Sign = std::signbit(F) ? 1 : 0;
+    F = std::fabs(F);
+
+    if (std::isinf(F))
+      return (Sign << 7) | (0x1F << 2);
+
+    if (F == 0.0f)
+      return Sign << 7;
+
+    // Clamp to max representable (E=30, M=3 → 57344).
+    if (F >= 57344.0f)
+      return (Sign << 7) | (0x1E << 2) | 0x3;
+
+    int Exp;
+    float Frac = std::frexp(F, &Exp);
+    int BiasedExp = Exp + 14;
+
+    if (BiasedExp <= 0) {
+      // Subnormal: value = Mant * 2^-16 with F * 65536 in (0, 4). Rounding up
+      // to 4 is valid: bit pattern 0b100 is the smallest normal (E=1, M=0),
+      // so the mantissa must not be clamped to 3.
+      const int Mant = static_cast<int>(std::round(F * 65536.0f));
+      return (Sign << 7) | static_cast<uint8_t>(Mant);
+    }
+
+    float Significand = 2.0f * Frac;
+    int Mant = static_cast<int>(std::round((Significand - 1.0f) * 4.0f));
+
+    if (Mant >= 4) {
+      Mant = 0;
+      BiasedExp++;
+    }
+
+    if (BiasedExp >= 31)
+      return (Sign << 7) | (0x1F << 2);
+
+    return (Sign << 7) | (static_cast<uint8_t>(BiasedExp) << 2) |
+           static_cast<uint8_t>(Mant);
+  }
+};
+
+//
+// Shared type traits and validation infrastructure.
+//
+
+template <typename T> constexpr bool isFloatingPointType() {
+  return std::is_same_v<T, float> || std::is_same_v<T, double> ||
+         std::is_same_v<T, HLSLHalf_t> || std::is_same_v<T, SNormF16_t> ||
+         std::is_same_v<T, UNormF16_t> || std::is_same_v<T, SNormF32_t> ||
+         std::is_same_v<T, UNormF32_t> || std::is_same_v<T, SNormF64_t> ||
+         std::is_same_v<T, UNormF64_t> || std::is_same_v<T, F8E4M3_t> ||
+         std::is_same_v<T, F8E5M2_t>;
+}
+
+enum class ValidationType {
+  Epsilon,
+  Ulp,
+};
+
+struct ValidationConfig {
+  double Tolerance = 0.0;
+  ValidationType Type = ValidationType::Epsilon;
+
+  static ValidationConfig Epsilon(double Tolerance) {
+    return ValidationConfig{Tolerance, ValidationType::Epsilon};
+  }
+
+  static ValidationConfig Ulp(double Tolerance) {
+    return ValidationConfig{Tolerance, ValidationType::Ulp};
+  }
+};
+
+// Default validation: ULP for floating point, exact for integers.
+template <typename T> struct DefaultValidation {
+  ValidationConfig Validation;
+
+  DefaultValidation() {
+    if constexpr (isFloatingPointType<T>())
+      Validation = ValidationConfig::Ulp(1.0f);
+  }
+};
+
+// Strict validation: exact match by default.
+struct StrictValidation {
+  ValidationConfig Validation;
+};
+
+//
+// Value comparison overloads used by both LongVector and LinearAlgebra tests.
+//
+
+template <typename T>
+inline bool doValuesMatch(T A, T B, double Tolerance, ValidationType) {
+  if (Tolerance == 0.0)
+    return A == B;
+
+  T Diff = A > B ? A - B : B - A;
+  return Diff <= Tolerance;
+}
+
+inline bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, double, ValidationType) {
+  return A == B;
+}
+
+inline bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance,
+                          ValidationType VType) {
+  switch (VType) {
+  case ValidationType::Epsilon:
+    return CompareHalfEpsilon(A.Val, B.Val, static_cast<float>(Tolerance));
+  case ValidationType::Ulp:
+    return CompareHalfULP(A.Val, B.Val, static_cast<float>(Tolerance));
+  default:
+    hlsl_test::LogErrorFmt(
+        L"Invalid ValidationType. Expecting Epsilon or ULP.");
+    return false;
+  }
+}
+
+inline bool doValuesMatch(float A, float B, double Tolerance,
+                          ValidationType VType) {
+  switch (VType) {
+  case ValidationType::Epsilon:
+    return CompareFloatEpsilon(A, B, static_cast<float>(Tolerance));
+  case ValidationType::Ulp: {
+    const int IntTolerance = static_cast<int>(Tolerance);
+    return CompareFloatULP(A, B, IntTolerance);
+  }
+  default:
+    hlsl_test::LogErrorFmt(
+        L"Invalid ValidationType. Expecting Epsilon or ULP.");
+    return false;
+  }
+}
+
+inline bool doValuesMatch(double A, double B, double Tolerance,
+                          ValidationType VType) {
+  switch (VType) {
+  case ValidationType::Epsilon:
+    return CompareDoubleEpsilon(A, B, Tolerance);
+  case ValidationType::Ulp: {
+    const int64_t IntTolerance = static_cast<int64_t>(Tolerance);
+    return CompareDoubleULP(A, B, IntTolerance);
+  }
+  default:
+    hlsl_test::LogErrorFmt(
+        L"Invalid ValidationType. Expecting Epsilon or ULP.");
+    return false;
+  }
+}
+
+} // namespace HLSLTestDataTypes
+
+#endif // HLSLTESTDATATYPES_H
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
new file mode 100644
index 0000000000..1178ac4f36
--- /dev/null
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp
@@ -0,0 +1,576 @@
+#ifndef NOMINMAX
+#define NOMINMAX 1
+#endif
+
+#define INLINE_TEST_METHOD_MARKUP
+#include <WexTestClass.h>
+
+#include "LinearAlgebraTestData.h"
+
+#include "ShaderOpTest.h"
+#include "dxc/Support/Global.h"
+
+#include "HlslTestUtils.h"
+
+#include "HlslExecTestUtils.h"
+
+#include <algorithm>
+#include <iomanip>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+namespace LinearAlgebra {
+
+//
+// Operation Types
+//
+
+enum class OpType : unsigned {
+#define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2) SYMBOL,
+#include "LinearAlgebraOps.def"
+  NumOpTypes
+};
+
+struct Operation {
+  size_t Arity;
+  const char *Define;
+  const char *ShaderName;
+  InputSet InputSets[2];
+  OpType Type;
+};
+
+static constexpr Operation Operations[] = {
+#define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2)       \
+  {ARITY,                                                                      \
+   DEFINE,                                                                     \
+   SHADER_NAME,                                                                \
+   {InputSet::INPUT_SET_1, InputSet::INPUT_SET_2},                             \
+   OpType::SYMBOL},
+#include "LinearAlgebraOps.def"
+};
+
+constexpr const Operation &getOperation(OpType Op) {
+  if (Op < OpType::NumOpTypes)
+    return Operations[unsigned(Op)];
+  std::abort();
+}
+
+//
+// Data Types
+//
+
+struct DataType {
+  const char *HLSLTypeString;
+  const char *CompTypeString;
+  bool Is16Bit;
+  size_t HLSLSizeInBytes;
+};
+
+template <typename T> const DataType &getDataType() {
+  static_assert(sizeof(T) == 0, "Unknown data type");
+}
+
+#define DATA_TYPE(TYPE, HLSL_STRING, COMP_TYPE, HLSL_SIZE, IS_16BIT)           \
+  template <> const DataType &getDataType<TYPE>() {                            \
+    static DataType DT{HLSL_STRING, COMP_TYPE, IS_16BIT, HLSL_SIZE};           \
+    return DT;                                                                 \
+  }
+
+DATA_TYPE(HLSLHalf_t, "float16_t", "ComponentType::F16", 2, true)
+DATA_TYPE(float, "float", "ComponentType::F32", 4, false)
+DATA_TYPE(double, "double", "ComponentType::F64", 8, false)
+DATA_TYPE(int32_t, "int", "ComponentType::I32", 4, false)
+DATA_TYPE(uint32_t, "uint", "ComponentType::U32", 4, false)
+
+#undef DATA_TYPE
+
+using HLSLTestDataTypes::DefaultValidation;
+using HLSLTestDataTypes::doValuesMatch;
+using HLSLTestDataTypes::HLSLHalf_t;
+using HLSLTestDataTypes::isFloatingPointType;
+using HLSLTestDataTypes::StrictValidation;
+using HLSLTestDataTypes::ValidationConfig;
+using HLSLTestDataTypes::ValidationType;
+
+template <typename T>
+bool doMatricesMatch(const std::vector<T> &Actual,
+                     const std::vector<T> &Expected, size_t M, size_t N,
+                     const ValidationConfig &Config, bool VerboseLogging) {
+  // Element-wise comparison under Config. Every mismatch is logged; returns
+  // true only if all elements match. A size mismatch is reported as a failure
+  // rather than only asserted, so the loop never indexes past Expected's end.
+  if (Actual.size() != Expected.size()) {
+    hlsl_test::LogErrorFmt(L"Matrix size mismatch. Actual:%zu Expected:%zu",
+                           Actual.size(), Expected.size());
+    return false;
+  }
+
+  if (VerboseLogging)
+    hlsl_test::LogCommentFmt(L"Verifying %zux%zu matrix (%zu elements)", M, N,
+                             Actual.size());
+
+  bool AllMatch = true;
+  for (size_t I = 0; I < Actual.size(); I++) {
+    if (doValuesMatch(Actual[I], Expected[I], Config.Tolerance, Config.Type))
+      continue;
+    AllMatch = false;
+
+    std::wstringstream Wss;
+    // Assumes row-major layout for (row,col) decomposition.
+    Wss << std::setprecision(15) << L"Mismatch at (" << I / N << L"," << I % N
+        << L") Actual:" << Actual[I] << L" Expected:" << Expected[I];
+    // Pass the text as an argument so '%' in it is never parsed as a format.
+    hlsl_test::LogErrorFmt(L"%ls", Wss.str().c_str());
+  }
+  return AllMatch;
+}
+
+//
+// Matrix dimensions for test iteration.
+//
+
+struct MatrixDims {
+  size_t Rows;
+  size_t Cols;
+};
+
+std::vector<MatrixDims> getMatrixSizesToTest() {
+  return {{2, 2}, {4, 4}, {4, 8}, {8, 4}, {8, 8}};
+}
+
+//
+// Build compiler options.
+//
+
+std::string getCompilerOptionsString(const Operation &Op,
+                                     const DataType &ElemType, size_t Rows,
+                                     size_t Cols, size_t KDim = 0) {
+  std::stringstream Options;
+
+  if (ElemType.Is16Bit)
+    Options << " -enable-16bit-types";
+
+  Options << " -D" << Op.Define;
+  Options << " -DELEM_TYPE=" << ElemType.HLSLTypeString;
+  Options << " -DOUT_TYPE=" << ElemType.HLSLTypeString;
+  Options << " -DCOMP_TYPE=" << ElemType.CompTypeString;
+  Options << " -DROWS=" << Rows;
+  Options << " -DCOLS=" << Cols;
+
+  if (KDim > 0)
+    Options << " -DK_DIM=" << KDim;
+
+  Options << " -DMATRIX_LAYOUT=0"; // 0 = RowMajor, 1 = ColMajor
+
+  return Options.str();
+}
+
+//
+// Shader buffer helpers.
+//
+
+template <typename T>
+void fillShaderBuffer(std::vector<BYTE> &ShaderBuffer,
+                      const std::vector<T> &Data) {
+  const size_t DataSize = sizeof(T) * Data.size();
+  DXASSERT_NOMSG(ShaderBuffer.size() >= DataSize);
+
+  if constexpr (std::is_same_v<T, HLSLHalf_t>) {
+    auto *Ptr =
+        reinterpret_cast<DirectX::PackedVector::HALF *>(ShaderBuffer.data());
+    for (size_t I = 0; I < Data.size(); I++)
+      Ptr[I] = Data[I].Val;
+    return;
+  }
+
+  auto *Ptr = reinterpret_cast<T *>(ShaderBuffer.data());
+  for (size_t I = 0; I < Data.size(); I++)
+    Ptr[I] = Data[I];
+}
+
+template <typename T>
+void readShaderBuffer(const MappedData &ShaderBuffer, std::vector<T> &OutData,
+                      size_t NumElements) {
+  if constexpr (std::is_same_v<T, HLSLHalf_t>) {
+    auto *Ptr =
+        static_cast<const DirectX::PackedVector::HALF *>(ShaderBuffer.data());
+    for (size_t I = 0; I < NumElements; I++)
+      OutData.push_back(HLSLHalf_t::FromHALF(Ptr[I]));
+    return;
+  }
+
+  auto *Ptr = static_cast<const T *>(ShaderBuffer.data());
+  for (size_t I = 0; I < NumElements; I++)
+    OutData.push_back(Ptr[I]);
+}
+
+//
+// Input building helpers. Following LongVector::buildTestInput pattern.
+//
+
+template <typename T> using InputSets = std::vector<std::vector<T>>;
+
+template <typename T>
+std::vector<T> buildTestInput(InputSet Set, size_t NumElements) {
+  const std::vector<T> &RawData = getInputSet<T>(Set);
+
+  std::vector<T> Result;
+  Result.reserve(NumElements);
+  for (size_t I = 0; I < NumElements; ++I)
+    Result.push_back(RawData[I % RawData.size()]);
+
+  return Result;
+}
+
+// Build an identity matrix of the given dimensions using the Identity InputSet
+// for the diagonal value.
+template <typename T>
+std::vector<T> buildIdentityMatrix(size_t Rows, size_t Cols) {
+  const T One = getInputSet<T>(InputSet::Identity)[0];
+  const T Zero = One - One;
+  std::vector<T> Result(Rows * Cols, Zero);
+  size_t MinDim = Rows < Cols ? Rows : Cols;
+  for (size_t I = 0; I < MinDim; ++I)
+    Result[I * Cols + I] = One;
+  return Result;
+}
+
+template <typename T>
+InputSets<T> buildTestInputs(const Operation &Op, size_t Rows, size_t Cols,
+                             size_t KDim) {
+  InputSets<T> Inputs;
+  const size_t NumElements = Rows * Cols;
+
+  if (Op.Arity >= 1)
+    Inputs.push_back(buildTestInput<T>(Op.InputSets[0], NumElements));
+
+  if (Op.Arity >= 2) {
+    // For binary ops the second input may be an identity matrix.
+    if (Op.InputSets[1] == InputSet::Identity)
+      Inputs.push_back(buildIdentityMatrix<T>(KDim, Cols));
+    else
+      Inputs.push_back(buildTestInput<T>(Op.InputSets[1], KDim * Cols));
+  }
+
+  return Inputs;
+}
+
+//
+// Core GPU test runner. Returns the output buffer or nullopt if skipped.
+//
+
+template <typename T>
+std::optional<std::vector<T>>
+runLinAlgTest(ID3D12Device *D3DDevice, bool VerboseLogging, const Operation &Op,
+              const InputSets<T> &Inputs, size_t Rows, size_t Cols, size_t KDim,
+              size_t ExpectedOutputSize) {
+
+  const DataType &ElemType = getDataType<T>();
+
+  std::string CompilerOptions =
+      getCompilerOptionsString(Op, ElemType, Rows, Cols, KDim);
+
+  if (VerboseLogging)
+    hlsl_test::LogCommentFmt(L"Compiler Options: %S", CompilerOptions.c_str());
+
+  dxc::SpecificDllLoader DxilDllLoader;
+  CComPtr<IStream> TestXML;
+  readHlslDataIntoNewStream(L"ShaderOpArith.xml", &TestXML, DxilDllLoader);
+  auto ShaderOpSet = std::make_shared<st::ShaderOpSet>();
+  st::ParseShaderOpSetFromStream(TestXML, ShaderOpSet.get());
+
+  std::shared_ptr<st::ShaderOpTestResult> TestResult =
+      st::RunShaderOpTestAfterParse(
+          D3DDevice, DxilDllLoader, Op.ShaderName,
+          [&](LPCSTR Name, std::vector<BYTE> &ShaderData,
+              st::ShaderOp *ShaderOp) {
+            if (VerboseLogging)
+              hlsl_test::LogCommentFmt(
+                  L"LinAlg RunShaderOpTest CallBack. Resource Name: %S", Name);
+
+            if (_stricmp(Name, "OutputMatrix") == 0) {
+              ShaderOp->Shaders.at(0).Arguments = CompilerOptions.c_str();
+              return;
+            }
+
+            for (size_t I = 0; I < 2; ++I) {
+              std::string BufferName = "InputMatrix";
+              BufferName += (char)('1' + I);
+              if (_stricmp(Name, BufferName.c_str()) == 0) {
+                if (I < Inputs.size() && !Inputs[I].empty())
+                  fillShaderBuffer(ShaderData, Inputs[I]);
+                return;
+              }
+            }
+
+            LOG_ERROR_FMT_THROW(
+                L"LinAlg RunShaderOpTest CallBack. Unexpected Resource: %S",
+                Name);
+          },
+          std::move(ShaderOpSet));
+
+  MappedData ShaderOutData;
+  TestResult->Test->GetReadBackData("OutputMatrix", &ShaderOutData);
+
+  std::vector<T> OutData;
+  readShaderBuffer(ShaderOutData, OutData, ExpectedOutputSize);
+
+  return OutData;
+}
+
+//
+// runAndVerify - runs the GPU test and verifies results.
+//
+
+template <typename T>
+void runAndVerify(ID3D12Device *D3DDevice, bool VerboseLogging,
+                  const Operation &Op, const InputSets<T> &Inputs,
+                  const std::vector<T> &Expected,
+                  const ValidationConfig &Config, size_t Rows, size_t Cols,
+                  size_t KDim) {
+
+  auto Actual = runLinAlgTest<T>(D3DDevice, VerboseLogging, Op, Inputs, Rows,
+                                 Cols, KDim, Expected.size());
+
+  if (!Actual) {
+    hlsl_test::LogCommentFmt(L"Test was skipped.");
+    return;
+  }
+
+  VERIFY_IS_TRUE(
+      doMatricesMatch(*Actual, Expected, Rows, Cols, Config, VerboseLogging));
+}
+
+//
+// Op definitions. Each op carries a ValidationConfig.
+// Specializations are expected to have a ValidationConfig member.
+//
+
+template <OpType OP, typename T> struct Op;
+
+// ExpectedBuilder - specializations compute expected output from inputs.
+template <OpType OP, typename T> struct ExpectedBuilder;
+
+// FillMatrix: splat a scalar value across the entire matrix.
+template <typename T> struct Op<OpType::FillMatrix, T> : StrictValidation {};
+
+template <typename T> struct ExpectedBuilder<OpType::FillMatrix, T> {
+  static std::vector<T> buildExpected(Op<OpType::FillMatrix, T> &,
+                                      const InputSets<T> &, size_t Rows,
+                                      size_t Cols, size_t) {
+    const T FillVal = getInputSet<T>(InputSet::Fill)[0];
+    return std::vector<T>(Rows * Cols, FillVal);
+  }
+
+  // FillMatrix input is special: just the scalar fill value.
+  static InputSets<T> buildInputs(const Operation &, size_t, size_t, size_t) {
+    return {{getInputSet<T>(InputSet::Fill)[0]}};
+  }
+};
+
+// MatrixStore: load and store round-trip.
+template <typename T>
+struct Op<OpType::MatrixStore, T> : DefaultValidation<T> {};
+// A round-trip must reproduce the first input set unchanged.
+template <typename T> struct ExpectedBuilder<OpType::MatrixStore, T> {
+  static std::vector<T> buildExpected(Op<OpType::MatrixStore, T> &,
+                                      const InputSets<T> &Inputs, size_t,
+                                      size_t, size_t) {
+    return Inputs[0];
+  }
+};
+
+// MatrixAccumulate: accumulate into zero-initialized output.
+template <typename T>
+struct Op<OpType::MatrixAccumulate, T> : DefaultValidation<T> {};
+// Accumulating into zeros must yield the first input set unchanged.
+template <typename T> struct ExpectedBuilder<OpType::MatrixAccumulate, T> {
+  static std::vector<T> buildExpected(Op<OpType::MatrixAccumulate, T> &,
+                                      const InputSets<T> &Inputs, size_t,
+                                      size_t, size_t) {
+    return Inputs[0];
+  }
+};
+
+// MatrixMul: multiply input matrix by identity.
+template <typename T> struct Op<OpType::MatrixMul, T> : DefaultValidation<T> {};
+// The expected output is simply a copy of the first input set.
+template <typename T> struct ExpectedBuilder<OpType::MatrixMul, T> {
+  static std::vector<T> buildExpected(Op<OpType::MatrixMul, T> &,
+                                      const InputSets<T> &Inputs, size_t,
+                                      size_t, size_t) {
+    // Multiplying by identity: the result should equal the first input.
+    return Inputs[0];
+  }
+};
+
+//
+// dispatchTest - orchestrates building inputs, computing expected results,
+// and running the test across multiple matrix sizes.
+// Follows the same pattern as LongVector::dispatchTest.
+//
+
+template <typename T, OpType OP>
+void dispatchTest(ID3D12Device *D3DDevice, bool VerboseLogging) {
+  // One run per entry returned by getMatrixSizesToTest().
+  const std::vector<MatrixDims> Sizes = getMatrixSizesToTest();
+  constexpr const Operation &CurOp = getOperation(OP);
+  Op<OP, T> OpConfig;
+  // OpConfig is shared by all sizes; its Validation member is passed below.
+  for (const MatrixDims &Dims : Sizes) {
+    const size_t Rows = Dims.Rows;
+    const size_t Cols = Dims.Cols;
+    // TODO: K dimension currently equals Cols for simplicity (square inner
+    // dimension). Add non-square K sizes for better multiply coverage.
+    const size_t KDim = (CurOp.Arity >= 2) ? Cols : 0;
+    // KDim is 0 for ops with fewer than two inputs (Arity < 2).
+    // FillMatrix has special input handling (scalar, not a matrix).
+    InputSets<T> Inputs;
+    if constexpr (OP == OpType::FillMatrix)
+      Inputs = ExpectedBuilder<OP, T>::buildInputs(CurOp, Rows, Cols, KDim);
+    else
+      Inputs = buildTestInputs<T>(CurOp, Rows, Cols, KDim);
+    // OpConfig is passed by non-const reference, so a builder may adjust it.
+    auto Expected = ExpectedBuilder<OP, T>::buildExpected(OpConfig, Inputs,
+                                                          Rows, Cols, KDim);
+    // Run on the device and compare against Expected for this size.
+    runAndVerify(D3DDevice, VerboseLogging, CurOp, Inputs, Expected,
+                 OpConfig.Validation, Rows, Cols, KDim);
+  }
+}
+
+} // namespace LinearAlgebra
+
+using namespace LinearAlgebra;
+
+//
+// TAEF test entry point macro. LINALG_TEST(Op, DataType) defines a test method
+// named Op_DataType that calls runTest<DataType, OpType::Op>().
+#define LINALG_TEST(Op, DataType)                                              \
+  TEST_METHOD(Op##_##DataType) { runTest<DataType, OpType::Op>(); }
+
+//
+// Common test class for linear algebra tests.
+// Follows the same pattern as LongVector::TestClassCommon.
+//
+class LinAlgTestClassCommon {
+public:
+  bool setupClass() {
+    WEX::TestExecution::SetVerifyOutput verifySettings(
+        WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+    // The body below runs only on the first call; later calls return true.
+    if (!Initialized) {
+      Initialized = true;
+
+      D3D12SDK = D3D12SDKSelector();
+      // The TryGetValue result is ignored: the parameter is optional.
+      WEX::TestExecution::RuntimeParameters::TryGetValue(L"VerboseLogging",
+                                                         VerboseLogging);
+      if (VerboseLogging)
+        hlsl_test::LogCommentFmt(L"Verbose logging is enabled for this test.");
+      else
+        hlsl_test::LogCommentFmt(L"Verbose logging is disabled for this test.");
+      // Defaults to true only in _HLK_CONF builds; a parameter may override.
+      bool FailIfRequirementsNotMet = false;
+#ifdef _HLK_CONF
+      FailIfRequirementsNotMet = true;
+#endif
+      WEX::TestExecution::RuntimeParameters::TryGetValue(
+          L"FailIfRequirementsNotMet", FailIfRequirementsNotMet);
+
+      const bool SkipUnsupported = !FailIfRequirementsNotMet;
+      // Linear algebra requires at least SM 6.10 device support.
+      if (!D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_10,
+                                  SkipUnsupported)) {
+        if (FailIfRequirementsNotMet)
+          hlsl_test::LogErrorFmt(
+              L"Device Creation failed, resulting in test failure, since "
+              L"FailIfRequirementsNotMet is set.");
+
+        return false;
+      }
+    }
+
+    return true;
+  }
+  // Per-method setup: replaces a device that was lost or is missing.
+  bool setupMethod() {
+    if (D3DDevice && D3DDevice->GetDeviceRemovedReason() != S_OK) {
+      hlsl_test::LogCommentFmt(L"Device was lost!");
+      D3DDevice.Release();
+    }
+    // Recreate the device if it was released above or never created.
+    if (!D3DDevice) {
+      hlsl_test::LogCommentFmt(L"Creating device");
+      // Unlike setupClass, creation here always passes SkipUnsupported=false.
+      const bool SkipUnsupported = false;
+      VERIFY_IS_TRUE(D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_10,
+                                            SkipUnsupported));
+    }
+
+    return true;
+  }
+  // Runs dispatchTest<T, OP> on the current device, logging only failures.
+  template <typename T, OpType OP> void runTest() {
+    WEX::TestExecution::SetVerifyOutput verifySettings(
+        WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+
+    dispatchTest<T, OP>(D3DDevice, VerboseLogging);
+  }
+
+protected:
+  CComPtr<ID3D12Device> D3DDevice;
+
+private:
+  bool Initialized = false;
+  std::optional<D3D12SDKSelector> D3D12SDK;
+  bool VerboseLogging = false;
+};
+
+//
+// TAEF Test Class. Each LINALG_TEST entry declares one op/type test method.
+//
+class DxilConf_SM610_LinearAlgebra : public LinAlgTestClassCommon {
+public:
+  BEGIN_TEST_CLASS(DxilConf_SM610_LinearAlgebra)
+  TEST_CLASS_PROPERTY("Kits.TestName",
+                      "D3D12 - Shader Model 6.10 - Linear Algebra Tests")
+  TEST_CLASS_PROPERTY("Kits.TestId", "f00df946-9877-4453-8844-b1f4c8977953")
+  TEST_CLASS_PROPERTY("Kits.Description",
+                      "Validates SM 6.10 linear algebra matrix operations")
+  TEST_CLASS_PROPERTY(
+      "Kits.Specification",
+      "Device.Graphics.D3D12.DXILCore.ShaderModel610.CoreRequirement")
+  // Priority 2: SM 6.10 linalg not yet supported in automation. Test runners
+  // (lit, hcttest.cmd) only run tests with Priority < 2. See eede01664.
+  TEST_METHOD_PROPERTY(L"Priority", L"2")
+  END_TEST_CLASS()
+  // Forward TAEF class and method setup to the shared implementation.
+  TEST_CLASS_SETUP(setupClass) { return LinAlgTestClassCommon::setupClass(); }
+  TEST_METHOD_SETUP(setupMethod) {
+    return LinAlgTestClassCommon::setupMethod();
+  }
+
+  // FillMatrix (Splat)
+  LINALG_TEST(FillMatrix, float);
+  LINALG_TEST(FillMatrix, HLSLHalf_t);
+  LINALG_TEST(FillMatrix, int32_t);
+  LINALG_TEST(FillMatrix, uint32_t);
+
+  // MatrixStore (Load + Store round-trip)
+  LINALG_TEST(MatrixStore, float);
+  LINALG_TEST(MatrixStore, HLSLHalf_t);
+  LINALG_TEST(MatrixStore, int32_t);
+  LINALG_TEST(MatrixStore, uint32_t);
+
+  // MatrixAccumulate (InterlockedAccumulate)
+  LINALG_TEST(MatrixAccumulate, float);
+  LINALG_TEST(MatrixAccumulate, HLSLHalf_t);
+
+  // MatrixMul (Multiply)
+  LINALG_TEST(MatrixMul, float);
+  LINALG_TEST(MatrixMul, HLSLHalf_t);
+};
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def b/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def
new file mode 100644
index 0000000000..9773eba608
--- /dev/null
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def
@@ -0,0 +1,26 @@
+//
+// LinearAlgebraOps.def - X-Macro definitions for linear algebra operations
+//
+// This file defines the operations tested by the LinearAlgebra execution tests.
+// Each OP entry maps an operation symbol to its preprocessor define, arity
+// (number of input matrices), the ShaderOp name, and which InputSets to use.
+//
+// OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2)
+//   SYMBOL      - C++ enum name for the operation
+//   ARITY       - Number of input matrices required (0 = scalar-only, 1, 2)
+//   DEFINE      - Preprocessor define passed to the shader to select the op
+//   SHADER_NAME - Name of the ShaderOp in ShaderOpArith.xml
+//   INPUT_SET_1 - InputSet for first matrix (or Fill for scalar ops)
+//   INPUT_SET_2 - InputSet for second matrix (or unused placeholder)
+//
+
+#ifndef OP
+#define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2)
+#endif
+// With no OP defined by the includer, the entries below expand to nothing.
+OP(FillMatrix, 0, "FUNC_FILL_MATRIX", "LinAlgOp", Fill, Fill)
+OP(MatrixStore, 1, "FUNC_MATRIX_STORE", "LinAlgOp", Seed, Seed)
+OP(MatrixAccumulate, 1, "FUNC_MATRIX_ACCUMULATE", "LinAlgOp", Seed, Seed)
+OP(MatrixMul, 2, "FUNC_MATRIX_MUL", "LinAlgOp", Seed, Identity)
+// OP is always undefined here, so each includer must define it again.
+#undef OP
diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h b/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h
new file mode 100644
index 0000000000..a4bcdc0097
--- /dev/null
+++ b/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h
@@ -0,0 +1,220 @@
+#ifndef LINEARALGEBRATESTDATA_H
+#define LINEARALGEBRATESTDATA_H
+
+#include <vector>
+
+#include <Verify.h>
+
+#include "HLSLTestDataTypes.h"
+
+namespace LinearAlgebra {
+
+//
+// Input data sets for matrix operations.
+// Follows the same InputSet / BEGIN_INPUT_SETS pattern as LongVectorTestData.h.
+//
+// Names of the data sets selectable through getInputSet.
+enum class InputSet { Seed, Fill, Identity };
+// Primary template: using a type without a specialization fails to compile.
+template <typename T> const std::vector<T> &getInputSet(InputSet InputSet) {
+  static_assert(sizeof(T) == 0, "No InputSet for this type");
+}
+// BEGIN_INPUT_SETS(TYPE) opens the getInputSet<TYPE> specialization.
+#define BEGIN_INPUT_SETS(TYPE)                                                 \
+  template <>                                                                  \
+  inline const std::vector<TYPE> &getInputSet<TYPE>(InputSet InputSet) {       \
+    using T = TYPE;                                                            \
+    switch (InputSet) {
+// INPUT_SET(SET, ...) adds a case returning a static vector of the values.
+#define INPUT_SET(SET, ...)                                                    \
+  case SET: {                                                                  \
+    static std::vector<T> Data = {__VA_ARGS__};                                \
+    return Data;                                                               \
+  }
+// END_INPUT_SETS() closes the switch; an unmatched set fails, then aborts.
+#define END_INPUT_SETS()                                                       \
+  default:                                                                     \
+    break;                                                                     \
+    }                                                                          \
+    VERIFY_FAIL("Missing input set");                                          \
+    std::abort();                                                              \
+    }
+
+using HLSLTestDataTypes::F8E4M3_t;
+using HLSLTestDataTypes::F8E5M2_t;
+using HLSLTestDataTypes::HLSLHalf_t;
+using HLSLTestDataTypes::SNormF16_t;
+using HLSLTestDataTypes::SNormF32_t;
+using HLSLTestDataTypes::SNormF64_t;
+using HLSLTestDataTypes::UNormF16_t;
+using HLSLTestDataTypes::UNormF32_t;
+using HLSLTestDataTypes::UNormF64_t;
+// Every Seed set in this file lists 14 values; Fill and Identity list one.
+BEGIN_INPUT_SETS(HLSLHalf_t)
+INPUT_SET(InputSet::Seed, HLSLHalf_t(1.0f), HLSLHalf_t(2.0f), HLSLHalf_t(3.0f),
+          HLSLHalf_t(4.0f), HLSLHalf_t(5.0f), HLSLHalf_t(6.0f),
+          HLSLHalf_t(7.0f), HLSLHalf_t(8.0f), HLSLHalf_t(9.0f),
+          HLSLHalf_t(10.0f), HLSLHalf_t(11.0f), HLSLHalf_t(12.0f),
+          HLSLHalf_t(13.0f), HLSLHalf_t(14.0f))
+INPUT_SET(InputSet::Fill, HLSLHalf_t(42.0f))
+INPUT_SET(InputSet::Identity, HLSLHalf_t(1.0f))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(float)
+INPUT_SET(InputSet::Seed, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+          10.0f, 11.0f, 12.0f, 13.0f, 14.0f)
+INPUT_SET(InputSet::Fill, 42.0f)
+INPUT_SET(InputSet::Identity, 1.0f)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(double)
+INPUT_SET(InputSet::Seed, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+          11.0, 12.0, 13.0, 14.0)
+INPUT_SET(InputSet::Fill, 42.0)
+INPUT_SET(InputSet::Identity, 1.0)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(int32_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(uint32_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+// --- Additional scalar types (pre-staged for upcoming SM 6.10
+// ComponentTypes) ---
+
+BEGIN_INPUT_SETS(int8_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(uint8_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(int16_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(uint16_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(int64_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(uint64_t)
+INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
+INPUT_SET(InputSet::Fill, 42)
+INPUT_SET(InputSet::Identity, 1)
+END_INPUT_SETS()
+
+// --- Normalized types (pre-staged for SM 6.10 SNorm/UNorm ComponentTypes) ---
+
+BEGIN_INPUT_SETS(SNormF16_t)
+INPUT_SET(InputSet::Seed, SNormF16_t(HLSLHalf_t(-0.9f)),
+          SNormF16_t(HLSLHalf_t(-0.7f)), SNormF16_t(HLSLHalf_t(-0.5f)),
+          SNormF16_t(HLSLHalf_t(-0.3f)), SNormF16_t(HLSLHalf_t(-0.1f)),
+          SNormF16_t(HLSLHalf_t(0.1f)), SNormF16_t(HLSLHalf_t(0.2f)),
+          SNormF16_t(HLSLHalf_t(0.3f)), SNormF16_t(HLSLHalf_t(0.4f)),
+          SNormF16_t(HLSLHalf_t(0.5f)), SNormF16_t(HLSLHalf_t(0.6f)),
+          SNormF16_t(HLSLHalf_t(0.7f)), SNormF16_t(HLSLHalf_t(0.8f)),
+          SNormF16_t(HLSLHalf_t(0.9f)))
+INPUT_SET(InputSet::Fill, SNormF16_t(HLSLHalf_t(0.5f)))
+INPUT_SET(InputSet::Identity, SNormF16_t(HLSLHalf_t(1.0f)))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(UNormF16_t)
+INPUT_SET(InputSet::Seed, UNormF16_t(HLSLHalf_t(0.05f)),
+          UNormF16_t(HLSLHalf_t(0.1f)), UNormF16_t(HLSLHalf_t(0.15f)),
+          UNormF16_t(HLSLHalf_t(0.2f)), UNormF16_t(HLSLHalf_t(0.25f)),
+          UNormF16_t(HLSLHalf_t(0.3f)), UNormF16_t(HLSLHalf_t(0.35f)),
+          UNormF16_t(HLSLHalf_t(0.4f)), UNormF16_t(HLSLHalf_t(0.45f)),
+          UNormF16_t(HLSLHalf_t(0.5f)), UNormF16_t(HLSLHalf_t(0.55f)),
+          UNormF16_t(HLSLHalf_t(0.6f)), UNormF16_t(HLSLHalf_t(0.7f)),
+          UNormF16_t(HLSLHalf_t(0.8f)))
+INPUT_SET(InputSet::Fill, UNormF16_t(HLSLHalf_t(0.5f)))
+INPUT_SET(InputSet::Identity, UNormF16_t(HLSLHalf_t(1.0f)))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(SNormF32_t)
+INPUT_SET(InputSet::Seed, SNormF32_t(-0.9f), SNormF32_t(-0.7f),
+          SNormF32_t(-0.5f), SNormF32_t(-0.3f), SNormF32_t(-0.1f),
+          SNormF32_t(0.1f), SNormF32_t(0.2f), SNormF32_t(0.3f),
+          SNormF32_t(0.4f), SNormF32_t(0.5f), SNormF32_t(0.6f),
+          SNormF32_t(0.7f), SNormF32_t(0.8f), SNormF32_t(0.9f))
+INPUT_SET(InputSet::Fill, SNormF32_t(0.5f))
+INPUT_SET(InputSet::Identity, SNormF32_t(1.0f))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(UNormF32_t)
+INPUT_SET(InputSet::Seed, UNormF32_t(0.05f), UNormF32_t(0.1f),
+          UNormF32_t(0.15f), UNormF32_t(0.2f), UNormF32_t(0.25f),
+          UNormF32_t(0.3f), UNormF32_t(0.35f), UNormF32_t(0.4f),
+          UNormF32_t(0.45f), UNormF32_t(0.5f), UNormF32_t(0.55f),
+          UNormF32_t(0.6f), UNormF32_t(0.7f), UNormF32_t(0.8f))
+INPUT_SET(InputSet::Fill, UNormF32_t(0.5f))
+INPUT_SET(InputSet::Identity, UNormF32_t(1.0f))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(SNormF64_t)
+INPUT_SET(InputSet::Seed, SNormF64_t(-0.9), SNormF64_t(-0.7), SNormF64_t(-0.5),
+          SNormF64_t(-0.3), SNormF64_t(-0.1), SNormF64_t(0.1), SNormF64_t(0.2),
+          SNormF64_t(0.3), SNormF64_t(0.4), SNormF64_t(0.5), SNormF64_t(0.6),
+          SNormF64_t(0.7), SNormF64_t(0.8), SNormF64_t(0.9))
+INPUT_SET(InputSet::Fill, SNormF64_t(0.5))
+INPUT_SET(InputSet::Identity, SNormF64_t(1.0))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(UNormF64_t)
+INPUT_SET(InputSet::Seed, UNormF64_t(0.05), UNormF64_t(0.1), UNormF64_t(0.15),
+          UNormF64_t(0.2), UNormF64_t(0.25), UNormF64_t(0.3), UNormF64_t(0.35),
+          UNormF64_t(0.4), UNormF64_t(0.45), UNormF64_t(0.5), UNormF64_t(0.55),
+          UNormF64_t(0.6), UNormF64_t(0.7), UNormF64_t(0.8))
+INPUT_SET(InputSet::Fill, UNormF64_t(0.5))
+INPUT_SET(InputSet::Identity, UNormF64_t(1.0))
+END_INPUT_SETS()
+
+// --- FP8 types (pre-staged for SM 6.10 packed ComponentTypes) ---
+
+BEGIN_INPUT_SETS(F8E4M3_t)
+INPUT_SET(InputSet::Seed, F8E4M3_t(1.0f), F8E4M3_t(1.5f), F8E4M3_t(2.0f),
+          F8E4M3_t(2.5f), F8E4M3_t(3.0f), F8E4M3_t(4.0f), F8E4M3_t(5.0f),
+          F8E4M3_t(6.0f), F8E4M3_t(7.0f), F8E4M3_t(8.0f), F8E4M3_t(0.5f),
+          F8E4M3_t(0.25f), F8E4M3_t(0.75f), F8E4M3_t(10.0f))
+INPUT_SET(InputSet::Fill, F8E4M3_t(2.0f))
+INPUT_SET(InputSet::Identity, F8E4M3_t(1.0f))
+END_INPUT_SETS()
+
+BEGIN_INPUT_SETS(F8E5M2_t)
+INPUT_SET(InputSet::Seed, F8E5M2_t(1.0f), F8E5M2_t(1.5f), F8E5M2_t(2.0f),
+          F8E5M2_t(3.0f), F8E5M2_t(4.0f), F8E5M2_t(5.0f), F8E5M2_t(6.0f),
+          F8E5M2_t(7.0f), F8E5M2_t(8.0f), F8E5M2_t(0.5f), F8E5M2_t(0.25f),
+          F8E5M2_t(0.75f), F8E5M2_t(10.0f), F8E5M2_t(12.0f))
+INPUT_SET(InputSet::Fill, F8E5M2_t(2.0f))
+INPUT_SET(InputSet::Identity, F8E5M2_t(1.0f))
+END_INPUT_SETS()
+
+#undef BEGIN_INPUT_SETS
+#undef INPUT_SET
+#undef END_INPUT_SETS
+
+} // namespace LinearAlgebra
+
+#endif // LINEARALGEBRATESTDATA_H
diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
index 4126d861ac..cd58e05814 100644
--- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h
+++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
@@ -5,237 +5,16 @@
 
 #include <limits>
 #include <map>
-#include <ostream>
 #include <string>
 #include <vector>
 
-#include <DirectXMath.h>
-#include <DirectXPackedVector.h>
-
-#include "dxc/Support/Global.h"
+#include "HLSLTestDataTypes.h"
 
 namespace LongVector {
 
-// A helper struct because C++ bools are 1 byte and HLSL bools are 4 bytes.
-// Take int32_t as a constuctor argument and convert it to bool when needed.
-// Comparisons cast to a bool because we only care if the bool representation is
-// true or false.
-struct HLSLBool_t {
-  HLSLBool_t() : Val(0) {}
-  HLSLBool_t(int32_t Val) : Val(Val) {}
-  HLSLBool_t(bool Val) : Val(Val) {}
-
-  bool operator==(const HLSLBool_t &Other) const {
-    return static_cast<bool>(Val) == static_cast<bool>(Other.Val);
-  }
-
-  bool operator!=(const HLSLBool_t &Other) const {
-    return static_cast<bool>(Val) != static_cast<bool>(Other.Val);
-  }
-
-  bool operator<(const HLSLBool_t &Other) const { return Val < Other.Val; }
-
-  bool operator>(const HLSLBool_t &Other) const { return Val > Other.Val; }
-
-  bool operator<=(const HLSLBool_t &Other) const { return Val <= Other.Val; }
-
-  bool operator>=(const HLSLBool_t &Other) const { return Val >= Other.Val; }
-
-  HLSLBool_t operator*(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val * Other.Val);
-  }
-
-  HLSLBool_t operator+(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val + Other.Val);
-  }
-
-  HLSLBool_t operator-(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val - Other.Val);
-  }
-
-  HLSLBool_t operator/(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val / Other.Val);
-  }
-
-  HLSLBool_t operator%(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val % Other.Val);
-  }
-
-  HLSLBool_t operator&&(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val && Other.Val);
-  }
-
-  HLSLBool_t operator||(const HLSLBool_t &Other) const {
-    return HLSLBool_t(Val || Other.Val);
-  }
-
-  bool AsBool() const { return static_cast<bool>(Val); }
-
-  operator bool() const { return AsBool(); }
-  operator int16_t() const { return (int16_t)(AsBool()); }
-  operator int32_t() const { return (int32_t)(AsBool()); }
-  operator int64_t() const { return (int64_t)(AsBool()); }
-  operator uint16_t() const { return (uint16_t)(AsBool()); }
-  operator uint32_t() const { return (uint32_t)(AsBool()); }
-  operator uint64_t() const { return (uint64_t)(AsBool()); }
-  operator float() const { return (float)(AsBool()); }
-  operator double() const { return (double)(AsBool()); }
-
-  // So we can construct std::wstrings using std::wostream
-  friend std::wostream &operator<<(std::wostream &Os, const HLSLBool_t &Obj) {
-    Os << static_cast<bool>(Obj.Val);
-    return Os;
-  }
-
-  // So we can construct std::strings using std::ostream
-  friend std::ostream &operator<<(std::ostream &Os, const HLSLBool_t &Obj) {
-    Os << static_cast<bool>(Obj.Val);
-    return Os;
-  }
-
-  int32_t Val = 0;
-};
-
-//  No native float16 type in C++ until C++23 . So we use uint16_t to represent
-//  it. Simple little wrapping struct to help handle the right behavior.
-struct HLSLHalf_t {
-  HLSLHalf_t() : Val(0) {}
-  HLSLHalf_t(const float F) {
-    Val = DirectX::PackedVector::XMConvertFloatToHalf(F);
-  }
-  HLSLHalf_t(const double D) {
-    float F;
-    if (D >= std::numeric_limits<double>::max())
-      F = std::numeric_limits<float>::max();
-    else if (D <= std::numeric_limits<double>::lowest())
-      F = std::numeric_limits<float>::lowest();
-    else
-      F = static_cast<float>(D);
-
-    Val = DirectX::PackedVector::XMConvertFloatToHalf(F);
-  }
-  HLSLHalf_t(const uint32_t U) {
-    float F = static_cast<float>(U);
-    Val = DirectX::PackedVector::XMConvertFloatToHalf(F);
-  }
-
-  // PackedVector::HALF is a uint16. Make sure we don't ever accidentally
-  // convert one of these to a HLSLHalf_t by arithmetically converting it to a
-  // float.
-  HLSLHalf_t(DirectX::PackedVector::HALF) = delete;
-
-  static double GetULP(HLSLHalf_t A) {
-    DXASSERT(!std::isnan(A) && !std::isinf(A),
-             "ULP of NaN or infinity is undefined");
-
-    HLSLHalf_t Next = A;
-    ++Next.Val;
-
-    double NextD = Next;
-    double AD = A;
-    return NextD - AD;
-  }
-
-  static HLSLHalf_t FromHALF(DirectX::PackedVector::HALF Half) {
-    HLSLHalf_t H;
-    H.Val = Half;
-    return H;
-  }
-
-  // Implicit conversion to float for use with things like std::acos, std::tan,
-  // etc
-  operator float() const {
-    return DirectX::PackedVector::XMConvertHalfToFloat(Val);
-  }
-
-  bool operator==(const HLSLHalf_t &Other) const {
-    // Convert to floats to properly handle the '0 == -0' case which must
-    // compare to true but have different uint16_t values.
-    // That is, 0 == -0 is true. We store Val as a uint16_t.
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-    return A == B;
-  }
-
-  bool operator<(const HLSLHalf_t &Other) const {
-    return DirectX::PackedVector::XMConvertHalfToFloat(Val) <
-           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-  }
-
-  bool operator>(const HLSLHalf_t &Other) const {
-    return DirectX::PackedVector::XMConvertHalfToFloat(Val) >
-           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-  }
-
-  // Used by tolerance checks in the tests.
-  bool operator>(float F) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    return A > F;
-  }
-
-  bool operator<(float F) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    return A < F;
-  }
-
-  bool operator<=(const HLSLHalf_t &Other) const {
-    return DirectX::PackedVector::XMConvertHalfToFloat(Val) <=
-           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-  }
-
-  bool operator>=(const HLSLHalf_t &Other) const {
-    return DirectX::PackedVector::XMConvertHalfToFloat(Val) >=
-           DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-  }
-
-  bool operator!=(const HLSLHalf_t &Other) const { return Val != Other.Val; }
-
-  HLSLHalf_t operator*(const HLSLHalf_t &Other) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A * B));
-  }
-
-  HLSLHalf_t operator+(const HLSLHalf_t &Other) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-    return FromHALF((DirectX::PackedVector::XMConvertFloatToHalf(A + B)));
-  }
-
-  HLSLHalf_t operator-(const HLSLHalf_t &Other) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A - B));
-  }
-
-  HLSLHalf_t operator/(const HLSLHalf_t &Other) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A / B));
-  }
-
-  HLSLHalf_t operator%(const HLSLHalf_t &Other) const {
-    const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
-    const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
-    const float C = std::fmod(A, B);
-    return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(C));
-  }
-
-  // So we can construct std::wstrings using std::wostream
-  friend std::wostream &operator<<(std::wostream &Os, const HLSLHalf_t &Obj) {
-    Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val);
-    return Os;
-  }
-
-  // So we can construct std::wstrings using std::wostream
-  friend std::ostream &operator<<(std::ostream &Os, const HLSLHalf_t &Obj) {
-    Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val);
-    return Os;
-  }
-
-  // HALF is an alias to uint16_t
-  DirectX::PackedVector::HALF Val = 0;
-};
+// Import shared HLSL type wrappers into LongVector namespace.
+using HLSLTestDataTypes::HLSLBool_t;
+using HLSLTestDataTypes::HLSLHalf_t;
 
 enum class InputSet {
 #define INPUT_SET(SYMBOL) SYMBOL,
@@ -247,7 +26,8 @@ template <typename T> const std::vector<T> &getInputSet(InputSet InputSet) {
 }
 
 #define BEGIN_INPUT_SETS(TYPE)                                                 \
-  template <> const std::vector<TYPE> &getInputSet<TYPE>(InputSet InputSet) {  \
+  template <>                                                                  \
+  inline const std::vector<TYPE> &getInputSet<TYPE>(InputSet InputSet) {       \
     using T = TYPE;                                                            \
     switch (InputSet) {
 
diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index dbb8a8d672..e3e5d8b0fc 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -64,10 +64,12 @@ DATA_TYPE(double, "double", 8)
 
 #undef DATA_TYPE
 
-template <typename T> constexpr bool isFloatingPointType() {
-  return std::is_same_v<T, float> || std::is_same_v<T, double> ||
-         std::is_same_v<T, HLSLHalf_t>;
-}
+using HLSLTestDataTypes::DefaultValidation;
+using HLSLTestDataTypes::doValuesMatch;
+using HLSLTestDataTypes::isFloatingPointType;
+using HLSLTestDataTypes::StrictValidation;
+using HLSLTestDataTypes::ValidationConfig;
+using HLSLTestDataTypes::ValidationType;
 
 //
 // Operation Types
@@ -186,72 +188,6 @@ void logLongVector(const std::vector<T> &Values, const std::wstring &Name) {
   hlsl_test::LogCommentFmt(Wss.str().c_str());
 }
 
-enum class ValidationType {
-  Epsilon,
-  Ulp,
-};
-
-template <typename T>
-bool doValuesMatch(T A, T B, double Tolerance, ValidationType) {
-  if (Tolerance == 0.0)
-    return A == B;
-
-  T Diff = A > B ? A - B : B - A;
-  return Diff <= Tolerance;
-}
-
-bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, double, ValidationType) {
-  return A == B;
-}
-
-bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance,
-                   ValidationType ValidationType) {
-  switch (ValidationType) {
-  case ValidationType::Epsilon:
-    return CompareHalfEpsilon(A.Val, B.Val, static_cast<float>(Tolerance));
-  case ValidationType::Ulp:
-    return CompareHalfULP(A.Val, B.Val, static_cast<float>(Tolerance));
-  default:
-    hlsl_test::LogErrorFmt(
-        L"Invalid ValidationType. Expecting Epsilon or ULP.");
-    return false;
-  }
-}
-
-bool doValuesMatch(float A, float B, double Tolerance,
-                   ValidationType ValidationType) {
-  switch (ValidationType) {
-  case ValidationType::Epsilon:
-    return CompareFloatEpsilon(A, B, static_cast<float>(Tolerance));
-  case ValidationType::Ulp: {
-    // Tolerance is in ULPs. Convert to int for the comparison.
-    const int IntTolerance = static_cast<int>(Tolerance);
-    return CompareFloatULP(A, B, IntTolerance);
-  };
-  default:
-    hlsl_test::LogErrorFmt(
-        L"Invalid ValidationType. Expecting Epsilon or ULP.");
-    return false;
-  }
-}
-
-bool doValuesMatch(double A, double B, double Tolerance,
-                   ValidationType ValidationType) {
-  switch (ValidationType) {
-  case ValidationType::Epsilon:
-    return CompareDoubleEpsilon(A, B, Tolerance);
-  case ValidationType::Ulp: {
-    // Tolerance is in ULPs. Convert to int64_t for the comparison.
-    const int64_t IntTolerance = static_cast<int64_t>(Tolerance);
-    return CompareDoubleULP(A, B, IntTolerance);
-  };
-  default:
-    hlsl_test::LogErrorFmt(
-        L"Invalid ValidationType. Expecting Epsilon or ULP.");
-    return false;
-  }
-}
-
 template <typename T>
 bool doVectorsMatch(const std::vector<T> &ActualValues,
                     const std::vector<T> &ExpectedValues, double Tolerance,
@@ -563,19 +499,6 @@ InputSets<T> buildTestInputs(size_t VectorSize, const InputSet OpInputSets[3],
   return Inputs;
 }
 
-struct ValidationConfig {
-  double Tolerance = 0.0;
-  ValidationType Type = ValidationType::Epsilon;
-
-  static ValidationConfig Epsilon(double Tolerance) {
-    return ValidationConfig{Tolerance, ValidationType::Epsilon};
-  }
-
-  static ValidationConfig Ulp(double Tolerance) {
-    return ValidationConfig{Tolerance, ValidationType::Ulp};
-  }
-};
-
 template <typename T, typename OUT_TYPE>
 void runAndVerify(
     ID3D12Device *D3DDevice, bool VerboseLogging, const Operation &Operation,
@@ -614,23 +537,6 @@ template <OpType OP, typename T, size_t Arity> struct Op;
 // member functions.
 template <OpType OP, typename T> struct ExpectedBuilder;
 
-// Default Validation configuration - ULP for floating point types, exact
-// matches for everything else.
-template <typename T> struct DefaultValidation {
-  ValidationConfig ValidationConfig;
-
-  DefaultValidation() {
-    if constexpr (isFloatingPointType<T>())
-      ValidationConfig = ValidationConfig::Ulp(1.0f);
-  }
-};
-
-// Strict Validation - Defaults to exact matches.
-// Tolerance can be set to a non-zero value to allow for a wider range.
-struct StrictValidation {
-  ValidationConfig ValidationConfig;
-};
-
 // Macros to build up common patterns of Op definitions
 
 #define OP_1(OP, VALIDATION, IMPL)                                             \
@@ -1264,7 +1170,7 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
       AbsoluteEpsilon +=
           computeAbsoluteEpsilon<T>((SumPos + SumNeg), ULPTolerance);
 
-    Op.ValidationConfig = ValidationConfig::Epsilon(AbsoluteEpsilon);
+    Op.Validation = ValidationConfig::Epsilon(AbsoluteEpsilon);
 
     std::vector<T> Expected;
     Expected.push_back(static_cast<T>(DotProduct));
@@ -1777,7 +1683,7 @@ void dispatchTest(ID3D12Device *D3DDevice, bool VerboseLogging,
     auto Expected = ExpectedBuilder<OP, T>::buildExpected(Op, Inputs);
 
     runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected,
-                 Op.ValidationConfig);
+                 Op.Validation);
   }
 }
 
@@ -1802,7 +1708,7 @@ void dispatchWaveOpTest(ID3D12Device *D3DDevice, bool VerboseLogging,
     auto Expected = ExpectedBuilder<OP, T>::buildExpected(Op, Inputs, WaveSize);
 
     runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected,
-                 Op.ValidationConfig, AdditionalCompilerOptions);
+                 Op.Validation, AdditionalCompilerOptions);
   }
 }
 
diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml
index b7edba9561..ebad1bb790 100644
--- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml
+++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml
@@ -4615,4 +4615,149 @@ void MSMain(uint GID : SV_GroupIndex,
       ]]>
     </Shader>
   </ShaderOp>
+
+  <!-- ================================================================== -->
+  <!-- LinAlgOp: Generic ShaderOp for SM 6.10 linear algebra operations.  -->
+  <!-- Uses preprocessor defines to select the operation under test.      -->
+  <!-- Defines passed as compiler arguments:                              -->
+  <!--   ELEM_TYPE  : Matrix element type (e.g. float, float16_t)         -->
+  <!--   COMP_TYPE  : First dx::linalg::Matrix template argument          -->
+  <!--   OUT_TYPE   : Output element type (currently unused)              -->
+  <!--   ROWS       : Matrix row count (M dimension)                      -->
+  <!--   COLS       : Matrix column count (N dimension)                   -->
+  <!--   K_DIM      : Inner dimension for multiply (K dimension)          -->
+  <!--   MATRIX_LAYOUT : 0=RowMajor, 1=ColMajor (unused; always RowMajor) -->
+  <!--   FUNC_*     : Operation selector define                           -->
+  <!-- ================================================================== -->
+  <ShaderOp Name="LinAlgOp" CS="CS">
+    <RootSignature>UAV(u0), UAV(u1), UAV(u2)</RootSignature>
+    <!-- Width="2048" bytes: enough for 16x16 matrix of 64-bit elements -->
+    <Resource Name="InputMatrix1" Dimension="BUFFER" Width="2048"
+      Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
+      TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
+    <Resource Name="InputMatrix2" Dimension="BUFFER" Width="2048"
+      Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
+      TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
+    <Resource Name="OutputMatrix" Dimension="BUFFER" Width="2048"
+      Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
+      TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
+    <RootValues>
+      <RootValue Index="0" ResName="InputMatrix1" />
+      <RootValue Index="1" ResName="InputMatrix2" />
+      <RootValue Index="2" ResName="OutputMatrix" />
+    </RootValues>
+    <Shader Name="CS" Target="cs_6_10" EntryPoint="main">
+      <![CDATA[
+        RWByteAddressBuffer g_InputMatrix1 : register(u0);
+        RWByteAddressBuffer g_InputMatrix2 : register(u1);
+        RWByteAddressBuffer g_OutputMatrix : register(u2);
+
+        // Row stride in bytes of a row-major matrix with COLS ELEM_TYPE columns.
+        static const uint ByteStride = COLS * sizeof(ELEM_TYPE);
+
+        #ifdef FUNC_FILL_MATRIX
+        // TestFillMatrix: Splat the ELEM_TYPE scalar at byte offset 0 of
+        // g_InputMatrix1 into a ROWS x COLS matrix; store it to g_OutputMatrix.
+        void TestFillMatrix() {
+          ELEM_TYPE FillVal = g_InputMatrix1.Load<ELEM_TYPE>(0);
+
+          dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                             dx::linalg::MatrixUse::Accumulator,
+                             dx::linalg::MatrixScope::Wave> Mat =
+              dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                                 dx::linalg::MatrixUse::Accumulator,
+                                 dx::linalg::MatrixScope::Wave>::Splat(FillVal);
+
+          Mat.Store(g_OutputMatrix, 0, ByteStride,
+                    dx::linalg::MatrixLayout::RowMajor);
+        }
+        #endif
+
+        #ifdef FUNC_MATRIX_STORE
+        // TestMatrixStore: Load a ROWS x COLS row-major matrix from
+        // g_InputMatrix1 and store it to g_OutputMatrix (load-store round trip).
+        void TestMatrixStore() {
+          dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                             dx::linalg::MatrixUse::Accumulator,
+                             dx::linalg::MatrixScope::Wave> Mat =
+              dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                                 dx::linalg::MatrixUse::Accumulator,
+                                 dx::linalg::MatrixScope::Wave>::Load(
+                  g_InputMatrix1, 0, ByteStride,
+                  dx::linalg::MatrixLayout::RowMajor);
+
+          Mat.Store(g_OutputMatrix, 0, ByteStride,
+                    dx::linalg::MatrixLayout::RowMajor);
+        }
+        #endif
+
+        #ifdef FUNC_MATRIX_ACCUMULATE
+        // TestMatrixAccumulate: Load a ROWS x COLS matrix from g_InputMatrix1,
+        // then InterlockedAccumulate it into g_OutputMatrix (pre-initialized).
+        void TestMatrixAccumulate() {
+          dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                             dx::linalg::MatrixUse::Accumulator,
+                             dx::linalg::MatrixScope::Wave> Mat =
+              dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                                 dx::linalg::MatrixUse::Accumulator,
+                                 dx::linalg::MatrixScope::Wave>::Load(
+                  g_InputMatrix1, 0, ByteStride,
+                  dx::linalg::MatrixLayout::RowMajor);
+
+          Mat.InterlockedAccumulate(g_OutputMatrix, 0, ByteStride,
+                                   dx::linalg::MatrixLayout::RowMajor);
+        }
+        #endif
+
+        #ifdef FUNC_MATRIX_MUL
+        // TestMatrixMul: Result = MatA * MatB, stored row-major to g_OutputMatrix.
+        // MatA is ROWS x K_DIM (MatrixUse::A) from g_InputMatrix1; MatB is
+        // K_DIM x COLS (MatrixUse::B) from g_InputMatrix2; Result is ROWS x COLS.
+        void TestMatrixMul() {
+          static const uint StrideA = K_DIM * sizeof(ELEM_TYPE);
+          static const uint StrideB = COLS * sizeof(ELEM_TYPE);
+
+          dx::linalg::Matrix<COMP_TYPE, ROWS, K_DIM,
+                             dx::linalg::MatrixUse::A,
+                             dx::linalg::MatrixScope::Wave> MatA =
+              dx::linalg::Matrix<COMP_TYPE, ROWS, K_DIM,
+                                 dx::linalg::MatrixUse::A,
+                                 dx::linalg::MatrixScope::Wave>::Load(
+                  g_InputMatrix1, 0, StrideA,
+                  dx::linalg::MatrixLayout::RowMajor);
+
+          dx::linalg::Matrix<COMP_TYPE, K_DIM, COLS,
+                             dx::linalg::MatrixUse::B,
+                             dx::linalg::MatrixScope::Wave> MatB =
+              dx::linalg::Matrix<COMP_TYPE, K_DIM, COLS,
+                                 dx::linalg::MatrixUse::B,
+                                 dx::linalg::MatrixScope::Wave>::Load(
+                  g_InputMatrix2, 0, StrideB,
+                  dx::linalg::MatrixLayout::RowMajor);
+
+          dx::linalg::Matrix<COMP_TYPE, ROWS, COLS,
+                             dx::linalg::MatrixUse::Accumulator,
+                             dx::linalg::MatrixScope::Wave> Result =
+              dx::linalg::Multiply(MatA, MatB);
+
+          Result.Store(g_OutputMatrix, 0, ByteStride,
+                       dx::linalg::MatrixLayout::RowMajor);
+        }
+        #endif
+
+        [numthreads(1, 1, 1)]
+        void main(uint GI : SV_GroupIndex) { // Runs the one FUNC_*-selected test.
+          #ifdef FUNC_FILL_MATRIX
+            TestFillMatrix();
+          #elif defined(FUNC_MATRIX_STORE)
+            TestMatrixStore();
+          #elif defined(FUNC_MATRIX_ACCUMULATE)
+            TestMatrixAccumulate();
+          #elif defined(FUNC_MATRIX_MUL)
+            TestMatrixMul();
+          #endif // No FUNC_* define: main is left empty.
+        }
+      ]]>
+    </Shader>
+  </ShaderOp>
 </ShaderOpSet>