From dd42c46735348cf2816ff40987dec2cf1015ab02 Mon Sep 17 00:00:00 2001
From: Javier de Jesus <javier.dejesusj9@gmail.com>
Date: Sat, 13 Jun 2026 19:36:09 +0000
Subject: [PATCH 1/2] [Relax][ONNX] Fix LayerNormalization no-bias zero tensor
 shape and dtype

When the optional bias input of LayerNormalization is omitted, the zero
bias was built from data.struct_info.shape[1] and hardcoded to float32
instead of following the scale (gamma) tensor. For a non-square input
such as [2, 3, 4, 8] with scale [8], this produced a bias of shape (3,)
while gamma is (8,), so relax.op.nn.layer_norm raised an InternalError
on the size mismatch. For a half-precision model with no bias, the
float32 bias was rejected because gamma, beta, and data must share one
dtype.

Synthesize the zero bias from gamma_shape and the scale dtype, matching
ONNX semantics where an omitted B is treated as zeros shaped and typed
like the scale. Add non-square no-bias regression cases: an fp16 case
checked end to end and a bf16 case checked through the importer, since
ONNX Runtime's CPU provider has no bf16 LayerNormalization kernel.

Fixes #19691
---
 .../tvm/relax/frontend/onnx/onnx_frontend.py  |  3 +-
 tests/python/relax/test_frontend_onnx.py      | 66 +++++++++++++++++++
 2 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/python/tvm/relax/frontend/onnx/onnx_frontend.py b/python/tvm/relax/frontend/onnx/onnx_frontend.py
index 3d9dfba9a16b..02a492188946 100644
--- a/python/tvm/relax/frontend/onnx/onnx_frontend.py
+++ b/python/tvm/relax/frontend/onnx/onnx_frontend.py
@@ -3834,8 +3834,7 @@ def _impl_v17(cls, bb, inputs, attr, params):
         gamma_shape = get_const_tuple(scale.struct_info.shape)
 
         if bias is None:
-            seq_len = data.struct_info.shape[1].value
-            bias = relax.const([0.0] * seq_len, dtype="float32")
+            bias = relax.const(_np.zeros(gamma_shape, dtype=scale.struct_info.dtype))
         else:
             beta_shape = get_const_tuple(bias.struct_info.shape)
             if gamma_shape != beta_shape:
diff --git a/tests/python/relax/test_frontend_onnx.py b/tests/python/relax/test_frontend_onnx.py
index 8d8c1bc54b9b..7f77dac0c876 100644
--- a/tests/python/relax/test_frontend_onnx.py
+++ b/tests/python/relax/test_frontend_onnx.py
@@ -2330,6 +2330,72 @@ def test_layer_norm():
     model = helper.make_model(graph, producer_name="layer_norm_test")
     check_correctness(model)
 
+    # No bias with a non-square input where data.shape[1] differs from the scale
+    # shape, see https://github.com/apache/tvm/issues/19691.
+    layer_norm_node = helper.make_node(
+        "LayerNormalization", ["input", "scale"], ["Y"], axis=-1, epsilon=1e-12
+    )
+
+    graph = helper.make_graph(
+        [layer_norm_node],
+        "layer_norm_test",
+        inputs=[
+            helper.make_tensor_value_info("input", TensorProto.FLOAT, [2, 3, 4, 8]),
+            helper.make_tensor_value_info("scale", TensorProto.FLOAT, [8]),
+        ],
+        outputs=[
+            helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4, 8]),
+        ],
+    )
+
+    model = helper.make_model(graph, producer_name="layer_norm_test")
+    check_correctness(model)
+
+    # No bias with a non-square fp16 input. The synthesized zero bias must match
+    # the scale dtype, otherwise layer_norm rejects the float32 bias, see
+    # https://github.com/apache/tvm/issues/19691.
+    layer_norm_node = helper.make_node(
+        "LayerNormalization", ["input", "scale"], ["Y"], axis=-1, epsilon=1e-12
+    )
+
+    graph = helper.make_graph(
+        [layer_norm_node],
+        "layer_norm_test",
+        inputs=[
+            helper.make_tensor_value_info("input", TensorProto.FLOAT16, [2, 3, 4, 8]),
+            helper.make_tensor_value_info("scale", TensorProto.FLOAT16, [8]),
+        ],
+        outputs=[
+            helper.make_tensor_value_info("Y", TensorProto.FLOAT16, [2, 3, 4, 8]),
+        ],
+    )
+
+    model = helper.make_model(graph, producer_name="layer_norm_test")
+    check_correctness(model, opset=17, atol=1e-2, rtol=1e-2)
+
+    # Same no-bias path for bf16. ONNX Runtime's CPU provider has no bf16
+    # LayerNormalization kernel, so this only checks the importer builds the
+    # graph with a bf16 zero bias (the dtype the fix derives from the scale).
+    layer_norm_node = helper.make_node(
+        "LayerNormalization", ["input", "scale"], ["Y"], axis=-1, epsilon=1e-12
+    )
+
+    graph = helper.make_graph(
+        [layer_norm_node],
+        "layer_norm_test",
+        inputs=[
+            helper.make_tensor_value_info("input", TensorProto.BFLOAT16, [2, 3, 4, 8]),
+            helper.make_tensor_value_info("scale", TensorProto.BFLOAT16, [8]),
+        ],
+        outputs=[
+            helper.make_tensor_value_info("Y", TensorProto.BFLOAT16, [2, 3, 4, 8]),
+        ],
+    )
+
+    model = helper.make_model(graph, producer_name="layer_norm_test")
+    model.opset_import[0].version = 17
+    from_onnx(model, opset=17, keep_params_in_input=True)
+
 
 def test_layer_norm_with_nd_gamma_beta():
     layer_norm_node = helper.make_node(

From d06b125a5cdc84769a16e3de3d08fc50d4dfcec3 Mon Sep 17 00:00:00 2001
From: Javier de Jesus <javier.dejesusj9@gmail.com>
Date: Sun, 14 Jun 2026 22:17:55 +0000
Subject: [PATCH 2/2] [Relax][ONNX] Build no-bias LayerNorm zero bias with
 native dtype

np.zeros rejects TVM dtype strings that NumPy lacks natively, so
np.zeros(gamma_shape, dtype="bfloat16") raises "data type 'bfloat16' not
understood". relax.const imports ml_dtypes and casts internally, but its
np.zeros argument is evaluated first, so that import is too late. Build
the zeros array with a native dtype and pass the target dtype to
relax.const, matching the existing torch frontend convention.
---
 python/tvm/relax/frontend/onnx/onnx_frontend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tvm/relax/frontend/onnx/onnx_frontend.py b/python/tvm/relax/frontend/onnx/onnx_frontend.py
index 02a492188946..113193263d86 100644
--- a/python/tvm/relax/frontend/onnx/onnx_frontend.py
+++ b/python/tvm/relax/frontend/onnx/onnx_frontend.py
@@ -3834,7 +3834,7 @@ def _impl_v17(cls, bb, inputs, attr, params):
         gamma_shape = get_const_tuple(scale.struct_info.shape)
 
         if bias is None:
-            bias = relax.const(_np.zeros(gamma_shape, dtype=scale.struct_info.dtype))
+            bias = relax.const(_np.zeros(gamma_shape), dtype=scale.struct_info.dtype)
         else:
             beta_shape = get_const_tuple(bias.struct_info.shape)
             if gamma_shape != beta_shape: