pytorch · roman-janik-nxp · Jun 18, 2026
@@ -5,7 +5,6 @@
 
 import torch
 
-from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
     NodeConverter,
@@ -29,14 +28,6 @@ def _is_supported_on_target(
         if not NodeConverter.at_least_one_input_shape_matches_the_output_shape(node):
             return False
 
-        # If one input is in channel first and ranks of input tensors are not equal, we need to add Transposes
-        # Transpose is currently not supported for new flow
-        if any(
-            input_node.meta[NXP_NODE_FORMAT].is_channels_first()
-            for input_node in node.all_input_nodes
-        ) and NodeConverter._node_inputs_ranks_not_equal(node):
-            return False
-
         supported_types = [torch.int8, torch.uint8]
         if not NodeConverter.uses_quantization_type_for_io(
             node, supported_types, [0, 1], [0]
@@ -68,4 +59,8 @@ def convert(self, node: Node):
         t_op = self._create_tflite_op_with_io_tensors(node)
         t_op.builtin_options = add_options.Add()
 
-        self.builder.append_operators([t_op])
+        # Extra ops needed for shape broadcasting are emitted before the Add op itself
+        additional_ops = self.builder.ensure_correct_broadcasting(
+            t_op, t_op.tmp_outputs[0]
+        )
+        self.builder.append_operators(additional_ops + [t_op])
@@ -4,7 +4,6 @@
 # LICENSE file in the root directory of this source tree.
 
 import torch
-from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
     NodeConverter,
@@ -28,14 +27,6 @@ def _is_supported_on_target(
         if not NodeConverter.at_least_one_input_shape_matches_the_output_shape(node):
             return False
 
-        # If one input is in channel first and ranks of input tensors are not equal, we need to add Transposes
-        # Transpose is currently not supported for new flow
-        if any(
-            input_node.meta[NXP_NODE_FORMAT].is_channels_first()
-            for input_node in node.all_input_nodes
-        ) and NodeConverter._node_inputs_ranks_not_equal(node):
-            return False
-
         supported_types = [torch.int8, torch.uint8]
         if not NodeConverter.uses_quantization_type_for_io(
             node, supported_types, [0, 1], [0]
@@ -64,4 +55,8 @@ def convert(self, node: Node):
         t_op = self._create_tflite_op_with_io_tensors(node)
         t_op.builtin_options = mul_options.Mul()
 
-        self.builder.append_operators([t_op])
+        # Extra ops needed for shape broadcasting are emitted before the Mul op itself
+        additional_ops = self.builder.ensure_correct_broadcasting(
+            t_op, t_op.tmp_outputs[0]
+        )
+        self.builder.append_operators(additional_ops + [t_op])
@@ -5,7 +5,6 @@
 
 import torch
 
-from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
     NodeConverter,
@@ -29,14 +28,6 @@ def _is_supported_on_target(
         if not NodeConverter.at_least_one_input_shape_matches_the_output_shape(node):
             return False
 
-        # If one input is in channel first and ranks of input tensors are not equal, we need to add Transposes
-        # Transpose is currently not supported for new flow
-        if any(
-            input_node.meta[NXP_NODE_FORMAT].is_channels_first()
-            for input_node in node.all_input_nodes
-        ) and NodeConverter._node_inputs_ranks_not_equal(node):
-            return False
-
         supported_types = [torch.int8, torch.uint8]
         if not NodeConverter.uses_quantization_type_for_io(
             node, supported_types, [0, 1], [0]
@@ -72,4 +63,9 @@ def convert(self, node: Node):
         t_op = self._create_tflite_op_with_io_tensors(node)
 
         t_op.builtin_options = sub_options.Sub()
-        self.builder.append_operators([t_op])
+
+        # Extra ops needed for shape broadcasting are emitted before the Sub op itself
+        additional_ops = self.builder.ensure_correct_broadcasting(
+            t_op, t_op.tmp_outputs[0]
+        )
+        self.builder.append_operators(additional_ops + [t_op])
@@ -104,6 +104,10 @@ def test__basic_nsys_inference_qat(self, mocker, request):
             pytest.param(
                 [ModelInputSpec((4,)), ModelInputSpec((4, 4))], id="2 inputs 1D + 2D."
             ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 8, 8)), ModelInputSpec((8, 8))],
+                id="2 inputs 4D + 2D.",
+            ),
         ],
     )
     def test__broadcast(self, mocker, request, input_spec):
@@ -193,9 +197,33 @@ def test__w_conv(self, mocker, request, x_input_shape):
                 [ModelInputSpec((1, 4, 1, 67)), ModelInputSpec((1, 8, 5, 67))],
                 id="2 inputs 4D + 4D same width.",
             ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 1))],
+                id="2 inputs 4D + 2D ones tensor.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 8, 8)), ModelInputSpec((8, 8))],
+                id="2 inputs 4D + 2D both dims 8.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 5))],
+                id="2 inputs 4D + 2D one dim 5.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 12, 10)), ModelInputSpec((8, 1, 10))],
+                id="2 inputs 4D + 3D channels dim 1.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 4, 10)), ModelInputSpec((1, 4, 1))],
+                id="2 inputs 4D + 3D channels dim 4.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 25, 18)), ModelInputSpec((4, 1, 8, 25, 18))],
+                id="2 inputs 4D + 5D.",
+            ),
         ],
     )
-    def test__w_conv_broadcast(self, mocker, request, input_spec):
+    def test__broadcast_w_conv(self, mocker, request, input_spec):
         model = AddTensorConvModule()
 
         graph_verifier = DetailedGraphVerifier(
@@ -215,25 +243,3 @@ def test__w_conv_broadcast(self, mocker, request, input_spec):
             comparator,
             remove_quant_io_ops=True,
         )
-
-    @pytest.mark.parametrize(
-        "input_spec",
-        [
-            pytest.param(
-                [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 5))],
-                id="2 inputs 4D + 2D.",
-            ),
-            pytest.param(
-                [ModelInputSpec((1, 4, 4, 10)), ModelInputSpec((1, 4, 1))],
-                id="2 inputs 4D + 3D.",
-            ),
-        ],
-    )
-    def test__w_conv_unsupported(self, input_spec):
-        model = AddTensorConvModule()
-
-        delegated_ep = to_quantized_edge_program(model, input_spec).exported_program()
-
-        # Make sure the `add.Tensor` was NOT delegated.
-        assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
-        assert graph_contains_any_of_ops(delegated_ep.graph, [AddTensor])
@@ -9,12 +9,16 @@
 import pytest
 import torch
 
+from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
 from executorch.backends.nxp.tests.executorch_pipeline import (
     ModelInputSpec,
     to_quantized_edge_program,
 )
 from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops
 from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
+from executorch.backends.nxp.tests.model_output_comparator import (
+    AllCloseOutputComparator,
+)
 from executorch.backends.nxp.tests.models import MulTensorConvModule, MulTensorModule
 from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
 from executorch.backends.nxp.tests.ops_aliases import (
@@ -39,6 +43,7 @@ class TestMulTensor:
             pytest.param((6, 8), id="2D."),
             pytest.param((1, 4, 8), id="3D."),
             pytest.param((1, 4, 8, 8), id="4D."),
+            pytest.param((1, 4, 9, 11, 4), id="5D."),
         ],
     )
     def test__basic_nsys_inference(self, mocker, request, x_input_shape):
@@ -92,7 +97,7 @@ def test__basic_nsys_inference_qat(self, mocker, request, x_input_shape):
             ),
         ],
     )
-    def test__correct_broadcast(self, input_spec, mocker, request):
+    def test__broadcast(self, input_spec, mocker, request):
         model = MulTensorModule()
         graph_verifier = DetailedGraphVerifier(
             mocker, expected_delegated_ops={MulTensor: 1}, expected_non_delegated_ops={}
@@ -116,7 +121,7 @@ def test__correct_broadcast(self, input_spec, mocker, request):
             ),
         ],
     )
-    def test__incorrect_broadcast(self, input_spec):
+    def test__broadcast_unsupported(self, input_spec):
         # Broadcast where at least one of the inputs is not equal to output is not supported
         model = MulTensorModule()
 
@@ -159,21 +164,56 @@ def test__w_conv(self, mocker, request, x_input_shape):
     @pytest.mark.parametrize(
         "input_spec",
         [
+            pytest.param(
+                [ModelInputSpec((1, 4, 7, 1)), ModelInputSpec((1, 8, 1, 1))],
+                id="2 inputs 4D + 4D.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 8, 5, 1))],
+                id="2 inputs 4D + 4D same height.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 1))],
+                id="2 inputs 4D + 2D ones tensor.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 8, 8)), ModelInputSpec((8, 8))],
+                id="2 inputs 4D + 2D both dims 8.",
+            ),
             pytest.param(
                 [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 5))],
-                id="2 inputs 4D + 2D.",
+                id="2 inputs 4D + 2D one dim 5.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 12, 10)), ModelInputSpec((8, 1, 10))],
+                id="2 inputs 4D + 3D channels dim 1.",
             ),
             pytest.param(
                 [ModelInputSpec((1, 4, 4, 10)), ModelInputSpec((1, 4, 1))],
-                id="2 inputs 4D + 3D.",
+                id="2 inputs 4D + 3D channels dim 4.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 25, 18)), ModelInputSpec((4, 1, 8, 25, 18))],
+                id="2 inputs 4D + 5D.",
             ),
         ],
     )
-    def test__w_conv_unsupported(self, input_spec):
+    def test__broadcast_w_conv(self, mocker, request, input_spec):
         model = MulTensorConvModule()
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops={MulTensor: 1, Convolution: 1},
+            expected_non_delegated_ops={},
+        )
+        dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0)
+        comparator = AllCloseOutputComparator(atol=1)
 
-        delegated_ep = to_quantized_edge_program(model, input_spec).exported_program()
-
-        # Make sure the `mul.Tensor` was NOT delegated.
-        assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
-        assert graph_contains_any_of_ops(delegated_ep.graph, [MulTensor])
+        lower_run_compare(
+            model,
+            input_spec,
+            graph_verifier,
+            request,
+            dataset_creator,
+            comparator,
+            remove_quant_io_ops=True,
+        )
@@ -193,9 +193,33 @@ def test__w_conv(self, mocker, request, x_input_shape):
                 [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 8, 5, 1))],
                 id="2 inputs 4D + 4D same height.",
             ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 1))],
+                id="2 inputs 4D + 2D ones tensor.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 8, 8)), ModelInputSpec((8, 8))],
+                id="2 inputs 4D + 2D both dims 8.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 5))],
+                id="2 inputs 4D + 2D one dim 5.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 12, 10)), ModelInputSpec((8, 1, 10))],
+                id="2 inputs 4D + 3D channels dim 1.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 4, 10)), ModelInputSpec((1, 4, 1))],
+                id="2 inputs 4D + 3D channels dim 4.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 25, 18)), ModelInputSpec((4, 1, 8, 25, 18))],
+                id="2 inputs 4D + 5D.",
+            ),
         ],
     )
-    def test__w_conv_broadcast(self, mocker, request, input_spec):
+    def test__broadcast_w_conv(self, mocker, request, input_spec):
         model = SubTensorConvModule()
         graph_verifier = DetailedGraphVerifier(
             mocker,
@@ -214,25 +238,3 @@ def test__w_conv_broadcast(self, mocker, request, input_spec):
             comparator,
             remove_quant_io_ops=True,
         )
-
-    @pytest.mark.parametrize(
-        "input_spec",
-        [
-            pytest.param(
-                [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 5))],
-                id="2 inputs 4D + 2D.",
-            ),
-            pytest.param(
-                [ModelInputSpec((1, 4, 4, 10)), ModelInputSpec((1, 4, 1))],
-                id="2 inputs 4D + 3D.",
-            ),
-        ],
-    )
-    def test__w_conv_unsupported(self, input_spec):
-        model = SubTensorConvModule()
-
-        delegated_ep = to_quantized_edge_program(model, input_spec).exported_program()
-
-        # Make sure the `sub.Tensor` was NOT delegated.
-        assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
-        assert graph_contains_any_of_ops(delegated_ep.graph, [SubTensor])