pulp-platform
diff --git a/‎Deeploy/Targets/Generic/Layers.py‎
Lines changed: 1 addition & 1 deletion b/‎Deeploy/Targets/Generic/Layers.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Deeploy/Targets/PULPOpen/TileConstraints/GEMMTileConstraint.py‎
Lines changed: 3 additions & 2 deletions b/‎Deeploy/Targets/PULPOpen/TileConstraints/GEMMTileConstraint.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎Deeploy/Targets/Redmule/Bindings.py‎
Lines changed: 9 additions & 2 deletions b/‎Deeploy/Targets/Redmule/Bindings.py‎
Lines changed: 9 additions & 2 deletions
diff --git a/‎Deeploy/Targets/Redmule/Deployer.py‎
Lines changed: 3 additions & 4 deletions b/‎Deeploy/Targets/Redmule/Deployer.py‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎Deeploy/Targets/Redmule/Engine.py‎
Lines changed: 7 additions & 4 deletions b/‎Deeploy/Targets/Redmule/Engine.py‎
Lines changed: 7 additions & 4 deletions
diff --git a/‎Deeploy/Targets/Redmule/Parsers.py‎
Lines changed: 99 additions & 0 deletions b/‎Deeploy/Targets/Redmule/Parsers.py‎
Lines changed: 99 additions & 0 deletions
diff --git a/‎Deeploy/Targets/Redmule/Templates/GEMMTemplate.py‎
Lines changed: 62 additions & 0 deletions b/‎Deeploy/Targets/Redmule/Templates/GEMMTemplate.py‎
Lines changed: 62 additions & 0 deletions
@@ -280,7 +280,7 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese
             N = inputShapes[1][-1]
 
         if len(inputShapes) == 3:
-            inputShapes[2] = [M, N]
+            inputShapes[2] = outputShapes[0]
 
         return (inputShapes, outputShapes)
 
 
@@ -235,6 +235,7 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw
 
         dimOffsetA = len(bufferA.shape) - 2
         dimOffsetB = len(bufferB.shape) - 2
+        dimOffsetC = len(bufferC.shape) - 2
         dimOffsetOut = len(outputBuffer.shape) - 2
 
         AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = dimOffsetA + parseDict['transA'])
@@ -253,8 +254,8 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw
         # Add GEMM Geometrical constraints
         tilerModel.addConstraint(ASecondDimVar == BFirstDimVar)
 
-        addDimVar_1 = tilerModel.getTensorDimVar(tensorName = bufferC.name, dimIdx = 0)
-        addDimVar_2 = tilerModel.getTensorDimVar(tensorName = bufferC.name, dimIdx = 1)
+        addDimVar_1 = tilerModel.getTensorDimVar(tensorName = bufferC.name, dimIdx = dimOffsetC)
+        addDimVar_2 = tilerModel.getTensorDimVar(tensorName = bufferC.name, dimIdx = dimOffsetC + 1)
         tilerModel.addConstraint(outputFirstDimVar == addDimVar_1)
         tilerModel.addConstraint(outputSecondDimVar == addDimVar_2)
 
 
@@ -28,8 +28,8 @@
 from Deeploy.AbstractDataTypes import PointerClass
 from Deeploy.CommonExtensions.DataTypes import  float32_t
 from Deeploy.DeeployTypes import NodeBinding
-from Deeploy.Targets.Generic.TypeCheckers import MatMulChecker, ConvChecker
-from Deeploy.Targets.Redmule.Templates import MatmulTemplate, ConvTemplate  
+from Deeploy.Targets.Generic.TypeCheckers import MatMulChecker, ConvChecker, GEMMChecker
+from Deeploy.Targets.Redmule.Templates import MatmulTemplate, ConvTemplate, GEMMTemplate  
 from Deeploy.Targets.PULPOpen.Bindings import ForkTransformer
 
 RedmuleMatmulBindings =  [
@@ -42,4 +42,11 @@
         ConvChecker([PointerClass(float32_t), PointerClass(float32_t),
                      PointerClass(float32_t)], [PointerClass(float32_t)]), ConvTemplate.reference2DIm2ColTemplate,
         ForkTransformer)
+]
+
+# GEMM bindings for RedMule: GEMMChecker is given three float32 pointer types
+# and one float32 pointer type (presumably the A/B/C inputs and the output -
+# confirm against GEMMChecker), the code comes from
+# GEMMTemplate.referenceTemplate, and ForkTransformer is the code transformer.
+RedmuleGEMMBindings = [
+    NodeBinding(
+        GEMMChecker([PointerClass(float32_t), PointerClass(float32_t),
+                     PointerClass(float32_t)], [PointerClass(float32_t)]), GEMMTemplate.referenceTemplate,
+        ForkTransformer)
 ]
@@ -30,9 +30,7 @@
 from Deeploy.AbstractDataTypes import Pointer
 from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer
 from Deeploy.Targets.PULPOpen.Deployer import PULPDeployer
-from Deeploy.Targets.Redmule.TopologyOptimizationPasses.Passes import RedMuleAdjustWeightMemoryLayoutPass
-
-
+from Deeploy.Targets.Redmule.TopologyOptimizationPasses.Passes import RedMuleAdjustWeightMemoryLayoutPass, RedMuleGEMMTransposePass
 class RedmuleDeployer(PULPDeployer):
 
     def __init__(self,
@@ -49,5 +47,6 @@ def __init__(self,
                          default_channels_first, deeployStateDir, inputOffsets)
 
         self.loweringOptimizer.passes += [
-            RedMuleAdjustWeightMemoryLayoutPass("Redmule")
+            RedMuleAdjustWeightMemoryLayoutPass("Redmule"),
+            RedMuleGEMMTransposePass("Redmule")
         ]
@@ -26,21 +26,24 @@
 from typing import List
 
 import onnx_graphsurgeon as gs
-
+from Deeploy.Targets.Generic.Layers import GEMMLayer
 from Deeploy.DeeployTypes import DeploymentEngine, NodeMapper
 from Deeploy.Targets.Generic.Layers import MatMulLayer, ConvLayer
-from Deeploy.Targets.Generic.Parsers import MatMulParser, ConvParser
-from Deeploy.Targets.Redmule.Tiler import RedmuleMatMulTilingReadyBindings, RedmuleConvTilingReadyBindings
+from Deeploy.Targets.Generic.Parsers import MatMulParser
+from Deeploy.Targets.Redmule.Tiler import RedmuleMatMulTilingReadyBindings, RedmuleConvTilingReadyBindings, RedmuleGEMMTilingReadyBindings
 from Deeploy.Targets.PULPOpen.Parsers import PULPFPConv2DParser
+from Deeploy.Targets.Redmule.Parsers import GEMMRedmuleParser
 
 MatMulRedmuleMapper = NodeMapper(
     MatMulParser(), RedmuleMatMulTilingReadyBindings)
 Conv2DRedmuleMapper = NodeMapper(
     PULPFPConv2DParser(), RedmuleConvTilingReadyBindings)
+# Pairs GEMMRedmuleParser with the tiling-ready RedMule GEMM bindings.
+# NOTE(review): the name carries three Ms ("GEMMM"), unlike the MatMul/Conv2D
+# mappers - likely a typo; a rename must also update the 'Gemm' entry of
+# RedmuleMapping, which references this name.
+GEMMMRedmuleMapper = NodeMapper(GEMMRedmuleParser(), RedmuleGEMMTilingReadyBindings)
 
 RedmuleMapping = {
     'MatMul': MatMulLayer([MatMulRedmuleMapper]),
-    'Conv': ConvLayer([Conv2DRedmuleMapper])
+    'Conv': ConvLayer([Conv2DRedmuleMapper]),
+    'Gemm': GEMMLayer([GEMMMRedmuleMapper]),
 }
 
 _includeList = []
 
@@ -0,0 +1,99 @@
+# ----------------------------------------------------------------------
+#
+# File: Parsers.py
+#
+# Last edited: 15.12.2021
+#
+# Copyright (C) 2021, ETH Zurich and University of Bologna.
+#
+# Authors:
+# - Moritz Scherer, ETH Zurich
+# - Victor Jung, ETH Zurich
+#
+# ----------------------------------------------------------------------
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+from typing import Tuple
+
+import numpy as np
+import onnx_graphsurgeon as gs
+
+from Deeploy.DeeployTypes import NetworkContext, NodeParser
+from Deeploy.Targets.Generic.Parsers import MatMulParser
+
+class GEMMRedmuleParser(MatMulParser):
+    """Parser for ONNX ``Gemm`` nodes targeting the RedMule engine.
+
+    Only nodes with at least two inputs, one output and ``alpha == 1`` are
+    accepted. ``transA``, ``transB``, ``alpha`` and ``beta`` are copied into the
+    operator representation, defaulting to 0, 0, 1 and 1 when absent.
+    """
+
+    def __init__(self, noBiasHoisting = True):
+        # When False, nodes without exactly three inputs get a hoisted zero C.
+        self.noBiasHoisting = noBiasHoisting
+        super().__init__()
+
+    def parseNode(self, node: gs.Node) -> bool:
+        """Return True when the node has >= 2 inputs, 1 output and alpha == 1."""
+        ret = all([
+            len(node.inputs) >= 2,
+            len(node.outputs) == 1,
+            # .get(): a missing alpha counts as 1 instead of raising KeyError.
+            node.attrs.get('alpha', 1) == 1,
+        ])
+
+        if ret:
+            defaults = {'transA': 0, 'transB': 0, 'alpha': 1, 'beta': 1}
+            for key, fallback in defaults.items():
+                self.operatorRepresentation[key] = node.attrs.get(key, fallback)
+
+        return ret
+
+    def parseNodeCtxt(self,
+                      ctxt: NetworkContext,
+                      node: gs.Node,
+                      channels_first: bool = True) -> Tuple[NetworkContext, bool]:
+        """Record the context names of the node's tensors for the template.
+
+        After the base-class parse succeeds, the first two inputs are stored as
+        ``A`` and ``B``, the output as ``data_out``, a third input (or a hoisted
+        zero constant when ``noBiasHoisting`` is False) as ``C``, and the element
+        count of the first input as ``size``. Otherwise no ``C`` entry is written.
+        """
+        newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)
+
+        if ret:
+            inputs = ['A', 'B']
+            outputs = ['data_out']
+
+            for key, inputNode in zip(inputs, node.inputs):
+                self.operatorRepresentation[key] = newCtxt.lookup(inputNode.name).name
+            for idx, outputNode in enumerate(node.outputs):
+                self.operatorRepresentation[outputs[idx]] = newCtxt.lookup(outputNode.name).name
+
+            if len(node.inputs) == 3:
+                self.operatorRepresentation['C'] = newCtxt.lookup(node.inputs[2].name).name
+            elif not self.noBiasHoisting:
+                # No usable bias input: hoist a one-element zero constant as C.
+                values = np.zeros((1))
+                zeroTensor = gs.Constant(f'{node.name}_C_Tensor', values = values)
+                newCtxt.hoistConstant(zeroTensor)
+                self.operatorRepresentation['C'] = f'{node.name}_C_Tensor'
+
+            self.operatorRepresentation['size'] = np.prod(newCtxt.lookup(node.inputs[0].name).shape)
+
+        return newCtxt, ret
@@ -0,0 +1,62 @@
+# ----------------------------------------------------------------------
+#
+# File: GEMMTemplate.py
+#
+# Last edited: 27.01.2025
+#
+# Copyright (C) 2023, ETH Zurich and University of Bologna.
+#
+# Author: Run Wang, ETH Zurich
+#
+# ----------------------------------------------------------------------
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from Deeploy.DeeployTypes import NodeTemplate
+
+# Template (Mako-style ${...} / "% if" syntax) that emits the C code for a
+# batched GEMM on RedMule. In the emitted code only the core whose
+# pi_core_id() is 0 runs the loop; each of the ${batch} iterations offsets the
+# A, B, C and output pointers by b*M*N, b*N*O, b*M*O and b*M*O elements.
+# With beta == 0 the MatMul kernel is called without C; otherwise the Gemm
+# kernel also receives C.
+# NOTE(review): ${C} and ${C_type} are expanded even when beta == 0, so the
+# operator representation presumably must always provide C - confirm.
+# NOTE(review): the kernel-name suffixes after the first are all built from
+# B_type's width, not from the C/output types - confirm this is intended.
+referenceTemplate = NodeTemplate("""
+// GEMM using RedMule hardware accelerator (Name: ${nodeName}, Op: ${nodeOp})
+
+int8_t ${nodeName}_core_id = pi_core_id();
+
+if (${nodeName}_core_id == 0) {
+    for(uint32_t b=0; b<${batch}; b++) {
+        ${A_type.typeName} batch_A = ${A} + b * ${M} * ${N};
+        ${B_type.typeName} batch_B = ${B} + b * ${N} * ${O};
+        ${C_type.typeName} batch_C = ${C} + b * ${M} * ${O};
+        ${data_out_type.typeName} batch_out = ${data_out} + b * ${M} * ${O};
+        
+        % if beta == 0:
+        MatMul_fp${A_type.referencedType.typeWidth}_fp${B_type.referencedType.typeWidth}_fp${B_type.referencedType.typeWidth}_Redmule(
+            (const float32_t *) batch_A,
+            (const float32_t *) batch_B,
+            (float32_t *) batch_out,
+            ${M},
+            ${N},
+            ${O}
+        );
+        % else:
+        Gemm_fp${A_type.referencedType.typeWidth}_fp${B_type.referencedType.typeWidth}_fp${B_type.referencedType.typeWidth}_fp${B_type.referencedType.typeWidth}_Redmule(
+            (const float32_t *) batch_A,
+            (const float32_t *) batch_B,
+            (const float32_t *) batch_C,
+            (float32_t *) batch_out,
+            ${M},
+            ${N},
+            ${O}
+        );
+        % endif
+    }
+}
+"""
+)