Skip to content

Commit d998fc3

Browse files
committed
GEMM with Redmule
1 parent c6e4890 commit d998fc3

13 files changed

Lines changed: 524 additions & 27 deletions

File tree

Deeploy/Targets/Generic/Layers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese
280280
N = inputShapes[1][-1]
281281

282282
if len(inputShapes) == 3:
283-
inputShapes[2] = [M, N]
283+
inputShapes[2] = outputShapes[0]
284284

285285
return (inputShapes, outputShapes)
286286

Deeploy/Targets/PULPOpen/TileConstraints/GEMMTileConstraint.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw
235235

236236
dimOffsetA = len(bufferA.shape) - 2
237237
dimOffsetB = len(bufferB.shape) - 2
238+
dimOffsetC = len(bufferC.shape) - 2
238239
dimOffsetOut = len(outputBuffer.shape) - 2
239240

240241
AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = dimOffsetA + parseDict['transA'])
@@ -253,8 +254,8 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw
253254
# Add GEMM Geometrical constraints
254255
tilerModel.addConstraint(ASecondDimVar == BFirstDimVar)
255256

256-
addDimVar_1 = tilerModel.getTensorDimVar(tensorName = bufferC.name, dimIdx = 0)
257-
addDimVar_2 = tilerModel.getTensorDimVar(tensorName = bufferC.name, dimIdx = 1)
257+
addDimVar_1 = tilerModel.getTensorDimVar(tensorName = bufferC.name, dimIdx = dimOffsetC)
258+
addDimVar_2 = tilerModel.getTensorDimVar(tensorName = bufferC.name, dimIdx = dimOffsetC + 1)
258259
tilerModel.addConstraint(outputFirstDimVar == addDimVar_1)
259260
tilerModel.addConstraint(outputSecondDimVar == addDimVar_2)
260261

Deeploy/Targets/Redmule/Bindings.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828
from Deeploy.AbstractDataTypes import PointerClass
2929
from Deeploy.CommonExtensions.DataTypes import float32_t
3030
from Deeploy.DeeployTypes import NodeBinding
31-
from Deeploy.Targets.Generic.TypeCheckers import MatMulChecker, ConvChecker
32-
from Deeploy.Targets.Redmule.Templates import MatmulTemplate, ConvTemplate
31+
from Deeploy.Targets.Generic.TypeCheckers import MatMulChecker, ConvChecker, GEMMChecker
32+
from Deeploy.Targets.Redmule.Templates import MatmulTemplate, ConvTemplate, GEMMTemplate
3333
from Deeploy.Targets.PULPOpen.Bindings import ForkTransformer
3434

3535
RedmuleMatmulBindings = [
@@ -42,4 +42,11 @@
4242
ConvChecker([PointerClass(float32_t), PointerClass(float32_t),
4343
PointerClass(float32_t)], [PointerClass(float32_t)]), ConvTemplate.reference2DIm2ColTemplate,
4444
ForkTransformer)
45+
]
46+
47+
RedmuleGEMMBindings = [
48+
NodeBinding(
49+
GEMMChecker([PointerClass(float32_t), PointerClass(float32_t),
50+
PointerClass(float32_t)], [PointerClass(float32_t)]), GEMMTemplate.referenceTemplate,
51+
ForkTransformer)
4552
]

Deeploy/Targets/Redmule/Deployer.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@
3030
from Deeploy.AbstractDataTypes import Pointer
3131
from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer
3232
from Deeploy.Targets.PULPOpen.Deployer import PULPDeployer
33-
from Deeploy.Targets.Redmule.TopologyOptimizationPasses.Passes import RedMuleAdjustWeightMemoryLayoutPass
34-
35-
33+
from Deeploy.Targets.Redmule.TopologyOptimizationPasses.Passes import RedMuleAdjustWeightMemoryLayoutPass, RedMuleGEMMTransposePass
3634
class RedmuleDeployer(PULPDeployer):
3735

3836
def __init__(self,
@@ -49,5 +47,6 @@ def __init__(self,
4947
default_channels_first, deeployStateDir, inputOffsets)
5048

5149
self.loweringOptimizer.passes += [
52-
RedMuleAdjustWeightMemoryLayoutPass("Redmule")
50+
RedMuleAdjustWeightMemoryLayoutPass("Redmule"),
51+
RedMuleGEMMTransposePass("Redmule")
5352
]

Deeploy/Targets/Redmule/Engine.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,24 @@
2626
from typing import List
2727

2828
import onnx_graphsurgeon as gs
29-
29+
from Deeploy.Targets.Generic.Layers import GEMMLayer
3030
from Deeploy.DeeployTypes import DeploymentEngine, NodeMapper
3131
from Deeploy.Targets.Generic.Layers import MatMulLayer, ConvLayer
32-
from Deeploy.Targets.Generic.Parsers import MatMulParser, ConvParser
33-
from Deeploy.Targets.Redmule.Tiler import RedmuleMatMulTilingReadyBindings, RedmuleConvTilingReadyBindings
32+
from Deeploy.Targets.Generic.Parsers import MatMulParser
33+
from Deeploy.Targets.Redmule.Tiler import RedmuleMatMulTilingReadyBindings, RedmuleConvTilingReadyBindings, RedmuleGEMMTilingReadyBindings
3434
from Deeploy.Targets.PULPOpen.Parsers import PULPFPConv2DParser
35+
from Deeploy.Targets.Redmule.Parsers import GEMMRedmuleParser
3536

3637
MatMulRedmuleMapper = NodeMapper(
3738
MatMulParser(), RedmuleMatMulTilingReadyBindings)
3839
Conv2DRedmuleMapper = NodeMapper(
3940
PULPFPConv2DParser(), RedmuleConvTilingReadyBindings)
41+
GEMMMRedmuleMapper = NodeMapper(GEMMRedmuleParser(), RedmuleGEMMTilingReadyBindings)
4042

4143
RedmuleMapping = {
4244
'MatMul': MatMulLayer([MatMulRedmuleMapper]),
43-
'Conv': ConvLayer([Conv2DRedmuleMapper])
45+
'Conv': ConvLayer([Conv2DRedmuleMapper]),
46+
'Gemm': GEMMLayer([GEMMMRedmuleMapper]),
4447
}
4548

4649
_includeList = []

Deeploy/Targets/Redmule/Parsers.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# ----------------------------------------------------------------------
2+
#
3+
# File: Parsers.py
4+
#
5+
# Last edited: 15.12.2021
6+
#
7+
# Copyright (C) 2021, ETH Zurich and University of Bologna.
8+
#
9+
# Authors:
10+
# - Moritz Scherer, ETH Zurich
11+
# - Victor Jung, ETH Zurich
12+
#
13+
# ----------------------------------------------------------------------
14+
# SPDX-License-Identifier: Apache-2.0
15+
#
16+
# Licensed under the Apache License, Version 2.0 (the License); you may
17+
# not use this file except in compliance with the License.
18+
# You may obtain a copy of the License at
19+
#
20+
# www.apache.org/licenses/LICENSE-2.0
21+
#
22+
# Unless required by applicable law or agreed to in writing, software
23+
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
24+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25+
# See the License for the specific language governing permissions and
26+
# limitations under the License.
27+
28+
import math
29+
from typing import Tuple
30+
31+
import numpy as np
32+
import onnx_graphsurgeon as gs
33+
34+
from Deeploy.DeeployTypes import NetworkContext, NodeParser
35+
from Deeploy.Targets.Generic.Parsers import MatMulParser
36+
37+
class GEMMRedmuleParser(MatMulParser):
    """Parser for ONNX ``Gemm`` nodes mapped onto the RedMulE engine.

    Builds on ``MatMulParser`` and additionally records the GEMM attributes
    (``transA``, ``transB``, ``alpha``, ``beta``) and the optional third
    input ``C`` in ``operatorRepresentation``.
    """

    def __init__(self, noBiasHoisting = True):
        """Create the parser.

        Args:
            noBiasHoisting: If False, a node without a third input gets a
                zero-valued constant hoisted into the context and registered
                as ``C``. If True, no ``C`` entry is created for such nodes.
        """
        self.noBiasHoisting = noBiasHoisting
        super().__init__()

    def parseNode(self, node: gs.Node) -> (bool):
        """Check whether ``node`` is supported and record its attributes.

        A node is accepted when it has at least two inputs, exactly one
        output, and ``alpha`` equal to 1. Missing attributes fall back to
        ``transA = 0``, ``transB = 0``, ``alpha = 1`` and ``beta = 1``.

        Returns:
            True if the node is accepted, False otherwise.
        """
        attrs = node.attrs

        # Use .get() so a node that omits 'alpha' is treated as alpha == 1
        # instead of raising KeyError during the support check.
        ret = all([
            len(node.inputs) >= 2,
            len(node.outputs) == 1,
            attrs.get('alpha', 1) == 1,
        ])

        if ret:
            self.operatorRepresentation['transA'] = attrs.get('transA', 0)
            self.operatorRepresentation['transB'] = attrs.get('transB', 0)
            self.operatorRepresentation['alpha'] = attrs.get('alpha', 1)
            self.operatorRepresentation['beta'] = attrs.get('beta', 1)

        return ret

    def parseNodeCtxt(self,
                      ctxt: NetworkContext,
                      node: gs.Node,
                      channels_first: bool = True) -> Tuple[NetworkContext, bool]:
        """Bind the node's tensors to their context buffers.

        Delegates to ``MatMulParser.parseNodeCtxt`` first. On success it
        stores the buffer names of ``A``, ``B`` and ``data_out``, resolves
        the optional ``C`` operand, and stores the element count of the
        first input as ``size``.

        Returns:
            The (possibly updated) context and the success flag reported by
            the parent parser.
        """
        newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)

        if ret:
            inputs = ['A', 'B']
            outputs = ['data_out']

            # Only the first two inputs are named here; a third input (C)
            # is handled separately below.
            for key, inputNode in zip(inputs, node.inputs):
                self.operatorRepresentation[key] = newCtxt.lookup(inputNode.name).name
            for idx, outputNode in enumerate(node.outputs):
                self.operatorRepresentation[outputs[idx]] = newCtxt.lookup(outputNode.name).name

            if len(node.inputs) == 3:
                self.operatorRepresentation['C'] = newCtxt.lookup(node.inputs[2].name).name
            elif not self.noBiasHoisting:
                # No C operand given: hoist a zero constant to stand in for it.
                values = np.zeros((1))
                zeroTensor = gs.Constant(f'{node.name}_C_Tensor', values = values)
                newCtxt.hoistConstant(zeroTensor)
                self.operatorRepresentation['C'] = f'{node.name}_C_Tensor'
            # NOTE(review): with noBiasHoisting = True (the default) a node
            # without a third input leaves 'C' unset - confirm that every
            # consumer of operatorRepresentation tolerates that.

            self.operatorRepresentation['size'] = np.prod(newCtxt.lookup(node.inputs[0].name).shape)

        return newCtxt, ret
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# ----------------------------------------------------------------------
2+
#
3+
# File: GEMMTemplate.py
4+
#
5+
# Last edited: 27.01.2025
6+
#
7+
# Copyright (C) 2023, ETH Zurich and University of Bologna.
8+
#
9+
# Author: Run Wang, ETH Zurich
10+
#
11+
# ----------------------------------------------------------------------
12+
# SPDX-License-Identifier: Apache-2.0
13+
#
14+
# Licensed under the Apache License, Version 2.0 (the License); you may
15+
# not use this file except in compliance with the License.
16+
# You may obtain a copy of the License at
17+
#
18+
# www.apache.org/licenses/LICENSE-2.0
19+
#
20+
# Unless required by applicable law or agreed to in writing, software
21+
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
22+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23+
# See the License for the specific language governing permissions and
24+
# limitations under the License.
25+
from Deeploy.DeeployTypes import NodeTemplate
26+
27+
# Code-generation template for GEMM nodes executed on the RedMulE accelerator.
#
# The generated C code runs on core 0 only and loops over ${batch} slices,
# advancing each operand pointer by one matrix per iteration. When the node's
# `beta` is 0 the C operand is not used and the plain MatMul kernel is called;
# otherwise the Gemm kernel is called with C.
#
# `batch_C` is declared inside the Gemm branch only, so the MatMul branch does
# not emit an unused variable.
#
# NOTE(review): batch, M, N and O are not set by the parser shown in this
# change - presumably they come from the parent MatMul parser; confirm.
# NOTE(review): the per-batch offsets assume every operand carries the batch
# dimension - TODO confirm for shared (rank-2) B / C operands.
referenceTemplate = NodeTemplate("""
// GEMM using RedMule hardware accelerator (Name: ${nodeName}, Op: ${nodeOp})

int8_t ${nodeName}_core_id = pi_core_id();

if (${nodeName}_core_id == 0) {
    for(uint32_t b=0; b<${batch}; b++) {
        ${A_type.typeName} batch_A = ${A} + b * ${M} * ${N};
        ${B_type.typeName} batch_B = ${B} + b * ${N} * ${O};
        ${data_out_type.typeName} batch_out = ${data_out} + b * ${M} * ${O};

        % if beta == 0:
        MatMul_fp${A_type.referencedType.typeWidth}_fp${B_type.referencedType.typeWidth}_fp${B_type.referencedType.typeWidth}_Redmule(
            (const float32_t *) batch_A,
            (const float32_t *) batch_B,
            (float32_t *) batch_out,
            ${M},
            ${N},
            ${O}
        );
        % else:
        ${C_type.typeName} batch_C = ${C} + b * ${M} * ${O};

        Gemm_fp${A_type.referencedType.typeWidth}_fp${B_type.referencedType.typeWidth}_fp${B_type.referencedType.typeWidth}_fp${B_type.referencedType.typeWidth}_Redmule(
            (const float32_t *) batch_A,
            (const float32_t *) batch_B,
            (const float32_t *) batch_C,
            (float32_t *) batch_out,
            ${M},
            ${N},
            ${O}
        );
        % endif
    }
}
""")

0 commit comments

Comments
 (0)