30 commits
- cc1f68b  training-platform core: minimal MLP end-to-end training on Siracusa (runwangdl, Apr 10, 2026)
- 284f145  training-platform core: apply pre-commit formatting (yapf/isort/clang… (runwangdl, Apr 10, 2026)
- 763b464  training-platform core: canonicalise SoftmaxCrossEntropyLoss to 2 out… (runwangdl, Apr 10, 2026)
- e348863  training-platform core: collapse InPlaceAccumulatorV2 template to sin… (runwangdl, Apr 10, 2026)
- b844fe1  training-platform core: generalise SGD alias comment to L2 or L3 (runwangdl, Apr 10, 2026)
- ceeb951  training-platform core: trim InPlaceAccumulatorV2 tile constraint com… (runwangdl, Apr 10, 2026)
- ecbffa0  training-platform core: drop leftover egress-target comment (runwangdl, Apr 10, 2026)
- 728c68f  training-platform core: drop stray ReluGradTileConstraint (runwangdl, Apr 10, 2026)
- fc24a84  training-platform core: delete stray SoftmaxCrossEntropyLossDualOutpu… (runwangdl, Apr 10, 2026)
- 12597be  training-platform core: simplify MiniMalloc alias-skip block (runwangdl, Apr 10, 2026)
- b42ea1d  training-platform core: restore per-layer { } block in generateInfere… (runwangdl, Apr 10, 2026)
- 5285021  training-platform core: restore upstream SoftmaxCrossEntropy kernel t… (runwangdl, Apr 10, 2026)
- 40e8339  training-platform core: drop legacy 1-output Softmax/CrossEntropy ker… (runwangdl, Apr 10, 2026)
- 91931cf  training-platform core: propagate loss verification result + drop dea… (runwangdl, Apr 10, 2026)
- f177a5b  training-platform core: label Step B in run_optimizer_step (runwangdl, Apr 10, 2026)
- 0f853fc  training-platform core: drop _augment_path PATH manipulation (runwangdl, Apr 10, 2026)
- 55c91d0  training-platform core: extract training codegen helpers to trainingU… (runwangdl, Apr 10, 2026)
- 2d53fe2  training-platform core: collapse duplicate tiled/non-tiled training b… (runwangdl, Apr 10, 2026)
- b689d3f  training-platform core: extract training codegen argparse builders to… (runwangdl, Apr 10, 2026)
- 4218ba1  training-platform core: lift execution.py training subprocess helpers… (runwangdl, Apr 10, 2026)
- f5255d3  training-platform core: lift _resolve_optimizer_dir to trainingUtils (runwangdl, Apr 10, 2026)
- 5a839b1  training-platform core: decouple execution.py from training pipeline (runwangdl, Apr 10, 2026)
- 3d309b7  training-platform core: extract training codegen helpers to codeGener… (runwangdl, Apr 10, 2026)
- 969f593  training-platform core: drop redundant top-level deeployTrainingRunne… (runwangdl, Apr 10, 2026)
- ac4df5b  training-platform core: drop non-training helper wrappers from traini… (runwangdl, Apr 10, 2026)
- 9d3445c  training-platform core: drop unused loop var and populate zero-sized … (runwangdl, Apr 10, 2026)
- 40c5da9  training-platform core: apply pre-commit yapf + autoflake autofixes (runwangdl, Apr 10, 2026)
- 191a30b  training-platform core: drop CCT2_FT2 from siracusa tiled L3 CI list (runwangdl, Apr 10, 2026)
- 38f9984  Add simpleMLP training to CI (runwangdl, Apr 13, 2026)
- 3fe61d5  fix linting (runwangdl, Apr 13, 2026)
7 changes: 3 additions & 4 deletions Deeploy/DeeployTypes.py
@@ -336,14 +336,14 @@ def has_live_aliases(self, ctxt: NetworkContext) -> bool:
True if this VariableBuffer has any live aliases, False otherwise
"""
# Do a breadth-first search across the aliasing double-linked list
- live = self._live
+ live = self._live or self.is_input or self.is_output
queue = set(self.aliases)
visited = set(self.name)
while len(queue) > 0:
next = queue.pop()
buffNext = ctxt.lookup(next)
assert isinstance(buffNext, VariableBuffer)
- live |= buffNext._live
+ live |= buffNext._live or buffNext.is_input or buffNext.is_output
visited.add(next)
queue |= buffNext.aliases - visited
return live
@@ -2800,8 +2800,7 @@ def generateInferenceCode(self) -> str:
self.ctxt, code = node.generate(self.ctxt)

sections = reduce(lambda a, b: a + b, code, [])
- layerCode = reduce(lambda a, b: a + b, sections, "")
- callStack += "{\n" + layerCode + "\n}\n"
+ callStack += reduce(lambda a, b: a + b, sections, "")

return callStack

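The change above widens liveness: graph inputs and outputs now count as live when walking the alias list, not only buffers whose _live flag is set. A minimal self-contained sketch of the traversal's semantics (hypothetical Buffer stand-in; the real method walks Deeploy's VariableBuffer objects through the NetworkContext):

from dataclasses import dataclass, field
from typing import Dict, Set


@dataclass
class Buffer:
    # Hypothetical stand-in for the VariableBuffer fields used by the check.
    name: str
    live: bool = False
    is_input: bool = False
    is_output: bool = False
    aliases: Set[str] = field(default_factory = set)


def has_live_aliases(ctxt: Dict[str, Buffer], buf: Buffer) -> bool:
    # Breadth-first search over the aliasing links: the buffer is live if it,
    # or any alias reachable from it, is live or is a graph input/output.
    live = buf.live or buf.is_input or buf.is_output
    queue = set(buf.aliases)
    visited = {buf.name}
    while queue:
        name = queue.pop()
        alias = ctxt[name]
        live |= alias.live or alias.is_input or alias.is_output
        visited.add(name)
        queue |= alias.aliases - visited
    return live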
16 changes: 16 additions & 0 deletions Deeploy/Targets/Generic/Layers.py
@@ -492,6 +492,22 @@ def __init__(self, maps: List[NodeMapper]):
super().__init__(maps)


class InPlaceAccumulatorV2Layer(ONNXLayer):
"""Layer for ORT InPlaceAccumulatorV2 operator (com.microsoft).

Gradient accumulation with optional reset:
if lazy_reset_grad: out = gradient
else: out = buffer + gradient
"""

def __init__(self, maps: List[NodeMapper]):
super().__init__(maps)

def computeOps(self):
# One conditional check + one element-wise op (copy or add) per element
return self.mapper.parser.operatorRepresentation['size']


class LinearAttentionLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
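The cost model above counts one element-wise operation per element, which matches the operator's contract. As a reference for the semantics (a NumPy sketch of the documented ORT behaviour, not Deeploy code):

import numpy as np


def inplace_accumulator_v2(buffer: np.ndarray, gradient: np.ndarray,
                           lazy_reset_grad: bool) -> np.ndarray:
    # com.microsoft.InPlaceAccumulatorV2: a set reset flag overwrites the
    # buffer with the incoming gradient; otherwise the gradient is added.
    if lazy_reset_grad:
        return gradient.copy()
    return buffer + gradient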
55 changes: 53 additions & 2 deletions Deeploy/Targets/Generic/Parsers.py
@@ -2617,7 +2617,8 @@ def __init__(self):

def parseNode(self, node: gs.Node) -> bool:

- ret = all([len(node.inputs) == 2, len(node.outputs) == 1])
+ # Accept 1 output (log_prob only) or 2 outputs (loss + log_prob)
+ ret = all([len(node.inputs) == 2, len(node.outputs) in (1, 2)])

return ret

@@ -2628,7 +2629,15 @@ def parseNodeCtxt(self,

logits = ctxt.lookup(node.inputs[0].name)
labels = ctxt.lookup(node.inputs[1].name)
- log_prob = ctxt.lookup(node.outputs[0].name)
+ if len(node.outputs) == 2:
+     # Dual-output: outputs[0]=loss (scalar), outputs[1]=log_prob
+     loss = ctxt.lookup(node.outputs[0].name)
+     log_prob = ctxt.lookup(node.outputs[1].name)
+     self.operatorRepresentation['loss'] = loss.name
+ else:
+     # Single-output (legacy): outputs[0]=log_prob
+     log_prob = ctxt.lookup(node.outputs[0].name)
+     self.operatorRepresentation['loss'] = ''
self.operatorRepresentation['logits'] = logits.name
self.operatorRepresentation['labels'] = labels.name
self.operatorRepresentation['log_prob'] = log_prob.name
@@ -2697,6 +2706,48 @@
return ctxt, True


class InPlaceAccumulatorV2Parser(NodeParser):
"""Parser for ORT InPlaceAccumulatorV2 operator (com.microsoft).

Semantics:
if lazy_reset_grad: out = gradient (reset)
else: out = buffer + gradient (accumulate)

Inputs:
0: buffer - current accumulation buffer (float tensor)
1: gradient - new gradient to accumulate (float tensor, same shape)
2: lazy_reset_grad - reset flag; if true, overwrite; else add (bool[1])

Output:
0: output_buffer - updated accumulation buffer (float tensor)
"""

def __init__(self):
super().__init__()

def parseNode(self, node: gs.Node) -> bool:
# Require exactly 3 inputs (buffer, gradient, lazy_reset_grad) and 1 output
Review comment (Member): NITPICK: Redundant comment

return len(node.inputs) == 3 and len(node.outputs) == 1

def parseNodeCtxt(self,
ctxt: NetworkContext,
node: gs.Node,
channels_first: bool = True) -> Tuple[NetworkContext, bool]:

buffer = ctxt.lookup(node.inputs[0].name)
gradient = ctxt.lookup(node.inputs[1].name)
lazy_reset_grad = ctxt.lookup(node.inputs[2].name)
data_out = ctxt.lookup(node.outputs[0].name)

self.operatorRepresentation['accum_buffer'] = buffer.name
self.operatorRepresentation['gradient'] = gradient.name
self.operatorRepresentation['lazy_reset_grad'] = lazy_reset_grad.name
self.operatorRepresentation['data_out'] = data_out.name
self.operatorRepresentation['size'] = int(np.prod(buffer.shape))

return ctxt, True
Victor-Jung marked this conversation as resolved.


class BatchNormParser(NodeParser):

def __init__(self):
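The parser records loss as a buffer name for the dual-output form and as an empty string for the legacy form; the type checker below keys off that marker. For reference, the two layouts correspond to the following semantics (a NumPy sketch assuming [N, C] float32 logits, integer class labels, and mean reduction; the binding's actual kernel contract is defined by the templates):

import numpy as np


def softmax_cross_entropy_loss(logits: np.ndarray, labels: np.ndarray):
    # Dual-output form returns (loss, log_prob); the legacy single-output
    # form exposes only log_prob.
    shifted = logits - logits.max(axis = 1, keepdims = True)  # numerical stability
    log_prob = shifted - np.log(np.exp(shifted).sum(axis = 1, keepdims = True))
    loss = -log_prob[np.arange(labels.shape[0]), labels].mean()
    return loss, log_prob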
37 changes: 35 additions & 2 deletions Deeploy/Targets/Generic/TypeCheckers.py
@@ -574,14 +574,21 @@ class SoftmaxCrossEntropyLossChecker(SignPropTypeChecker):
def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
super().__init__(input_types, output_types)

+ def checkOutputType(self, inputs: List[VariableBuffer],
+                     operatorRepresentation: OperatorRepresentation) -> bool:
+     # The parser sets 'loss' to a non-empty string for 2-output nodes, '' for 1-output.
+     # Use this to determine the actual output count and match it against this binding.
+     actual_num_outputs = 2 if operatorRepresentation.get('loss', '') != '' else 1
+     return actual_num_outputs == len(self.output_types)
coderabbitai[bot] marked this conversation as resolved. (Outdated)

def _inferNumLevels(self, inputs: List[VariableBuffer],
operatorRepresentation: OperatorRepresentation) -> Optional[List[int]]:

- return [2**(self.input_types[0].referencedType.typeWidth)]
+ return [2**(self.input_types[0].referencedType.typeWidth)] * len(self.output_types)

def _inferSignedness(self, inputs: List[VariableBuffer],
operatorRepresentation: OperatorRepresentation) -> Optional[List[bool]]:
- return [False]
+ return [False] * len(self.output_types)


class SGDChecker(SignPropTypeChecker):
@@ -598,6 +605,32 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
return [True]


class InPlaceAccumulatorV2Checker(SignPropTypeChecker):
"""Type checker for ORT InPlaceAccumulatorV2 operator (com.microsoft).

Inputs:
0: buffer (float32*)
1: gradient (float32*)
2: lazy_reset_grad (uint8_t* or bool* - 1 element)

Output:
0: output_buffer (float32*)
"""

def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
super().__init__(input_types, output_types)

def _inferNumLevels(self, inputs: List[VariableBuffer],
operatorRepresentation: OperatorRepresentation) -> List[int]:
# Output has same precision as the buffer input (float32)
return [2**(self.input_types[0].referencedType.typeWidth)]

def _inferSignedness(self, inputs: List[VariableBuffer],
operatorRepresentation: OperatorRepresentation) -> List[bool]:
# Float32 output is signed
return [True]


class BatchNormChecker(SignPropTypeChecker):

def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
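The checker changes simply replicate the inferred metadata once per output, so a dual-output binding receives two-element lists. Illustratively (assumed values for a float32 dual-output binding):

num_outputs = 2                           # loss + log_prob
type_width = 32                           # float32_t.typeWidth
n_levels = [2**type_width] * num_outputs  # -> [4294967296, 4294967296]
signedness = [False] * num_outputs        # -> [False, False]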
38 changes: 30 additions & 8 deletions Deeploy/Targets/PULPOpen/Bindings.py
@@ -18,9 +18,9 @@
from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, DequantTemplate, FloatReduceSumTemplate, \
GatherTemplate, QuantTemplate, RQSiGELUTemplate, SliceTemplate, iHardswishTemplate
from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DequantChecker, \
- GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, \
- QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, RQHardswishChecker, SGDChecker, \
- SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker
+ GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, InPlaceAccumulatorV2Checker, LayerNormChecker, \
+ MatMulChecker, MulChecker, QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, \
+ RQHardswishChecker, SGDChecker, SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling
@@ -29,11 +29,12 @@
from Deeploy.Targets.PULPOpen.DMA.L3Dma import l3DmaHack
from Deeploy.Targets.PULPOpen.DMA.MchanDma import MchanDma
from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, DMASliceTemplate, FloatAddTemplate, FloatConvTemplate, \
- FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, \
- FloatMulTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GEMMTemplate, \
- MatrixVectorTemplate, MaxPoolTemplate, MulTemplate, ReduceMeanTemplate, RequantShiftTemplate, ReshapeTemplate, \
- RQAddTemplate, RQSiHardswishTemplate, SGDTemplate, SoftmaxCrossEntropyLossTemplate, TallGEMMTemplate, \
- TransposeTemplate, UniformRequantShiftTemplate, iRMSNormTemplate, iSoftmaxTemplate
+ FloatGELUTemplate, FloatGemmTemplate, FloatInPlaceAccumulatorV2Template, FloatLayernormTemplate, \
+ FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatReduceMeanTemplate, FloatReluTemplate, \
+ FloatSoftmaxTemplate, GEMMTemplate, MatrixVectorTemplate, MaxPoolTemplate, MulTemplate, ReduceMeanTemplate, \
+ RequantShiftTemplate, ReshapeTemplate, RQAddTemplate, RQSiHardswishTemplate, SGDTemplate, \
+ SoftmaxCrossEntropyLossTemplate, TallGEMMTemplate, TransposeTemplate, UniformRequantShiftTemplate, \
+ iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.PULPOpen.TypeCheckers import PULPConvChecker, PULPLinearChecker, PULPMaxPoolChecker, \
PULPRequantShiftChecker
from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement, \
@@ -357,6 +358,13 @@
SoftmaxCrossEntropyLossTemplate.referenceTemplate, ForkTransformer) for type in IntegerDataTypes
]

PULPSoftmaxCrossEntropyLossDualOutputBindings = [
NodeBinding(
SoftmaxCrossEntropyLossChecker([PointerClass(float32_t), PointerClass(type)],
[PointerClass(float32_t), PointerClass(float32_t)]),
SoftmaxCrossEntropyLossTemplate.referenceDualOutputTemplate, ForkTransformer) for type in IntegerDataTypes
Review comment (coderabbitai[bot], Contributor): ⚠️ Potential issue | 🟡 Minor
Rename the comprehension variable to avoid Ruff A001 throughout the file.

The variable `type` in the list comprehension shadows Python's built-in `type`, which Ruff correctly flags. This pattern appears in multiple binding definitions in this file (at least 11 instances). Rename `type` to `label_type` (or similar) consistently across all occurrences.

At lines 361-365 specifically:

♻️ Fix for PULPSoftmaxCrossEntropyLossDualOutputBindings
 PULPSoftmaxCrossEntropyLossDualOutputBindings = [
     NodeBinding(
-        SoftmaxCrossEntropyLossChecker([PointerClass(float32_t), PointerClass(type)],
+        SoftmaxCrossEntropyLossChecker([PointerClass(float32_t), PointerClass(label_type)],
                                        [PointerClass(float32_t), PointerClass(float32_t)]),
-        SoftmaxCrossEntropyLossTemplate.referenceDualOutputTemplate, ForkTransformer) for type in IntegerDataTypes
+        SoftmaxCrossEntropyLossTemplate.referenceDualOutputTemplate, ForkTransformer)
+    for label_type in IntegerDataTypes
 ]
🧰 Tools: 🪛 Ruff (0.15.9): [error] 365-365: Variable `type` is shadowing a Python builtin (A001)

]

PULPSoftmaxCrossEntropyLossGradBindings = [
NodeBinding(
SoftmaxCrossEntropyLossChecker([PointerClass(float32_t), PointerClass(type)], [PointerClass(float32_t)]),
@@ -368,6 +376,20 @@
SGDTemplate.referenceTemplate, ForkTransformer)
]

PULPInPlaceAccumulatorV2Bindings = [
NodeBinding(
InPlaceAccumulatorV2Checker(
[PointerClass(float32_t), PointerClass(float32_t), PointerClass(uint8_t)], [PointerClass(float32_t)]),
FloatInPlaceAccumulatorV2Template.referenceTemplate, ForkTransformer)
]

PULPInPlaceAccumulatorV2TiledBindings = [
NodeBinding(
InPlaceAccumulatorV2Checker(
[PointerClass(float32_t), PointerClass(float32_t), PointerClass(uint8_t)], [PointerClass(float32_t)]),
FloatInPlaceAccumulatorV2Template.tiledReferenceTemplate, ForkTransformer)
]

PULPTransposeBindings = [
NodeBinding(TransposeChecker([PointerClass(type)], [PointerClass(type)]), TransposeTemplate.referenceTemplate,
ForkTransformer) for type in IntegerDataTypes
46 changes: 26 additions & 20 deletions Deeploy/Targets/PULPOpen/Platform.py
@@ -14,17 +14,17 @@
from Deeploy.Targets.Generic.Bindings import BasicGEMMBindings, BasicPad1DBindings, BasicPad2DBindings, \
BasicRQIntegerDivBinding
from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, ConvLayer, GatherLayer, GELUGradLayer, GELULayer, \
- GEMMLayer, LayerNormGradLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, QuantLayer, \
- ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \
- RQSiHardswishLayer, SGDLayer, SliceLayer, SoftmaxCrossEntropyLossGradLayer, SoftmaxCrossEntropyLossLayer, \
- SoftmaxGradLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer
+ GEMMLayer, InPlaceAccumulatorV2Layer, LayerNormGradLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, \
+ PadLayer, QuantLayer, ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, \
+ RQIntegerDivLayer, RQSiGELULayer, RQSiHardswishLayer, SGDLayer, SliceLayer, SoftmaxCrossEntropyLossGradLayer, \
+ SoftmaxCrossEntropyLossLayer, SoftmaxGradLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer
from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, DequantParser, FlattenParser, GatherParser, \
- GELUGradParser, GELUParser, GEMMParser, LayerNormGradParser, LayerNormParser, MatMulParser, MaxPool1DParser, \
- MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, QuantParser, ReduceSumParser, ReluParser, \
- RequantShiftParser, ReshapeParser, RQAddParser, RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, \
- SGDParser, SliceParser, SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, SoftmaxGradParser, \
- SoftmaxParser, TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, \
- iSoftmaxParser
+ GELUGradParser, GELUParser, GEMMParser, InPlaceAccumulatorV2Parser, LayerNormGradParser, LayerNormParser, \
+ MatMulParser, MaxPool1DParser, MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, QuantParser, \
+ ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQAddParser, RQIntegerDivParser, RQSiGELUParser, \
+ RQSiHardswishParser, SGDParser, SliceParser, SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, \
+ SoftmaxGradParser, SoftmaxParser, TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, \
+ iRMSNormParser, iSoftmaxParser
coderabbitai[bot] marked this conversation as resolved.
from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate
from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, IntegerDivRequantMergePass, \
MergeConstAddAndRequantPass, MergeTrueIntegerDivRequantShiftPass, QuantPatternPass, RQSSplitPass, \
@@ -39,14 +39,15 @@
from Deeploy.Targets.PULPOpen.Tiler import PULPAddTilingReadyBindings, PULPConcatTilingReadyBindings, \
PULPConv2DTilingReadyBindings, PULPDWConv2DTilingReadyBindings, PULPFlattenTilingReadyBindings, \
PULPFPGELUGradTilingReadyBindings, PULPFPGELUTilingReadyBindings, PULPFPGEMMTilingReadyBindings, \
- PULPGatherTilingReadyBindings, PULPiHardswishTilingReadyBindings, PULPiRMSNormTilingReadyBindings, \
- PULPiRQSGELUTilingReadyBindings, PULPLayernormGradTilingReadyBindings, PULPLayernormTilingReadyBindings, \
- PULPMatMulTilingReadyBindings, PULPMaxPool1DTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, \
- PULPMulTilingReadyBindings, PULPReduceMeanTilingReadyBindings, PULPReduceSumTilingReadyBindings, \
- PULPReluTilingReadyBindings, PULPRQAddTilingReadyBindings, PULPRQSConv1DTilingReadyBindings, \
- PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, PULPRQSGEMMTilingReadyBindings, \
- PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, PULPRQSTallGEMMTilingReadyBindings, \
- PULPRQSTilingReadyBindings, PULPSGDTilingReadyBindings, PULPSliceTilingReadyBindings, \
+ PULPGatherTilingReadyBindings, PULPiHardswishTilingReadyBindings, PULPInPlaceAccumulatorV2TilingReadyBindings, \
+ PULPiRMSNormTilingReadyBindings, PULPiRQSGELUTilingReadyBindings, PULPLayernormGradTilingReadyBindings, \
+ PULPLayernormTilingReadyBindings, PULPMatMulTilingReadyBindings, PULPMaxPool1DTilingReadyBindings, \
+ PULPMaxPool2DTilingReadyBindings, PULPMulTilingReadyBindings, PULPReduceMeanTilingReadyBindings, \
+ PULPReduceSumTilingReadyBindings, PULPReluTilingReadyBindings, PULPRQAddTilingReadyBindings, \
+ PULPRQSConv1DTilingReadyBindings, PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, \
+ PULPRQSGEMMTilingReadyBindings, PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, \
+ PULPRQSTallGEMMTilingReadyBindings, PULPRQSTilingReadyBindings, PULPSGDTilingReadyBindings, \
+ PULPSliceTilingReadyBindings, PULPSoftmaxCrossEntropyDualOutputTilingReadyBindings, \
PULPSoftmaxCrossEntropyGradTilingReadyBindings, PULPSoftmaxCrossEntropyTilingReadyBindings, \
PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, PULPTransposeTilingReadyBindings, \
PULPUniformRQSTilingReadyBindings
@@ -105,9 +106,12 @@
iHardswishMapper = NodeMapper(iHardswishParser(), PULPiHardswishTilingReadyBindings)
RQSiHardswishMapper = NodeMapper(RQSiHardswishParser(), PULPRQSiHardswishTilingReadyBindings)
SoftmaxCrossEntropyLossMapper = NodeMapper(SoftmaxCrossEntropyLossParser(), PULPSoftmaxCrossEntropyTilingReadyBindings)
+ SoftmaxCrossEntropyLossDualOutputMapper = NodeMapper(SoftmaxCrossEntropyLossParser(),
+     PULPSoftmaxCrossEntropyDualOutputTilingReadyBindings)
SoftmaxCrossEntropyLossGradMapper = NodeMapper(SoftmaxCrossEntropyLossGradParser(),
PULPSoftmaxCrossEntropyGradTilingReadyBindings)
SGDMapper = NodeMapper(SGDParser(), PULPSGDTilingReadyBindings)
+ InPlaceAccumulatorV2Mapper = NodeMapper(InPlaceAccumulatorV2Parser(), PULPInPlaceAccumulatorV2TilingReadyBindings)
QuantMapper = NodeMapper(QuantParser(), BasicQuantBindings)
DequantMapper = NodeMapper(DequantParser(), BasicDequantBindings)
GEMMDequantMapper = NodeMapper(PULPGEMMParser(), BasicGEMMBindings)
@@ -149,9 +153,11 @@
'Quant': QuantLayer([QuantMapper]),
'Dequant': QuantLayer([DequantMapper]),
'SoftmaxGrad': SoftmaxGradLayer([SoftmaxGradMapper]),
- 'SoftmaxCrossEntropyLoss': SoftmaxCrossEntropyLossLayer([SoftmaxCrossEntropyLossMapper]),
+ 'SoftmaxCrossEntropyLoss':
+     SoftmaxCrossEntropyLossLayer([SoftmaxCrossEntropyLossDualOutputMapper, SoftmaxCrossEntropyLossMapper]),
'SoftmaxCrossEntropyLossGrad': SoftmaxCrossEntropyLossGradLayer([SoftmaxCrossEntropyLossGradMapper]),
- 'SGD': SGDLayer([SGDMapper])
+ 'SGD': SGDLayer([SGDMapper]),
+ 'InPlaceAccumulatorV2': InPlaceAccumulatorV2Layer([InPlaceAccumulatorV2Mapper]),
}


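Listing SoftmaxCrossEntropyLossDualOutputMapper before the legacy mapper makes the dual-output binding the first candidate, with the single-output mapper as fallback for 1-output nodes. A hypothetical sketch of that first-match dispatch (Deeploy's actual ONNXLayer binding logic additionally runs type checking against each mapper's bindings):

def bind_first_match(node, mappers):
    # Try mappers in declaration order; the first whose parser accepts the
    # node's input/output signature wins.
    for mapper in mappers:
        if mapper.parser.parseNode(node):
            return mapper
    raise RuntimeError(f"no mapper accepts node {node.name}")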
5 changes: 3 additions & 2 deletions Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py
@@ -4,7 +4,8 @@

from typing import Dict, List, Tuple

- from Deeploy.AbstractDataTypes import float32_tPtr
+ from Deeploy.AbstractDataTypes import PointerClass
+ from Deeploy.CommonExtensions.DataTypes import float32_t
from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation


@@ -19,7 +20,7 @@ def alignToContext(self, ctxt: NetworkContext,
if 'C' not in operatorRepresentation or operatorRepresentation['C'] is None:
# No bias case - set C to NULL and provide a default type
operatorRepresentation['C'] = None
- operatorRepresentation['C_type'] = float32_tPtr  # Default to fp32 type
+ operatorRepresentation['C_type'] = PointerClass(float32_t)  # Default to fp32 type
operatorRepresentation['C_batched'] = False

return ctxt, operatorRepresentation, []