diff --git a/src/infiniop/devices/ascend/common_ascend.cc b/src/infiniop/devices/ascend/common_ascend.cc
index a57f3a647..d70efee27 100644
--- a/src/infiniop/devices/ascend/common_ascend.cc
+++ b/src/infiniop/devices/ascend/common_ascend.cc
@@ -236,3 +236,27 @@ std::string aclnnTensorDescriptor::toString() {
     // 返回构建的字符串
     return oss.str();
 }
+
+// Builds an aclScalar of type `dtype` from the host value that `value` points to.
+// The descriptor stores the caller's pointer as-is and does not copy the bytes,
+// so `this->value` is only meaningful while the caller's storage is alive.
+// NOTE(review): presumably aclCreateScalar copies the value into the aclScalar;
+// confirm against the CANN documentation before reading `value` later.
+aclnnScalarDescriptor::aclnnScalarDescriptor(aclDataType dtype, const void *value, size_t size)
+    : dataType(dtype),
+      value(const_cast<void *>(value)),
+      size(size),
+      scalar(aclCreateScalar(this->value, this->dataType)) {}
+
+// Convenience overload: maps the infiniDtype_t through toAclDataType() and
+// delegates to the aclDataType constructor.
+aclnnScalarDescriptor::aclnnScalarDescriptor(infiniDtype_t dtype, const void *value, size_t size)
+    : aclnnScalarDescriptor(toAclDataType(dtype), value, size) {}
+
+// Destroys the aclScalar if one is held.
+aclnnScalarDescriptor::~aclnnScalarDescriptor() {
+    if (scalar != nullptr) {
+        aclDestroyScalar(scalar);
+        scalar = nullptr;
+    }
+}
diff --git a/src/infiniop/devices/ascend/common_ascend.h b/src/infiniop/devices/ascend/common_ascend.h
index e9fec1765..4c2e42c9b 100644
--- a/src/infiniop/devices/ascend/common_ascend.h
+++ b/src/infiniop/devices/ascend/common_ascend.h
@@ -43,6 +43,23 @@ struct aclnnTensorDescriptor {
 };
 typedef aclnnTensorDescriptor *aclnnTensorDescriptor_t;
 
+// Owns an aclScalar created from a host-side value.
+// Copying is disabled: the destructor destroys `scalar`, so a copy would
+// destroy the same handle twice.
+struct aclnnScalarDescriptor {
+    aclDataType dataType;
+    void *value; // caller-provided host pointer; the bytes are not copied
+    size_t size; // byte size passed by the caller alongside `value`
+    aclScalar *scalar;
+
+    aclnnScalarDescriptor(aclDataType dtype, const void *value, size_t size);
+    aclnnScalarDescriptor(infiniDtype_t dtype, const void *value, size_t size);
+    aclnnScalarDescriptor(const aclnnScalarDescriptor &) = delete;
+    aclnnScalarDescriptor &operator=(const aclnnScalarDescriptor &) = delete;
+    ~aclnnScalarDescriptor();
+};
+typedef aclnnScalarDescriptor *aclnnScalarDescriptor_t;
+
 aclDataType toAclDataType(infiniDtype_t dt);
 
 #define GetRecentErrMsg() \
diff --git a/src/infiniop/ops/add/add.h b/src/infiniop/ops/add/add.h
new file mode 100644
index 000000000..c6dd76f4e
--- /dev/null
+++ b/src/infiniop/ops/add/add.h
@@ -0,0 +1,61 @@
+#ifndef ADD_H
+#define ADD_H
+
+#include "../../operator.h"
+#include "info.h"
+
+#include <utility>
+#include <vector>
+
+/*
+ * DESCRIPTOR(NAMESPACE) declares op::add::NAMESPACE::Descriptor, the add
+ * operator's descriptor class for one backend.
+ *
+ * - Opaque is only declared here; the backend's source file defines it.
+ * - The constructor is private; instances come from the static create().
+ * - create() takes the output descriptor plus a vector of input descriptors.
+ * - calculate() takes the output pointer plus a vector of input pointers.
+ * - Copying is disabled because the class holds a raw Opaque pointer.
+ */
+#define DESCRIPTOR(NAMESPACE) \
+    \
+    namespace op::add::NAMESPACE { \
+    class Descriptor final : public InfiniopDescriptor { \
+        struct Opaque; \
+        Opaque *_opaque; \
+        AddInfo _info; \
+        size_t _workspace_size; \
+        \
+        Descriptor( \
+            Opaque *opaque, \
+            AddInfo info, \
+            size_t workspace_size, \
+            infiniDevice_t device_type, \
+            int device_id) \
+            : InfiniopDescriptor{device_type, device_id}, \
+              _opaque(opaque), \
+              _info(std::move(info)), \
+              _workspace_size(workspace_size) {} \
+        \
+    public: \
+        Descriptor(const Descriptor &) = delete; \
+        Descriptor &operator=(const Descriptor &) = delete; \
+        ~Descriptor(); \
+        \
+        size_t workspaceSize() const { return _workspace_size; } \
+        \
+        static infiniStatus_t create( \
+            infiniopHandle_t handle, \
+            Descriptor **desc_ptr, \
+            infiniopTensorDescriptor_t c_desc, \
+            std::vector<infiniopTensorDescriptor_t> input_descs); \
+        \
+        infiniStatus_t calculate( \
+            void *workspace, size_t workspace_size, \
+            void *c, \
+            std::vector<const void *> inputs, \
+            void *stream) const; \
+    }; \
+    }
+
+#endif // ADD_H
diff --git a/src/infiniop/ops/add/ascend/add_ascend.cc b/src/infiniop/ops/add/ascend/add_ascend.cc
new file mode 100644
index 000000000..05a4b6f83
--- /dev/null
+++ b/src/infiniop/ops/add/ascend/add_ascend.cc
@@ -0,0 +1,132 @@
+#include "add_ascend.h"
+#include "../../../devices/ascend/common_ascend.h"
+#include <aclnnop/aclnn_add.h>
+#include <memory>
+
+namespace op::add::ascend {
+
+// ACL-side state of one add descriptor: wrappers for the operands, the
+// workspace size, and the executor. The destructor releases all of them.
+// Must be defined after the Descriptor class declaration, which add.h emits
+// through the DESCRIPTOR macro.
+struct Descriptor::Opaque {
+    aclnnTensorDescriptor_t a;
+    aclnnTensorDescriptor_t b;
+    aclnnTensorDescriptor_t c;
+    aclnnScalarDescriptor_t alpha;
+    size_t workspaceSize;
+    aclOpExecutor *executor;
+
+    Opaque(aclnnTensorDescriptor_t a_, aclnnTensorDescriptor_t b_, aclnnTensorDescriptor_t c_,
+           aclnnScalarDescriptor_t alpha_, size_t ws, aclOpExecutor *exec)
+        : a(a_), b(b_), c(c_), alpha(alpha_), workspaceSize(ws), executor(exec) {}
+
+    // The destructor frees the members, so a copy would free them twice.
+    Opaque(const Opaque &) = delete;
+    Opaque &operator=(const Opaque &) = delete;
+
+    ~Opaque() {
+        delete a;
+        delete b;
+        delete c;
+        delete alpha;
+        if (executor != nullptr) {
+            aclDestroyAclOpExecutor(executor);
+        }
+    }
+};
+
+Descriptor::~Descriptor() {
+    delete _opaque;
+}
+
+// Validates the descriptors, builds the ACL operand wrappers, queries the
+// workspace size and executor from aclnnAddGetWorkspaceSize, and stores the
+// new descriptor in *desc_ptr. Expects exactly two input descriptors.
+infiniStatus_t Descriptor::create(
+    infiniopHandle_t handle,
+    Descriptor **desc_ptr,
+    infiniopTensorDescriptor_t c_desc,
+    std::vector<infiniopTensorDescriptor_t> input_descs) {
+
+    if (input_descs.size() != 2) {
+        return INFINI_STATUS_BAD_PARAM;
+    }
+
+    auto a_desc = input_descs[0];
+    auto b_desc = input_descs[1];
+
+    // Validate dtype and shapes before creating any ACL object
+    auto result = AddInfo::create(c_desc, a_desc, b_desc);
+    CHECK_RESULT(result);
+
+    auto handle_ascend = reinterpret_cast<device::ascend::Handle *>(handle);
+
+    // unique_ptr keeps ownership until Opaque takes over, so nothing leaks
+    // if CHECK_ACL below returns early.
+    auto a = std::make_unique<aclnnTensorDescriptor>(a_desc);
+    auto b = std::make_unique<aclnnTensorDescriptor>(b_desc);
+    auto c = std::make_unique<aclnnTensorDescriptor>(c_desc);
+
+    // Default alpha = 1.0. Static storage keeps the pointer stored inside
+    // aclnnScalarDescriptor valid after create() returns.
+    static const float alpha_value = 1.0f;
+    auto alpha = std::make_unique<aclnnScalarDescriptor>(
+        INFINI_DTYPE_F32, &alpha_value, sizeof(float));
+
+    size_t workspace_size = 0;
+    aclOpExecutor *executor = nullptr;
+
+    CHECK_ACL(aclnnAddGetWorkspaceSize(
+        a->tensor,
+        b->tensor,
+        alpha->scalar,
+        c->tensor,
+        &workspace_size,
+        &executor));
+
+    // Mark the executor repeatable; calculate() reuses it on each call
+    aclSetAclOpExecutorRepeatable(executor);
+
+    *desc_ptr = new Descriptor(
+        new Opaque{a.release(), b.release(), c.release(), alpha.release(), workspace_size, executor},
+        result.take(),
+        workspace_size,
+        handle_ascend->device,
+        handle_ascend->device_id);
+
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Binds the input and output data pointers to the stored executor and calls
+// aclnnAdd with `stream`. Expects exactly two inputs and a workspace_size of
+// at least workspaceSize().
+infiniStatus_t Descriptor::calculate(
+    void *workspace, size_t workspace_size,
+    void *c, std::vector<const void *> inputs,
+    void *stream) const {
+
+    if (inputs.size() != 2) {
+        return INFINI_STATUS_BAD_PARAM;
+    }
+
+    if (workspace_size < workspaceSize()) {
+        return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
+    }
+
+    // Set input/output tensor addresses in the executor
+    // Parameters: executor, tensor_index, tensor_descriptor, data_pointer
+    AclSetTensorAddr(_opaque->executor, 0, _opaque->a->tensor, const_cast<void *>(inputs[0]));
+    AclSetTensorAddr(_opaque->executor, 1, _opaque->b->tensor, const_cast<void *>(inputs[1]));
+    AclSetTensorAddr(_opaque->executor, 2, _opaque->c->tensor, c);
+
+    CHECK_ACL(aclnnAdd(
+        workspace,
+        workspace_size,
+        _opaque->executor,
+        stream));
+
+    return INFINI_STATUS_SUCCESS;
+}
+
+} // namespace op::add::ascend
diff --git a/src/infiniop/ops/add/ascend/add_ascend.h b/src/infiniop/ops/add/ascend/add_ascend.h
new file mode 100644
index 000000000..f63831498
--- /dev/null
+++ b/src/infiniop/ops/add/ascend/add_ascend.h
@@ -0,0 +1,8 @@
+#ifndef __ADD_ASCEND_H__
+#define __ADD_ASCEND_H__
+
+#include "../add.h"
+
+DESCRIPTOR(ascend)
+
+#endif // __ADD_ASCEND_H__
diff --git a/src/infiniop/ops/add/info.h b/src/infiniop/ops/add/info.h
new file mode 100644
index 000000000..2a6cda7a1
--- /dev/null
+++ b/src/infiniop/ops/add/info.h
@@ -0,0 +1,66 @@
+#ifndef __ADD_INFO_H__
+#define __ADD_INFO_H__
+
+#include "../../../utils.h"
+#include "../../tensor.h"
+#include <utility>
+#include <vector>
+
+namespace op::add {
+
+// Validated metadata of an add operation, taken from the output descriptor:
+// element type, shape, and element count.
+class AddInfo {
+    AddInfo() = default;
+
+public:
+    infiniDtype_t dtype;
+    std::vector<size_t> shape;
+    size_t numel;
+
+    // Checks that c's dtype is F16, F32 or BF16 and that a and b have the same
+    // rank and shape as c, then returns the resulting AddInfo. Broadcasting is
+    // not supported yet.
+    static utils::Result<AddInfo> create(
+        infiniopTensorDescriptor_t c_desc,
+        infiniopTensorDescriptor_t a_desc,
+        infiniopTensorDescriptor_t b_desc) {
+
+        auto dtype = c_desc->dtype();
+
+        // Check dtype compatibility
+        CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_BF16);
+
+        // Bind by reference to avoid copying the shapes
+        const auto &c_shape = c_desc->shape();
+        const auto &a_shape = a_desc->shape();
+        const auto &b_shape = b_desc->shape();
+
+        auto c_ndim = c_desc->ndim();
+
+        // Require same ndim and shape for now
+        if (c_ndim != a_desc->ndim() || c_ndim != b_desc->ndim()) {
+            CHECK_STATUS(INFINI_STATUS_BAD_TENSOR_SHAPE);
+        }
+
+        CHECK_SAME_SHAPE(c_shape, a_shape);
+        CHECK_SAME_SHAPE(c_shape, b_shape);
+
+        size_t numel = 1;
+        std::vector<size_t> shape;
+        shape.reserve(c_ndim);
+        for (size_t i = 0; i < c_ndim; i++) {
+            shape.push_back(c_shape[i]);
+            numel *= c_shape[i];
+        }
+
+        return utils::Result<AddInfo>(AddInfo{
+            dtype,
+            std::move(shape),
+            numel});
+    }
+};
+
+} // namespace op::add
+
+#endif // __ADD_INFO_H__
diff --git a/src/infiniop/ops/add/operator.cc b/src/infiniop/ops/add/operator.cc
index c6e9f28fb..88677c75b 100644
--- a/src/infiniop/ops/add/operator.cc
+++ b/src/infiniop/ops/add/operator.cc
@@ -20,6 +20,9 @@
 #ifdef ENABLE_MOORE_API
 #include "moore/add_moore.h"
 #endif
+#ifdef ENABLE_ASCEND_API
+#include "ascend/add_ascend.h"
+#endif
 
 __INFINI_C infiniStatus_t infiniopCreateAddDescriptor(
     infiniopHandle_t handle,
@@ -69,6 +72,9 @@ __INFINI_C infiniStatus_t infiniopCreateAddDescriptor(
 #ifdef ENABLE_MOORE_API
         CREATE(INFINI_DEVICE_MOORE, moore);
 #endif
+#ifdef ENABLE_ASCEND_API
+        CREATE(INFINI_DEVICE_ASCEND, ascend);
+#endif
 
     default:
         return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
@@ -114,6 +120,9 @@ __INFINI_C infiniStatus_t infiniopGetAddWorkspaceSize(infiniopAddDescriptor_t de
 #endif
 #ifdef ENABLE_MOORE_API
         GET(INFINI_DEVICE_MOORE, moore);
+#endif
+#ifdef ENABLE_ASCEND_API
+        GET(INFINI_DEVICE_ASCEND, ascend);
 #endif
     default:
         return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
@@ -169,6 +178,9 @@ __INFINI_C infiniStatus_t infiniopAdd(
 #ifdef ENABLE_MOORE_API
         CALCULATE(INFINI_DEVICE_MOORE, moore);
 #endif
+#ifdef ENABLE_ASCEND_API
+        CALCULATE(INFINI_DEVICE_ASCEND, ascend);
+#endif
 
     default:
         return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
@@ -217,6 +229,9 @@ infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc) {
 #ifdef ENABLE_MOORE_API
         DELETE(INFINI_DEVICE_MOORE, moore);
 #endif
+#ifdef ENABLE_ASCEND_API
+        DELETE(INFINI_DEVICE_ASCEND, ascend);
+#endif
 
     default:
         return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;