Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/infiniop/devices/ascend/common_ascend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -236,3 +236,24 @@ std::string aclnnTensorDescriptor::toString() {
// 返回构建的字符串
return oss.str();
}

// Builds a scalar descriptor from an ACL data type and the raw bytes of the value.
//
// dtype: ACL data type handed to aclCreateScalar.
// value: address of the scalar's bytes; only read during construction.
// size:  number of bytes at `value`.
//
// The `size` bytes at `value` are copied into a heap buffer stored in
// `this->value`, so the descriptor never keeps a pointer into caller storage
// (callers commonly pass the address of a stack local). When `value` is null
// or `size` is zero nothing can be copied: `this->value` stays null and the
// caller's pointer is forwarded to aclCreateScalar unchanged.
aclnnScalarDescriptor::aclnnScalarDescriptor(aclDataType dtype, const void *value, size_t size) {
    this->dataType = dtype;
    this->size = size;
    this->value = nullptr;

    void *scalar_source = const_cast<void *>(value);
    if (value != nullptr && size > 0) {
        const auto *src = static_cast<const unsigned char *>(value);
        auto *owned = new unsigned char[size];
        for (size_t i = 0; i < size; ++i) {
            owned[i] = src[i];
        }
        this->value = owned;
        scalar_source = owned;
    }

    this->scalar = aclCreateScalar(scalar_source, this->dataType);
}

// Same as above, but takes an infiniDtype_t and converts it with toAclDataType.
aclnnScalarDescriptor::aclnnScalarDescriptor(infiniDtype_t dtype, const void *value, size_t size)
    : aclnnScalarDescriptor(toAclDataType(dtype), value, size) {}

// Destroys the ACL scalar first, then frees the byte buffer allocated by the
// constructor (delete[] on a null pointer is a no-op).
aclnnScalarDescriptor::~aclnnScalarDescriptor() {
    if (this->scalar) {
        aclDestroyScalar(this->scalar);
        this->scalar = nullptr;
    }
    delete[] static_cast<unsigned char *>(this->value);
    this->value = nullptr;
}
12 changes: 12 additions & 0 deletions src/infiniop/devices/ascend/common_ascend.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,18 @@ struct aclnnTensorDescriptor {
};
typedef aclnnTensorDescriptor *aclnnTensorDescriptor_t;

// Wrapper around an aclScalar handle together with the data type, value
// pointer and byte size it was constructed with.
struct aclnnScalarDescriptor {
    aclDataType dataType; // ACL data type of the scalar
    void *value;          // pointer to the scalar's bytes, as set by the constructors
    size_t size;          // size argument given to the constructor
    aclScalar *scalar;    // handle released in the destructor

    aclnnScalarDescriptor(aclDataType dtype, const void *value, size_t size);
    aclnnScalarDescriptor(infiniDtype_t dtype, const void *value, size_t size);
    ~aclnnScalarDescriptor();

    // Copying is disabled: the destructor releases `scalar`, so two copies
    // of the same descriptor would release the same handle twice.
    aclnnScalarDescriptor(const aclnnScalarDescriptor &) = delete;
    aclnnScalarDescriptor &operator=(const aclnnScalarDescriptor &) = delete;
};
typedef aclnnScalarDescriptor *aclnnScalarDescriptor_t;

aclDataType toAclDataType(infiniDtype_t dt);

#define GetRecentErrMsg() \
Expand Down
46 changes: 46 additions & 0 deletions src/infiniop/ops/add/add.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#ifndef ADD_H
#define ADD_H

#include "../../operator.h"
#include "info.h"

/*
 * DESCRIPTOR(NAMESPACE) declares the class op::add::NAMESPACE::Descriptor.
 *
 * The class derives from InfiniopDescriptor and stores a pointer to a
 * backend-defined Opaque struct, an AddInfo, and the workspace size. Its
 * constructor is private; the static create() and the member calculate()
 * are only declared here and must be defined by the backend that expands
 * the macro. Comments inside the macro body use block-comment syntax because
 * a line comment would swallow the line continuations.
 */
#define DESCRIPTOR(NAMESPACE)                                          \
                                                                       \
    namespace op::add::NAMESPACE {                                     \
    class Descriptor final : public InfiniopDescriptor {               \
        struct Opaque;                                                 \
        Opaque *_opaque;                                               \
        AddInfo _info;                                                 \
        size_t _workspace_size;                                        \
                                                                       \
        /* Private: stores the arguments in the members above. */      \
        Descriptor(                                                    \
            Opaque *opaque_state,                                      \
            AddInfo add_info,                                          \
            size_t ws_bytes,                                           \
            infiniDevice_t dev_type,                                   \
            int dev_id)                                                \
            : InfiniopDescriptor{dev_type, dev_id},                    \
              _opaque(opaque_state),                                   \
              _info(add_info),                                         \
              _workspace_size(ws_bytes) {}                             \
                                                                       \
    public:                                                            \
        ~Descriptor();                                                 \
                                                                       \
        /* Workspace size recorded at construction. */                 \
        size_t workspaceSize() const { return _workspace_size; }       \
                                                                       \
        static infiniStatus_t create(                                  \
            infiniopHandle_t handle,                                   \
            Descriptor **desc_ptr,                                     \
            infiniopTensorDescriptor_t c_desc,                         \
            std::vector<infiniopTensorDescriptor_t> input_descs);      \
                                                                       \
        infiniStatus_t calculate(                                      \
            void *workspace, size_t workspace_size,                    \
            void *c,                                                   \
            std::vector<const void *> inputs,                          \
            void *stream) const;                                       \
    };                                                                 \
    }

#endif // ADD_H
112 changes: 112 additions & 0 deletions src/infiniop/ops/add/ascend/add_ascend.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#include "add_ascend.h"
#include "../../../devices/ascend/common_ascend.h"
#include <aclnnop/aclnn_add.h>
#include <memory>

namespace op::add::ascend {

// Opaque structure must be defined AFTER the class declaration (which is in add.h via DESCRIPTOR macro).
//
// Holds the ACL-side state of one add descriptor: the three tensor
// descriptors, the alpha scalar descriptor, the workspace size, and the
// executor. The destructor deletes all four descriptors and destroys the
// executor, so the struct owns everything passed to its constructor.
struct Descriptor::Opaque {
    aclnnTensorDescriptor_t a;
    aclnnTensorDescriptor_t b;
    aclnnTensorDescriptor_t c;
    aclnnScalarDescriptor_t alpha;
    size_t workspaceSize;
    aclOpExecutor *executor;

    Opaque(aclnnTensorDescriptor_t a_, aclnnTensorDescriptor_t b_, aclnnTensorDescriptor_t c_,
           aclnnScalarDescriptor_t alpha_, size_t ws, aclOpExecutor *exec)
        : a(a_), b(b_), c(c_), alpha(alpha_), workspaceSize(ws), executor(exec) {}

    // Non-copyable: a copy would delete the same descriptors and destroy the
    // same executor a second time.
    Opaque(const Opaque &) = delete;
    Opaque &operator=(const Opaque &) = delete;

    ~Opaque() {
        delete a;
        delete b;
        delete c;
        delete alpha;
        // Do not hand a null executor to the ACL runtime.
        if (executor != nullptr) {
            aclDestroyAclOpExecutor(executor);
        }
    }
};

// Deletes the Opaque state held by this descriptor; Opaque's destructor
// releases whatever it contains.
Descriptor::~Descriptor() {
    delete this->_opaque;
}

// Creates an Ascend add descriptor for c = a + alpha * b with alpha fixed to 1.
//
// handle:      cast to device::ascend::Handle to read the device and device id.
// desc_ptr:    receives the newly allocated Descriptor on success.
// c_desc:      output tensor descriptor.
// input_descs: must contain exactly two entries (a, then b).
//
// Returns INFINI_STATUS_BAD_PARAM when input_descs does not have two entries,
// whatever CHECK_RESULT / CHECK_ACL return on failure, and
// INFINI_STATUS_SUCCESS otherwise.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t c_desc,
    std::vector<infiniopTensorDescriptor_t> input_descs) {

    if (input_descs.size() != 2) {
        return INFINI_STATUS_BAD_PARAM;
    }

    auto a_desc = input_descs[0];
    auto b_desc = input_descs[1];

    // Validate dtype/shape before allocating any ACL objects.
    auto result = AddInfo::create(c_desc, a_desc, b_desc);
    CHECK_RESULT(result);

    auto handle_ascend = reinterpret_cast<device::ascend::Handle *>(handle);

    // Held by unique_ptr so that an early return from CHECK_ACL below does
    // not leak the descriptors.
    auto a = std::make_unique<aclnnTensorDescriptor>(a_desc);
    auto b = std::make_unique<aclnnTensorDescriptor>(b_desc);
    auto c = std::make_unique<aclnnTensorDescriptor>(c_desc);

    // Default alpha = 1.0. Static storage: the scalar descriptor receives
    // this address and outlives the current call, so a stack local would
    // leave it pointing at dead storage.
    static const float alpha_value = 1.0f;
    auto alpha = std::make_unique<aclnnScalarDescriptor>(
        INFINI_DTYPE_F32, &alpha_value, sizeof(float));

    size_t workspace_size = 0;
    aclOpExecutor *executor = nullptr;

    CHECK_ACL(aclnnAddGetWorkspaceSize(
        a->tensor,
        b->tensor,
        alpha->scalar,
        c->tensor,
        &workspace_size,
        &executor));

    aclSetAclOpExecutorRepeatable(executor);

    // Ownership of the descriptors and the executor moves into Opaque here.
    *desc_ptr = new Descriptor(
        new Opaque{a.release(), b.release(), c.release(), alpha.release(), workspace_size, executor},
        result.take(),
        workspace_size,
        handle_ascend->device,
        handle_ascend->device_id);

    return INFINI_STATUS_SUCCESS;
}

// Runs the add on the given buffers using the executor stored in the
// descriptor.
//
// Returns INFINI_STATUS_BAD_PARAM unless exactly two inputs are supplied,
// INFINI_STATUS_INSUFFICIENT_WORKSPACE when workspace_size is smaller than
// workspaceSize(), whatever CHECK_ACL returns if aclnnAdd fails, and
// INFINI_STATUS_SUCCESS otherwise.
infiniStatus_t Descriptor::calculate(
    void *workspace, size_t workspace_size,
    void *c, std::vector<const void *> inputs,
    void *stream) const {

    const bool has_two_inputs = (inputs.size() == 2);
    if (!has_two_inputs) {
        return INFINI_STATUS_BAD_PARAM;
    }

    if (workspaceSize() > workspace_size) {
        return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
    }

    auto *exec = _opaque->executor;
    void *a_addr = const_cast<void *>(inputs[0]);
    void *b_addr = const_cast<void *>(inputs[1]);

    // Bind this call's data pointers to the executor.
    // Argument order: executor, tensor index, tensor descriptor, data pointer.
    AclSetTensorAddr(exec, 0, _opaque->a->tensor, a_addr);
    AclSetTensorAddr(exec, 1, _opaque->b->tensor, b_addr);
    AclSetTensorAddr(exec, 2, _opaque->c->tensor, c);

    CHECK_ACL(aclnnAdd(workspace, workspace_size, exec, stream));

    return INFINI_STATUS_SUCCESS;
}

} // namespace op::add::ascend
8 changes: 8 additions & 0 deletions src/infiniop/ops/add/ascend/add_ascend.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#ifndef __ADD_ASCEND_H__
#define __ADD_ASCEND_H__

#include "../add.h"

// Expands the DESCRIPTOR macro from ../add.h with NAMESPACE = ascend,
// declaring op::add::ascend::Descriptor.
DESCRIPTOR(ascend)

#endif // __ADD_ASCEND_H__
59 changes: 59 additions & 0 deletions src/infiniop/ops/add/info.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#ifndef __ADD_INFO_H__
#define __ADD_INFO_H__

#include "../../../utils.h"
#include "../../tensor.h"
#include <vector>

namespace op::add {

// Validated metadata for an add operation: the output dtype, the output
// shape, and the number of output elements.
class AddInfo {
    // The default constructor is private.
    AddInfo() = default;

public:
    infiniDtype_t dtype;        // dtype of the output tensor
    std::vector<int64_t> shape; // output dimensions
    size_t numel;               // product of all output dimensions

    // Checks the three tensor descriptors and builds an AddInfo from the
    // output descriptor.
    //
    // The output dtype must be F16, F32 or BF16. Broadcasting is not
    // supported yet: a and b must have the same number of dimensions and the
    // same shape as c. Failures are returned through the CHECK_* macros.
    static utils::Result<AddInfo> create(
        infiniopTensorDescriptor_t c_desc,
        infiniopTensorDescriptor_t a_desc,
        infiniopTensorDescriptor_t b_desc) {

        auto out_dtype = c_desc->dtype();
        CHECK_DTYPE(out_dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_BF16);

        // All three tensors must have the same rank...
        auto rank = c_desc->ndim();
        const bool same_rank = (a_desc->ndim() == rank) && (b_desc->ndim() == rank);
        if (!same_rank) {
            CHECK_STATUS(INFINI_STATUS_BAD_TENSOR_SHAPE);
        }

        // ...and identical extents in every dimension.
        const auto &out_shape = c_desc->shape();
        const auto &lhs_shape = a_desc->shape();
        const auto &rhs_shape = b_desc->shape();
        CHECK_SAME_SHAPE(out_shape, lhs_shape);
        CHECK_SAME_SHAPE(out_shape, rhs_shape);

        // Record the output dimensions and accumulate the element count.
        std::vector<int64_t> dims;
        size_t count = 1;
        for (size_t axis = 0; axis < rank; ++axis) {
            dims.push_back(out_shape[axis]);
            count *= out_shape[axis];
        }

        return utils::Result<AddInfo>(AddInfo{out_dtype, dims, count});
    }
};

} // namespace op::add

#endif // __ADD_INFO_H__
15 changes: 15 additions & 0 deletions src/infiniop/ops/add/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#ifdef ENABLE_MOORE_API
#include "moore/add_moore.h"
#endif
#ifdef ENABLE_ASCEND_API
#include "ascend/add_ascend.h"
#endif

__INFINI_C infiniStatus_t infiniopCreateAddDescriptor(
infiniopHandle_t handle,
Expand Down Expand Up @@ -69,6 +72,9 @@ __INFINI_C infiniStatus_t infiniopCreateAddDescriptor(
#ifdef ENABLE_MOORE_API
CREATE(INFINI_DEVICE_MOORE, moore);
#endif
#ifdef ENABLE_ASCEND_API
CREATE(INFINI_DEVICE_ASCEND, ascend);
#endif

default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand Down Expand Up @@ -114,6 +120,9 @@ __INFINI_C infiniStatus_t infiniopGetAddWorkspaceSize(infiniopAddDescriptor_t de
#endif
#ifdef ENABLE_MOORE_API
GET(INFINI_DEVICE_MOORE, moore);
#endif
#ifdef ENABLE_ASCEND_API
GET(INFINI_DEVICE_ASCEND, ascend);
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand Down Expand Up @@ -169,6 +178,9 @@ __INFINI_C infiniStatus_t infiniopAdd(
#ifdef ENABLE_MOORE_API
CALCULATE(INFINI_DEVICE_MOORE, moore);
#endif
#ifdef ENABLE_ASCEND_API
CALCULATE(INFINI_DEVICE_ASCEND, ascend);
#endif

default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand Down Expand Up @@ -217,6 +229,9 @@ infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc) {
#ifdef ENABLE_MOORE_API
DELETE(INFINI_DEVICE_MOORE, moore);
#endif
#ifdef ENABLE_ASCEND_API
DELETE(INFINI_DEVICE_ASCEND, ascend);
#endif

default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand Down