Skip to content

Commit 5ed48cf

Browse files
committed
issue/1105 - fix hpcc, maca, and hygon compilation
1 parent f44330d commit 5ed48cf

24 files changed

Lines changed: 134 additions & 71 deletions

File tree

src/infiniop/devices/metax/metax_ht2mc.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,4 +101,5 @@
101101
#define hcGraphDestroy mcGraphDestroy
102102
#define hcGraphExecDestroy mcGraphExecDestroy
103103
#define hcGraphLaunch mcGraphLaunch
104+
#define hcMemsetAsync mcMemsetAsync
104105
#endif

src/infiniop/ops/addcmul/cuda/kernel.cuh

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
11
#ifndef __ADDCMUL_CUDA_CUH__
22
#define __ADDCMUL_CUDA_CUH__
33

4+
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) || defined(ENABLE_ILUVATAR_API)
45
#include <cuda_bf16.h>
56
#include <cuda_fp16.h>
7+
#endif
68
#include <type_traits>
79

810
namespace op::addcmul::cuda {

src/infiniop/ops/addcmul/metax/addcmul_metax.maca

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
#include "../../../devices/metax/metax_handle.h"
12
#include "../../../elementwise/metax/elementwise_metax.h"
23

34
#include "addcmul_metax.h"
@@ -129,7 +130,7 @@ static inline infiniStatus_t launch_addcmul_kernel(
129130
auto *t1_ptr = reinterpret_cast<const T *>(inputs.at(1));
130131
auto *t2_ptr = reinterpret_cast<const T *>(inputs.at(2));
131132

132-
mcStream_t metax_stream = reinterpret_cast<mcStream_t>(stream);
133+
hcStream_t metax_stream = reinterpret_cast<hcStream_t>(stream);
133134

134135
constexpr uint32_t BLOCK_SIZE = 256;
135136
uint32_t grid = static_cast<uint32_t>((output_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
@@ -146,7 +147,7 @@ static inline infiniStatus_t launch_addcmul_kernel(
146147
t2_ptr,
147148
desc->getValue());
148149

149-
CHECK_METAX(mcGetLastError());
150+
CHECK_METAX(hcGetLastError());
150151
return INFINI_STATUS_SUCCESS;
151152
}
152153

src/infiniop/ops/addr/cpu/addr_cpu.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
#include "addr_cpu.h"
22
#include "../../../devices/cpu/common_cpu.h"
3-
#include <spdlog/spdlog.h>
3+
44
namespace op::addr::cpu {
55
Descriptor::~Descriptor() = default;
66

src/infiniop/ops/argwhere/moore/argwhere_moore.mu

Lines changed: 0 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,6 @@
33
#include "argwhere_kernel.h"
44
#include "argwhere_moore.h"
55
#include "infinicore.h"
6-
#include <spdlog/spdlog.h>
7-
8-
// template <typename T>
9-
// INFINIOP_MOORE_KERNEL parallel_block_argwhere(T *data, int64_t *results, size_t N,
10-
// size_t M, const size_t *shapes,
11-
// const ptrdiff_t *strides, size_t ndim,
12-
// size_t *count) {
13-
// parallel_block_argwhere_kernel<float><<<1, M / 2, M>>>(
14-
// data, results, N, shapes, strides, ndim, count);
15-
// }
166

177
infiniStatus_t launchKernel(const void *data, int64_t *results, size_t N,
188
size_t M, const size_t *shapes,
@@ -90,12 +80,6 @@ infiniStatus_t Descriptor::calculate(void *workspace, size_t workspace_size,
9080
musaMemcpyAsync(*y, result, sizeof(int64_t) * (*count) * ndim,
9181
musaMemcpyDeviceToHost, moore_stream);
9282

93-
// cudaStreamSynchronize(cuda_stream);
94-
// for (size_t i = 0; i < (*count) * ndim; i++) {
95-
// spdlog::debug("(*y)[{}]:{}", i, static_cast<size_t *>(*y)[i]);
96-
// }
97-
// cudaFreeAsync(result, cuda_stream);
98-
// cudaFreeAsync(count_cuda, cuda_stream);
9983
return INFINI_STATUS_SUCCESS;
10084
}
10185

src/infiniop/ops/atanh/cuda/kernel.cuh

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
11
#ifndef __ATANH_CUDA_H__
22
#define __ATANH_CUDA_H__
33

4+
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) || defined(ENABLE_ILUVATAR_API)
45
#include <cuda_bf16.h>
56
#include <cuda_fp16.h>
7+
#endif
68

79
namespace op::atanh::cuda {
810
typedef struct AtanhOp {

src/infiniop/ops/binary_cross_entropy_with_logits/metax/binary_cross_entropy_with_logits_metax.maca

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,11 @@
22
#include "../../../devices/metax/metax_handle.h"
33
#include "../../../devices/metax/metax_kernel_common.h"
44
#include "binary_cross_entropy_with_logits_metax.h"
5+
#if defined(ENABLE_METAX_MC_API)
56
#include <mc_runtime.h>
7+
#else
8+
#include <hc_runtime.h>
9+
#endif
610
#include <type_traits>
711

812
namespace op::bce_with_logits::metax {
@@ -191,7 +195,7 @@ infiniStatus_t Descriptor::calculate(
191195
const void *pos_weight,
192196
void *stream) const {
193197

194-
mcStream_t custream = (mcStream_t)stream;
198+
hcStream_t custream = (hcStream_t)stream;
195199
size_t n = _info.num_elements;
196200

197201
// F16/BF16 + 归约需要 float workspace
@@ -219,7 +223,7 @@ infiniStatus_t Descriptor::calculate(
219223
case INFINI_DTYPE_F32: {
220224
// 如果是规约操作,计算前需将输出位置清零
221225
if (_reduction != INFINIOP_REDUCTION_NONE) {
222-
mcMemsetAsync(out, 0, sizeof(float), custream);
226+
hcMemsetAsync(out, 0, sizeof(float), custream);
223227
}
224228

225229
bce_logits_kernel<float, float><<<grid, block, 0, custream>>>(
@@ -255,7 +259,7 @@ infiniStatus_t Descriptor::calculate(
255259
out_raw = out;
256260
} else {
257261
workspace_f = static_cast<float *>(workspace);
258-
mcMemsetAsync(workspace_f, 0, sizeof(float), custream);
262+
hcMemsetAsync(workspace_f, 0, sizeof(float), custream);
259263
out_raw = workspace_f;
260264
}
261265

@@ -294,7 +298,7 @@ infiniStatus_t Descriptor::calculate(
294298
out_raw = out;
295299
} else {
296300
workspace_f = static_cast<float *>(workspace);
297-
mcMemsetAsync(workspace_f, 0, sizeof(float), custream);
301+
hcMemsetAsync(workspace_f, 0, sizeof(float), custream);
298302
out_raw = workspace_f;
299303
}
300304

@@ -324,8 +328,8 @@ infiniStatus_t Descriptor::calculate(
324328
return INFINI_STATUS_BAD_TENSOR_DTYPE;
325329
}
326330

327-
mcError_t err = mcGetLastError();
328-
if (err != mcSuccess) {
331+
hcError_t err = hcGetLastError();
332+
if (err != hcSuccess) {
329333
return INFINI_STATUS_INTERNAL_ERROR;
330334
}
331335
return INFINI_STATUS_SUCCESS;

src/infiniop/ops/cdist/metax/cdist_metax.maca

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
1+
#include "../../../devices/metax/metax_common.h"
12
#include "../../../devices/metax/metax_handle.h"
3+
#include "../../../devices/metax/metax_kernel_common.h"
24
#include "cdist_metax.h"
35
#include <iostream>
46
namespace op::cdist::metax {
@@ -126,7 +128,7 @@ infiniStatus_t Descriptor::calculate(
126128
return INFINI_STATUS_BAD_TENSOR_DTYPE;
127129
}
128130

129-
mcStream_t custream = (mcStream_t)stream;
131+
hcStream_t custream = (hcStream_t)stream;
130132
dim3 block(16, 16);
131133
dim3 grid(
132134
static_cast<unsigned int>((_info.n + block.x - 1) / block.x),
@@ -151,8 +153,8 @@ infiniStatus_t Descriptor::calculate(
151153
_info.y_matrix.col_stride,
152154
_p);
153155

154-
auto err = mcGetLastError();
155-
if (err != mcSuccess) {
156+
auto err = hcGetLastError();
157+
if (err != hcSuccess) {
156158
return INFINI_STATUS_INTERNAL_ERROR;
157159
}
158160

src/infiniop/ops/equal/cuda/kernel.cuh

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,15 @@
11
#ifndef __EQUAL_CUDA_H__
22
#define __EQUAL_CUDA_H__
33

4-
#if defined(__MACACC__)
4+
#if ENABLE_METAX_API
5+
#if defined(ENABLE_METAX_MC_API)
56
#include <maca_bfloat16.h>
67
#include <maca_fp16.h>
78
#else
9+
#include <hpcc_bfloat16.h>
10+
#include <hpcc_fp16.h>
11+
#endif
12+
#elif defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) || defined(ENABLE_ILUVATAR_API)
813
#include <cuda_bf16.h>
914
#include <cuda_fp16.h>
1015
#endif

src/infiniop/ops/hardswish/cuda/kernel.cuh

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,15 @@
22
#define __HARDSWISH_CUDA_H__
33

44
#include <cmath>
5-
#if defined(__MACACC__)
5+
#if ENABLE_METAX_API
6+
#if defined(ENABLE_METAX_MC_API)
67
#include <maca_bfloat16.h>
78
#include <maca_fp16.h>
89
#else
10+
#include <hpcc_bfloat16.h>
11+
#include <hpcc_fp16.h>
12+
#endif
13+
#elif defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) || defined(ENABLE_ILUVATAR_API)
914
#include <cuda_bf16.h>
1015
#include <cuda_fp16.h>
1116
#endif

0 commit comments

Comments
 (0)