Skip to content

Commit a76e142

Browse files
committed
issue/1105 - fix hpcc compilation
1 parent f44330d commit a76e142

22 files changed

Lines changed: 123 additions & 67 deletions

File tree

src/infiniop/ops/addcmul/cuda/kernel.cuh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
#ifndef __ADDCMUL_CUDA_CUH__
22
#define __ADDCMUL_CUDA_CUH__
33

4+
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) || defined(ENABLE_ILUVATAR_API)
45
#include <cuda_bf16.h>
56
#include <cuda_fp16.h>
7+
#endif
68
#include <type_traits>
79

810
namespace op::addcmul::cuda {

src/infiniop/ops/addcmul/metax/addcmul_metax.maca

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ static inline infiniStatus_t launch_addcmul_kernel(
129129
auto *t1_ptr = reinterpret_cast<const T *>(inputs.at(1));
130130
auto *t2_ptr = reinterpret_cast<const T *>(inputs.at(2));
131131

132-
mcStream_t metax_stream = reinterpret_cast<mcStream_t>(stream);
132+
hcStream_t metax_stream = reinterpret_cast<hcStream_t>(stream);
133133

134134
constexpr uint32_t BLOCK_SIZE = 256;
135135
uint32_t grid = static_cast<uint32_t>((output_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
@@ -146,7 +146,7 @@ static inline infiniStatus_t launch_addcmul_kernel(
146146
t2_ptr,
147147
desc->getValue());
148148

149-
CHECK_METAX(mcGetLastError());
149+
CHECK_METAX(hcGetLastError());
150150
return INFINI_STATUS_SUCCESS;
151151
}
152152

src/infiniop/ops/addr/cpu/addr_cpu.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include "addr_cpu.h"
22
#include "../../../devices/cpu/common_cpu.h"
3-
#include <spdlog/spdlog.h>
3+
44
namespace op::addr::cpu {
55
Descriptor::~Descriptor() = default;
66

src/infiniop/ops/argwhere/moore/argwhere_moore.mu

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,6 @@
33
#include "argwhere_kernel.h"
44
#include "argwhere_moore.h"
55
#include "infinicore.h"
6-
#include <spdlog/spdlog.h>
7-
8-
// template <typename T>
9-
// INFINIOP_MOORE_KERNEL parallel_block_argwhere(T *data, int64_t *results, size_t N,
10-
// size_t M, const size_t *shapes,
11-
// const ptrdiff_t *strides, size_t ndim,
12-
// size_t *count) {
13-
// parallel_block_argwhere_kernel<float><<<1, M / 2, M>>>(
14-
// data, results, N, shapes, strides, ndim, count);
15-
// }
166

177
infiniStatus_t launchKernel(const void *data, int64_t *results, size_t N,
188
size_t M, const size_t *shapes,
@@ -90,12 +80,6 @@ infiniStatus_t Descriptor::calculate(void *workspace, size_t workspace_size,
9080
musaMemcpyAsync(*y, result, sizeof(int64_t) * (*count) * ndim,
9181
musaMemcpyDeviceToHost, moore_stream);
9282

93-
// cudaStreamSynchronize(cuda_stream);
94-
// for (size_t i = 0; i < (*count) * ndim; i++) {
95-
// spdlog::debug("(*y)[{}]:{}", i, static_cast<size_t *>(*y)[i]);
96-
// }
97-
// cudaFreeAsync(result, cuda_stream);
98-
// cudaFreeAsync(count_cuda, cuda_stream);
9983
return INFINI_STATUS_SUCCESS;
10084
}
10185

src/infiniop/ops/atanh/cuda/kernel.cuh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
#ifndef __ATANH_CUDA_H__
22
#define __ATANH_CUDA_H__
33

4+
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) || defined(ENABLE_ILUVATAR_API)
45
#include <cuda_bf16.h>
56
#include <cuda_fp16.h>
7+
#endif
68

79
namespace op::atanh::cuda {
810
typedef struct AtanhOp {

src/infiniop/ops/binary_cross_entropy_with_logits/metax/binary_cross_entropy_with_logits_metax.maca

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22
#include "../../../devices/metax/metax_handle.h"
33
#include "../../../devices/metax/metax_kernel_common.h"
44
#include "binary_cross_entropy_with_logits_metax.h"
5+
#if defined(ENABLE_METAX_MC_API)
56
#include <mc_runtime.h>
7+
#else
8+
#include <hc_runtime.h>
9+
#endif
610
#include <type_traits>
711

812
namespace op::bce_with_logits::metax {
@@ -191,7 +195,7 @@ infiniStatus_t Descriptor::calculate(
191195
const void *pos_weight,
192196
void *stream) const {
193197

194-
mcStream_t custream = (mcStream_t)stream;
198+
hcStream_t custream = (hcStream_t)stream;
195199
size_t n = _info.num_elements;
196200

197201
// F16/BF16 with a reduction requires a float workspace
@@ -219,7 +223,7 @@ infiniStatus_t Descriptor::calculate(
219223
case INFINI_DTYPE_F32: {
220224
// For a reduction, the output location must be zeroed before computing
221225
if (_reduction != INFINIOP_REDUCTION_NONE) {
222-
mcMemsetAsync(out, 0, sizeof(float), custream);
226+
hcMemsetAsync(out, 0, sizeof(float), custream);
223227
}
224228

225229
bce_logits_kernel<float, float><<<grid, block, 0, custream>>>(
@@ -255,7 +259,7 @@ infiniStatus_t Descriptor::calculate(
255259
out_raw = out;
256260
} else {
257261
workspace_f = static_cast<float *>(workspace);
258-
mcMemsetAsync(workspace_f, 0, sizeof(float), custream);
262+
hcMemsetAsync(workspace_f, 0, sizeof(float), custream);
259263
out_raw = workspace_f;
260264
}
261265

@@ -294,7 +298,7 @@ infiniStatus_t Descriptor::calculate(
294298
out_raw = out;
295299
} else {
296300
workspace_f = static_cast<float *>(workspace);
297-
mcMemsetAsync(workspace_f, 0, sizeof(float), custream);
301+
hcMemsetAsync(workspace_f, 0, sizeof(float), custream);
298302
out_raw = workspace_f;
299303
}
300304

@@ -324,8 +328,8 @@ infiniStatus_t Descriptor::calculate(
324328
return INFINI_STATUS_BAD_TENSOR_DTYPE;
325329
}
326330

327-
mcError_t err = mcGetLastError();
328-
if (err != mcSuccess) {
331+
hcError_t err = hcGetLastError();
332+
if (err != hcSuccess) {
329333
return INFINI_STATUS_INTERNAL_ERROR;
330334
}
331335
return INFINI_STATUS_SUCCESS;

src/infiniop/ops/cdist/metax/cdist_metax.maca

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ infiniStatus_t Descriptor::calculate(
126126
return INFINI_STATUS_BAD_TENSOR_DTYPE;
127127
}
128128

129-
mcStream_t custream = (mcStream_t)stream;
129+
hcStream_t custream = (hcStream_t)stream;
130130
dim3 block(16, 16);
131131
dim3 grid(
132132
static_cast<unsigned int>((_info.n + block.x - 1) / block.x),
@@ -151,8 +151,8 @@ infiniStatus_t Descriptor::calculate(
151151
_info.y_matrix.col_stride,
152152
_p);
153153

154-
auto err = mcGetLastError();
155-
if (err != mcSuccess) {
154+
auto err = hcGetLastError();
155+
if (err != hcSuccess) {
156156
return INFINI_STATUS_INTERNAL_ERROR;
157157
}
158158

src/infiniop/ops/equal/cuda/kernel.cuh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
#ifndef __EQUAL_CUDA_H__
22
#define __EQUAL_CUDA_H__
33

4-
#if defined(__MACACC__)
4+
#if ENABLE_METAX_API
5+
#if defined(ENABLE_METAX_MC_API)
56
#include <maca_bfloat16.h>
67
#include <maca_fp16.h>
78
#else
9+
#include <hpcc_bfloat16.h>
10+
#include <hpcc_fp16.h>
11+
#endif
12+
#elif defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) || defined(ENABLE_ILUVATAR_API)
813
#include <cuda_bf16.h>
914
#include <cuda_fp16.h>
1015
#endif

src/infiniop/ops/hardswish/cuda/kernel.cuh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,15 @@
22
#define __HARDSWISH_CUDA_H__
33

44
#include <cmath>
5-
#if defined(__MACACC__)
5+
#if ENABLE_METAX_API
6+
#if defined(ENABLE_METAX_MC_API)
67
#include <maca_bfloat16.h>
78
#include <maca_fp16.h>
89
#else
10+
#include <hpcc_bfloat16.h>
11+
#include <hpcc_fp16.h>
12+
#endif
13+
#elif defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) || defined(ENABLE_ILUVATAR_API)
914
#include <cuda_bf16.h>
1015
#include <cuda_fp16.h>
1116
#endif

src/infiniop/ops/hardtanh/cuda/kernel.cuh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
#ifndef __HARDTANH_CUDA_H__
22
#define __HARDTANH_CUDA_H__
33

4-
#if defined(__MACACC__)
4+
#if ENABLE_METAX_API
5+
#if defined(ENABLE_METAX_MC_API)
56
#include <maca_bfloat16.h>
67
#include <maca_fp16.h>
78
#else
9+
#include <hpcc_bfloat16.h>
10+
#include <hpcc_fp16.h>
11+
#endif
12+
#elif defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) || defined(ENABLE_ILUVATAR_API)
813
#include <cuda_bf16.h>
914
#include <cuda_fp16.h>
1015
#endif

0 commit comments

Comments
 (0)