Skip to content

Commit 17a9d8e

Browse files
metax666duqimengroot1184319564
authored
[Metax] Dnn for debug (PaddlePaddle#2383)
* [Metax] support arm (metax666#221) * [Metax] lock_runtime.c * [Metax] support arm --------- Co-authored-by: root <root@lt-wks-10-0-180-15.pub.metax-tech.com> * [Metax] Dnn for debug (metax666#220) * [Metax] mccl * dnn * dnn * dnn * dnn:patch * dnn:patch * dnn:patch * Move some files to staging * ignore test --------- Co-authored-by: duqimeng <1640472053@qq.com> --------- Co-authored-by: duqimeng <77875733+duqimeng@users.noreply.github.com> Co-authored-by: root <root@lt-wks-10-0-180-15.pub.metax-tech.com> Co-authored-by: ZhouDuan <136539532+1184319564@users.noreply.github.com> Co-authored-by: duqimeng <1640472053@qq.com>
1 parent 030e146 commit 17a9d8e

63 files changed

Lines changed: 4452 additions & 11019 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

backends/metax_gpu/CMakeLists.txt

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,20 @@ set(PYTHON_VERSION ${PY_VERSION})
2525
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
2626
message(STATUS "CMAKE_MODULE_PATH: ${CMAKE_MODULE_PATH}")
2727
set(WITH_MKLML ON)
28-
28+
if(WITH_ARM)
29+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
30+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
31+
set(WITH_XBYAK
32+
OFF
33+
CACHE STRING "Disable XBYAK when compiling WITH_ARM=ON." FORCE)
34+
set(WITH_MKL
35+
OFF
36+
CACHE STRING "Disable MKL when compiling WITH_ARM=ON." FORCE)
37+
set(WITH_AVX
38+
OFF
39+
CACHE STRING "Disable AVX when compiling WITH_ARM=ON." FORCE)
40+
add_definitions(-DPADDLE_WITH_ARM)
41+
endif()
2942
include(paddle)
3043
set(THIRD_PARTY_PATH
3144
"${PADDLE_SOURCE_DIR}/build/third_party"
@@ -129,6 +142,24 @@ file(
129142
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/partial_recv_kernel.cu
130143
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/partial_concat_grad_kernel.cu
131144
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/partial_allgather_kernel.cu
145+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/softmax_grad_kernel.cu
146+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/softmax_kernel.cu
147+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
148+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/batch_norm_kernel.cu
149+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu
150+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cross_entropy_kernel.cu
151+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cudnn_lstm_grad_kernel.cu
152+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cudnn_lstm_kernel.cu
153+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/depthwise_conv_grad_kernel.cu
154+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/depthwise_conv_kernel.cu
155+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/conv_kernel.cu
156+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/conv_grad_kernel.cu
157+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/grid_sample_grad_kernel.cu
158+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/grid_sample_kernel.cu
159+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu
160+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/instance_norm_kernel.cu
161+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc
162+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rnn_kernel.cu.cc
132163
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/ctc_align_kernel.cu
133164
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/yolo_box_head_kernel.cu
134165
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/stft_grad_kernel.cu
@@ -799,6 +830,7 @@ target_compile_definitions(
799830
PADDLE_WITH_CUSTOM_DEVICE=1
800831
mcblasContext=cublasContext
801832
cublasLtContext=mcblasLtContext
833+
# Alias cudnnContext to mcdnnContext using the same single-'=' NAME=VALUE form as the neighbouring entries.
cudnnContext=mcdnnContext
802834
GPUContext=CustomContext
803835
KPSContext=CustomContext
804836
STREAM_TYPE=cudaStream_t

backends/metax_gpu/compile.sh

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,20 @@ fi
3434

3535
echo "make_maca"
3636
cd build
37-
cmake_maca .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DPython3_EXECUTABLE=$(which python3) -DWITH_GPU=ON -DCUDA_ARCH_NAME=Manual -DCUDA_ARCH_BIN="80"
38-
make_maca -j18 VERBOSE=1
37+
arch=$(uname -m)
38+
echo ${arch}
39+
if [ "${arch}" = "x86_64" ]; then
40+
echo 系统架构是:${arch}
41+
# Configure the x86_64 build; the command substitution is quoted so an interpreter path containing whitespace stays a single argument.
cmake_maca .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DPython3_EXECUTABLE="$(which python3)" -DWITH_GPU=ON -DCUDA_ARCH_NAME=Manual -DCUDA_ARCH_BIN="80"
42+
make_maca -j18 VERBOSE=1
43+
elif [ "${arch}" = "aarch64" ] || [ "${arch}" = "arm64" ]; then
44+
echo "arm64"
45+
# Configure the ARM build (adds -DWITH_ARM=ON); the command substitution is quoted so an interpreter path containing whitespace stays a single argument.
cmake_maca .. -DWITH_ARM=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DPython3_EXECUTABLE="$(which python3)" -DWITH_GPU=ON -DCUDA_ARCH_NAME=Manual -DCUDA_ARCH_BIN="80"
46+
make_maca TARGET=ARMV8 -j18 VERBOSE=1
47+
else
48+
# Report which architecture was rejected, on stderr, before the script exits non-zero.
echo "unsupported architecture: ${arch}" >&2
49+
exit 1
50+
fi
3951

4052

4153
echo "install whl"
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/phi/core/kernel_registry.h"
16+
#include "paddle/phi/kernels/conv_grad_kernel.h"
17+
18+
PD_CUSTOM_KERNEL_REGISTER(
19+
conv2d_grad, metax_gpu, ALL_LAYOUT, phi::ConvGradKernel, float, double) {}
20+
21+
PD_CUSTOM_KERNEL_REGISTER(
22+
conv3d_grad, metax_gpu, ALL_LAYOUT, phi::Conv3DGradKernel, float, double) {}
23+
24+
PD_CUSTOM_KERNEL_REGISTER(conv2d_double_grad,
25+
metax_gpu,
26+
ALL_LAYOUT,
27+
phi::ConvGradGradKernel,
28+
float,
29+
double) {}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/phi/core/kernel_registry.h"
16+
#include "paddle/phi/kernels/conv_kernel.h"
17+
PD_CUSTOM_KERNEL_REGISTER(
18+
conv2d, metax_gpu, ALL_LAYOUT, phi::ConvKernel, float, double) {}
19+
20+
PD_CUSTOM_KERNEL_REGISTER(
21+
conv3d, metax_gpu, ALL_LAYOUT, phi::Conv3DKernel, float, double) {}

backends/metax_gpu/kernels/cuda_kernels/cross_entropy_bwd_w_downcast.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ limitations under the License. */
2222
namespace cub = hipcub;
2323
#endif
2424

25-
#include "kernels/gpudnn/softmax_gpudnn.h"
2625
#include "paddle/phi/backends/gpu/gpu_device_function.h"
2726
#include "paddle/phi/backends/gpu/gpu_dnn.h"
2827
#include "paddle/phi/common/amp_type_traits.h"
@@ -33,6 +32,7 @@ namespace cub = hipcub;
3332
#include "paddle/phi/kernels/funcs/for_range.h"
3433
#include "paddle/phi/kernels/funcs/math_function.h"
3534
#include "paddle/phi/kernels/funcs/softmax.h"
35+
#include "paddle/phi/kernels/gpudnn/softmax_gpudnn.h"
3636

3737
namespace phi {
3838

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/phi/core/kernel_registry.h"
16+
#include "paddle/phi/kernels/cudnn_lstm_grad_kernel.h"
17+
#include "paddle/phi/kernels/gpu/cudnn_lstm_utils.h"
18+
19+
#ifdef PADDLE_WITH_HIP
20+
PD_CUSTOM_KERNEL_REGISTER(
21+
cudnn_lstm_grad, metax_gpu, ALL_LAYOUT, phi::CudnnLSTMGradKernel, float) {}
22+
#else
23+
PD_CUSTOM_KERNEL_REGISTER(cudnn_lstm_grad,
24+
metax_gpu,
25+
ALL_LAYOUT,
26+
phi::CudnnLSTMGradKernel,
27+
float,
28+
double) {}
29+
#endif
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "glog/logging.h"
16+
#include "paddle/phi/core/kernel_registry.h"
17+
#include "paddle/phi/kernels/cudnn_lstm_kernel.h"
18+
#include "paddle/phi/kernels/gpu/cudnn_lstm_utils.h"
19+
20+
#ifdef PADDLE_WITH_HIP
21+
PD_CUSTOM_KERNEL_REGISTER(
22+
cudnn_lstm, metax_gpu, ALL_LAYOUT, phi::CudnnLSTMKernel, float) {
23+
kernel->InputAt(5).SetDataType(phi::DataType::INT32);
24+
kernel->OutputAt(3).SetDataType(phi::DataType::UINT8);
25+
kernel->OutputAt(4).SetDataType(phi::DataType::UINT8);
26+
}
27+
#else
28+
PD_CUSTOM_KERNEL_REGISTER(
29+
cudnn_lstm, metax_gpu, ALL_LAYOUT, phi::CudnnLSTMKernel, float, double) {
30+
kernel->InputAt(5).SetDataType(phi::DataType::INT32);
31+
kernel->OutputAt(3).SetDataType(phi::DataType::UINT8);
32+
kernel->OutputAt(4).SetDataType(phi::DataType::UINT8);
33+
}
34+
#endif
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/phi/core/kernel_registry.h"
16+
#include "paddle/phi/kernels/conv_grad_kernel.h"
17+
18+
PD_CUSTOM_KERNEL_REGISTER(depthwise_conv2d_grad,
19+
metax_gpu,
20+
ALL_LAYOUT,
21+
phi::DepthwiseConvGradKernel,
22+
float,
23+
double,
24+
phi::dtype::float16,
25+
phi::dtype::bfloat16) {}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/phi/core/kernel_registry.h"
16+
#include "paddle/phi/kernels/conv_kernel.h"
17+
18+
PD_CUSTOM_KERNEL_REGISTER(depthwise_conv2d,
19+
metax_gpu,
20+
ALL_LAYOUT,
21+
phi::DepthwiseConvKernel,
22+
float,
23+
double,
24+
phi::dtype::float16,
25+
phi::dtype::bfloat16) {}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/phi/core/kernel_registry.h"
16+
#include "paddle/phi/kernels/gpu/conv_transpose_grad_kernel.cu" // NOLINT
17+
18+
PD_CUSTOM_KERNEL_REGISTER(depthwise_conv2d_transpose_grad,
19+
metax_gpu,
20+
ALL_LAYOUT,
21+
phi::DepthwiseConv2dTransposeGradKernel,
22+
float,
23+
double) {}

0 commit comments

Comments
 (0)