1717 name : [cpu, nvidia-h100, nvidia-l40s, amd-mi300x, amd-w7900]
1818 include :
1919 - name : cpu
20- runner : cern-nextgen-h100
20+ runner : cern-nextgen-mi300x
2121 cmake_args : -DENABLE_CUDA=0 -DENABLE_HIP=0
2222 profiler_runs : 42
2323 standalone_runs : 42
5353 STANDALONE_DIR : /root/standalone
5454 BENCHMARK_CSV : standalone_${{ matrix.name }}.csv
5555 PROFILER_CSV : profiler_${{ matrix.name }}.csv
56- TIMING_CA : ./ca -e 50kHz ${{ matrix.cpu_gpu }} --seed 0 --sync --debug 1 # Add --PROCdebugMarkdown 1 --runs 42 --runsInit 2 --PROCresetTimers 1 for benchmark runs
56+ TIMING_CA : ./ca -e 50kHz ${{ matrix.cpu_gpu }} --seed 0 --sync --runsInit 0 --PROCresetTimers 1 --PROCdebugMarkdown 1 --debug 1 # Add --runs 42 for benchmark runs
5757 LD_LIBRARY_PATH : /usr/local/cuda-13.0/compat
5858
5959 name : ${{ matrix.name }}
@@ -104,7 +104,7 @@ jobs:
104104 source /etc/profile.d/modules.sh
105105 module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
106106 cd ${STANDALONE_DIR}
107- ${TIMING_CA} --debug 1 -- runs ${{ matrix.standalone_runs }} --runsInit 0 --PROCdebugMarkdown 1 --PROCresetTimers 1 --PROCdebugCSV /root/${BENCHMARK_CSV}
107+ ${TIMING_CA} --runs ${{ matrix.standalone_runs }} --PROCdebugCSV /root/${BENCHMARK_CSV}
108108 python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_standalone.py --discard 0 --input /root/${BENCHMARK_CSV} --output /root/summary_${BENCHMARK_CSV}
109109
110110 - name : Profiler - Nsight Compute
@@ -114,7 +114,7 @@ jobs:
114114 source /etc/profile.d/modules.sh
115115 module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
116116 cd ${STANDALONE_DIR}
117- ncu --set none --metrics gpu__time_duration.avg --export ${{ matrix.name }} --clock-control none --force-overwrite ${TIMING_CA} --runs ${{ matrix.profiler_runs }} --debug 1 --PROCdebugMarkdown 1 # Generates ${{ matrix.name }}.ncu-rep
117+ ncu --set none --metrics gpu__time_duration.avg --export ${{ matrix.name }} --clock-control none --force-overwrite ${TIMING_CA} --runs ${{ matrix.profiler_runs }} # Generates ${{ matrix.name }}.ncu-rep
118118 ncu --import ${STANDALONE_DIR}/${{ matrix.name }}.ncu-rep --print-units base --csv > /root/${PROFILER_CSV}
119119 rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
120120 python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_ncu.py --input /root/${PROFILER_CSV} --output /root/summary_${PROFILER_CSV}
@@ -127,7 +127,7 @@ jobs:
127127 source /etc/profile.d/modules.sh
128128 module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
129129 cd ${STANDALONE_DIR}
130- nsys profile -o ${{ matrix.name }} ${TIMING_CA} --runs ${{ matrix.profiler_runs }} --debug 1 --PROCdebugMarkdown 1 # Generates ${{ matrix.name }}.nsys-rep
130+ nsys profile -o ${{ matrix.name }} ${TIMING_CA} --runs ${{ matrix.profiler_runs }} # Generates ${{ matrix.name }}.nsys-rep
131131 nsys stats --report cuda_gpu_kern_sum --timeunit usec --force-export=true --format csv ${{ matrix.name }}.nsys-rep > /root/${PROFILER_CSV}
132132 rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
133133 python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_nsys.py --input /root/${PROFILER_CSV} --output /root/summary_${PROFILER_CSV}
@@ -138,7 +138,7 @@ jobs:
138138 source /etc/profile.d/modules.sh
139139 module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
140140 cd ${STANDALONE_DIR}
141- rocprofv2 --output-directory /root --output-file-name ${{ matrix.name }} ${TIMING_CA} --runs ${{ matrix.standalone_runs }} --debug 1 --PROCdebugMarkdown 1 # Generates results_${{ matrix.name }}.csv
141+ rocprofv2 --output-directory /root --output-file-name ${{ matrix.name }} ${TIMING_CA} --runs ${{ matrix.standalone_runs }} # Generates results_${{ matrix.name }}.csv
142142 rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
143143 mv /root/results_${{ matrix.name }}.csv /root/${PROFILER_CSV}
144144 python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_rocprofv2.py --input /root/${PROFILER_CSV} --output /root/summary_${PROFILER_CSV}
0 commit comments