@@ -19,65 +19,126 @@ jobs:
1919 - name : nvidia-h100
2020 runner : cern-nextgen-h100
2121 cmake_args : -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
22- ca_args : --gpuType CUDA # --RTCTECHloadLaunchBoundsFromFile genGPUArch/nvidia-h100.par
2322 - name : nvidia-l40s
2423 runner : cern-nextgen-l40s
2524 cmake_args : -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
26- ca_args : --gpuType CUDA # --RTCTECHloadLaunchBoundsFromFile genGPUArch/nvidia-l40s.par
2725 - name : amd-mi300x
2826 runner : cern-nextgen-mi300x
2927 cmake_args : -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
30- ca_args : --gpuType HIP
3128 - name : amd-w7900
3229 runner : cern-nextgen-w7900
3330 cmake_args : -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
34- ca_args : --gpuType HIP --RTCTECHloadLaunchBoundsFromFile genGPUArch/amd-w7900.par
31+
# Job-wide environment shared by every step of this job.
env:
  # CVMFS locations used to find the ALICE toolchain and its modulefiles
  # (the steps call `module load`).
  WORK_DIR: '/cvmfs/alice.cern.ch'
  ALIBUILD_ARCH_PREFIX: 'el9-x86_64/Packages'
  MODULEPATH: '/cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles'
  # Install prefix of the standalone build; the steps also `cd` here to run `ca`.
  STANDALONE_DIR: '/root/standalone'
  # Per-runner CSV file names, derived from the matrix entry name.
  BENCHMARK_CSV: '${{ matrix.name }}.csv'
  PROFILER_CSV: 'results_${{ matrix.name }}.csv'
  # Base `ca` command line for timing runs.
  # Add --PROCdebugMarkdown 1 --runs 42 --runsInit 2 --PROCresetTimers 1 for benchmark runs
  TIMING_CA: './ca -e 50kHz -g --seed 0 --memSize 15000000000 --sync --debug 1'
  LD_LIBRARY_PATH: '/usr/local/cuda-13.0/compat'
3541
3642 name : ${{ matrix.name }}
3743 steps :
# Check out the repository; later steps read sources and helper scripts
# from ${GITHUB_WORKSPACE}.
- name: Checkout Repository
  uses: actions/checkout@v6
4046
41- - name : Build and Run
# Fetch the reference output and the event samples into ${STANDALONE_DIR}.
- name: Download Files
  run: |
    # Public CERNBox share that hosts every file downloaded below.
    share_url=https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75
    events_dir=${STANDALONE_DIR}/events

    mkdir -p ${STANDALONE_DIR}

    # Reference output that the test step compares GPU.out against.
    curl -fL --retry 3 -o ${STANDALONE_DIR}/o2-simple-GPU.out ${share_url}/o2-simple-GPU.out

    mkdir -p ${events_dir}

    # Download and unpack each event archive, in the original order.
    for sample in o2-simple 50kHz; do
      curl -fL --retry 3 -o ${events_dir}/${sample}.tar.xz ${share_url}/events/${sample}.tar.xz
      tar -xf ${events_dir}/${sample}.tar.xz -C ${events_dir}
    done
59- ${STANDALONE_DIR}/ca -e 50kHz -g --seed 0 --memSize 15000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptSpecialCode 1 --debug 1 ${{ matrix.ca_args }} > ${ARTIFACT_FILE}
6059
61- curl -v -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
62- tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
63- ${STANDALONE_DIR}/ca -e o2-simple -g --seed 0 --memSize 20000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptSpecialCode 1 --debug 6 ${{ matrix.ca_args }}
# Configure, build and install the standalone benchmark with
# GPUCA_DETERMINISTIC_MODE taken from this step's DETERMINISTIC_MODE.
- name: Build Deterministic
  # The script is anchored as `build` so another step can reuse it through
  # the `*build` alias with a different DETERMINISTIC_MODE.
  run: &build |
    source /etc/profile.d/modules.sh
    module load \
      ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 \
      fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 \
      TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 \
      GLFW/3.3.2-25

    mkdir -p ${STANDALONE_DIR}
    cmake -B ${STANDALONE_DIR}/build \
      ${{ matrix.cmake_args }} \
      -DENABLE_OPENCL=0 \
      -DGPUCA_BUILD_EVENT_DISPLAY=0 \
      -DGPUCA_DETERMINISTIC_MODE=${DETERMINISTIC_MODE} \
      -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} \
      ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
    cmake --build ${STANDALONE_DIR}/build --target install -j 8
  env:
    DETERMINISTIC_MODE: GPU
6470
65- curl -v -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
# Run `ca` on the o2-simple sample and require its GPU.out to be
# byte-identical to the downloaded reference file.
- name: Test GPU Track Reconstruction
  run: |
    source /etc/profile.d/modules.sh
    module load \
      ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 \
      fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 \
      TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 \
      GLFW/3.3.2-25
    cd ${STANDALONE_DIR}

    ${STANDALONE_DIR}/ca -e o2-simple -g --seed 0 --memSize 20000000000 --sync --runs 1 \
      --RTCenable --PROCdeterministicGPUReconstruction 1 \
      --RTCoptConstexpr 1 --RTCoptSpecialCode 1 --debug 6

    # `cmp` exits non-zero when the two files differ.
    cmp ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out

    # Remove the test inputs/outputs and the build tree of this build.
    rm -rf \
      ${STANDALONE_DIR}/GPU.out \
      ${STANDALONE_DIR}/o2-simple-GPU.out \
      ${STANDALONE_DIR}/events/o2-simple \
      ${STANDALONE_DIR}/build
6879
69- rm -rf ${STANDALONE_DIR}/events
# Re-run the anchored `build` script with deterministic mode switched off.
- name: Build Non-Deterministic
  run: *build
  env:
    # Quoted on purpose: a bare OFF is a boolean under YAML 1.1 loaders and
    # linters, but the build script needs the literal string "OFF" for
    # -DGPUCA_DETERMINISTIC_MODE.
    DETERMINISTIC_MODE: "OFF"
84+
# Time the reconstruction with ${TIMING_CA} and post-process the per-run CSV.
- name: Benchmark GPU Track Reconstruction
  run: |
    source /etc/profile.d/modules.sh
    module load \
      ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 \
      fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 \
      TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 \
      GLFW/3.3.2-25
    cd ${STANDALONE_DIR}

    # NOTE(review): --debug 1 is already part of ${TIMING_CA}; it is repeated
    # here exactly as in the original command line.
    ${TIMING_CA} --debug 1 --runs 42 --runsInit 2 \
      --PROCdebugMarkdown 1 --PROCresetTimers 1 \
      --PROCdebugCSV /root/${BENCHMARK_CSV}

    # merge_runs.py reads and rewrites the same CSV file.
    python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py --discard 2 \
      --input /root/${BENCHMARK_CSV} --output /root/${BENCHMARK_CSV}
92+
# Profile with Nsight Compute; runs only for the nvidia-h100 matrix entry.
- name: Profiler - Nsight Compute
  if: matrix.name == 'nvidia-h100'
  run: |
    # NOTE(review): this installs the 13-1 package while LD_LIBRARY_PATH
    # points at cuda-13.0/compat; confirm the pairing is intended.
    dnf install -y cuda-nsight-compute-13-1

    source /etc/profile.d/modules.sh
    module load \
      ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 \
      fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 \
      TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 \
      GLFW/3.3.2-25
    cd ${STANDALONE_DIR}

    # Generates ${{ matrix.name }}.ncu-rep
    ncu --set none --metrics gpu__time_duration.avg --export ${{ matrix.name }} \
      --clock-control none --force-overwrite \
      ${TIMING_CA} --runs 21 --debug 1 --PROCdebugMarkdown 1

    # Convert the report to CSV.
    ncu --import ${STANDALONE_DIR}/${{ matrix.name }}.ncu-rep --print-units base --csv > /root/${PROFILER_CSV}

    rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build

    # profiler_ncu.py reads and rewrites the same CSV file.
    python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_ncu.py --runs 21 \
      --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
104+
# Profile with Nsight Systems; runs only for the nvidia-l40s matrix entry.
- name: Profiler - Nsight Systems
  if: matrix.name == 'nvidia-l40s'
  run: |
    # Register NVIDIA's devtools repository for this OS release and CPU architecture.
    dnf config-manager --add-repo "https://developer.download.nvidia.com/devtools/repos/rhel$(source /etc/os-release; echo ${VERSION_ID%%.*})/$(rpm --eval '%{_arch}' | sed s/aarch/arm/)/"
    # NOTE(review): --nogpgcheck skips package signature verification;
    # consider importing the repository's signing key instead.
    dnf install --nogpgcheck -y nsight-systems-cli-2026.2.1

    source /etc/profile.d/modules.sh
    module load \
      ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 \
      fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 \
      TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 \
      GLFW/3.3.2-25
    cd ${STANDALONE_DIR}

    # Generates ${{ matrix.name }}.nsys-rep
    nsys profile -o ${{ matrix.name }} \
      ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1

    # Export the cuda_gpu_kern_sum report as CSV, with times in microseconds.
    nsys stats --report cuda_gpu_kern_sum --timeunit usec --force-export=true --format csv \
      ${{ matrix.name }}.nsys-rep > /root/${PROFILER_CSV}

    rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build

    # profiler_nsys.py reads and rewrites the same CSV file.
    python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_nsys.py --runs 42 \
      --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
117+
# Profile with rocprofv2; runs for both AMD matrix entries.
- name: Profiler - rocprofv2
  if: matrix.name == 'amd-mi300x' || matrix.name == 'amd-w7900'
  run: |
    source /etc/profile.d/modules.sh
    module load \
      ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 \
      fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 \
      TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 \
      GLFW/3.3.2-25
    cd ${STANDALONE_DIR}

    # Generates results_${{ matrix.name }}.csv == ${PROFILER_CSV}
    rocprofv2 --output-directory /root --output-file-name ${{ matrix.name }} \
      ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1

    rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build

    # profiler_rocprofv2.py reads and rewrites the same CSV file.
    python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_rocprofv2.py --runs 42 \
      --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
78127
# Publish every CSV written to /root by the benchmark and profiler steps
# as one artifact per matrix entry.
- name: Upload Artifact
  uses: actions/upload-artifact@v6
  with:
    name: ${{ matrix.name }}-artifact
    # Quoted so the glob is passed through as a plain string; the stray
    # leading space inside the quotes has been removed.
    path: "/root/*.csv"
133+
# Render baseline-vs-current tables for the profiler and benchmark CSVs
# into the job summary.
- name: Display table on GitHub web
  run: |
    source /etc/profile.d/modules.sh
    module load \
      ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 \
      fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 \
      TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 \
      GLFW/3.3.2-25

    baseline_dir=${STANDALONE_DIR}/baseline
    baseline_url=https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline
    to_markdown=${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py

    # Download the baseline CSVs for this matrix entry.
    mkdir -p ${baseline_dir}
    curl -fL --retry 3 -o ${baseline_dir}/${PROFILER_CSV} ${baseline_url}/${PROFILER_CSV}
    curl -fL --retry 3 -o ${baseline_dir}/${BENCHMARK_CSV} ${baseline_url}/${BENCHMARK_CSV}

    # Profiler table first, then blank lines, then the benchmark table.
    python3 ${to_markdown} --baseline ${baseline_dir}/${PROFILER_CSV} --current /root/${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
    echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
    python3 ${to_markdown} --baseline ${baseline_dir}/${BENCHMARK_CSV} --current /root/${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}

    rm -rf ${baseline_dir}
0 commit comments