Skip to content

Commit 2e22986

Browse files
Store benchmark results as CI artifacts
1 parent fa950ad commit 2e22986

6 files changed

Lines changed: 270 additions & 32 deletions

File tree

.github/scripts/csv_to_md.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import argparse
import csv

import tabulate as tab

# Command line: a stored baseline CSV and a freshly measured CSV to compare.
_cli = argparse.ArgumentParser()
_cli.add_argument('-b', '--baseline', required=True, help='Baseline CSV file')
_cli.add_argument('-c', '--current', required=True, help='Current CSV file')
args = _cli.parse_args()
9+
10+
def get_2d_list(csv_filename):
    """Load a benchmark CSV into a list of [name, mean, stdev] rows.

    The first line is assumed to be a header and is skipped.  Blank lines
    are ignored (the original unpacking crashed on them).  Raises ValueError
    if a data row's mean/stdev fields are not numeric.
    """
    # newline='' is the csv-module-recommended way to open CSV files.
    with open(csv_filename, newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        next(csv_reader)  # skip header row
        return [[str(row[0]), float(row[1]), float(row[2])]
                for row in csv_reader if row]
15+
16+
# Load both result sets up front; each row is [name, mean, stdev].
table_baseline = get_2d_list(args.baseline)
table_current = get_2d_list(args.current)
18+
19+
def get_emoji(d, stdev):
    """Classify a time difference *d* against a 95% confidence band of *stdev*.

    Returns ':green_circle:' when d is significantly below -1.96*stdev,
    ':red_circle:' when significantly above +1.96*stdev, and
    ':white_circle:' otherwise.
    NOTE(review): with diff = baseline - current, green marks the *current*
    run being slower — confirm this is the intended color convention.
    """
    threshold = 1.96 * stdev  # z-score for a 95% confidence interval
    if d < -threshold:
        return ':green_circle:'
    if d > threshold:
        return ':red_circle:'
    return ':white_circle:'
27+
28+
# Build one markdown row per benchmark: name, current mean, stdev, diff to
# baseline, diff expressed in stdevs, and a traffic-light emoji.
# NOTE: zip() silently truncates to the shorter list; each pair is verified
# by name below, so a reordering or mismatch raises rather than miscompares.
table = []
for (baseline_name, baseline_mean, _), (name, mean, stdev) in zip(table_baseline, table_current):
    # An assert would be stripped under `python -O`; validate explicitly.
    if baseline_name != name:
        raise ValueError(f'benchmark name mismatch: {baseline_name!r} vs {name!r}')
    diff = baseline_mean - mean
    # Difference in units of the current stdev; 0.0 avoids division by zero.
    impact = 0.0 if stdev == 0.0 else diff / stdev
    emoji = get_emoji(diff, stdev)
    table.append([name, int(mean), f'{stdev:.2f}', int(diff), f'{impact:.2f}', emoji])

header = ['name', 'mean (\u03BCs)', 'stdev \u03C3', 'diff \u0394', '\u0394 / \u03C3', '']
print(tab.tabulate(table, header, tablefmt="github"))

.github/scripts/merge_runs.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Aggregate repeated benchmark measurements into per-name summary rows.

Reads the input CSV (name in column 2, time in column 3), groups the times
by name, optionally discards the first --discard measurements of each group
(presumably warm-up runs — confirm with the workflow), and writes one
(name, mean, stdev) row per benchmark to the output CSV.
"""
import argparse
import csv
import statistics
from collections import defaultdict

parser = argparse.ArgumentParser()
parser.add_argument('-d', '--discard', type=int, default=0, help='Number of initial measurements to discard')
parser.add_argument('-i', '--input', required=True, help='Input CSV file')
parser.add_argument('-o', '--output', required=True, help='Output CSV file')
args = parser.parse_args()

# Group every measured time by benchmark name, preserving first-seen order.
time_dict = defaultdict(list)
with open(args.input, newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    next(csv_reader)  # skip header row
    for row in csv_reader:
        time_dict[row[2]].append(float(row[3]))

data = [["name", "time", "stdev"]]
for name, time_list in time_dict.items():
    kept = time_list[args.discard:]  # slice once instead of per statistic
    # statistics.mean raises StatisticsError if --discard drops every sample.
    mean = int(statistics.mean(kept))
    # stdev is undefined for a single sample; report 0.0 in that case.
    stdev = 0.0 if len(kept) == 1 else statistics.stdev(kept)
    data.append([name, mean, stdev])

# newline='' prevents spurious blank lines in CSV output on \r\n platforms.
with open(args.output, 'w', newline='') as csv_file:
    csv.writer(csv_file).writerows(data)

.github/scripts/profiler_ncu.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""Summarize an Nsight Compute CSV export into (name, time, stdev) rows.

Kernel name is read from column 4 and its duration from column 14 (divided
by 1000 — presumably ns to microseconds, confirm against the ncu export).
Only kernels whose mangled name starts with 'krnl_' are kept; the prefix is
stripped.  Per-kernel totals are scaled by the number of launches per run.
"""
import argparse
import csv
import statistics
from collections import defaultdict

parser = argparse.ArgumentParser()
parser.add_argument('-r', '--runs', type=int, required=True, help='Number of runs')
parser.add_argument('-i', '--input', required=True, help='Input CSV file')
parser.add_argument('-o', '--output', required=True, help='Output CSV file')
args = parser.parse_args()

KERNEL_PREFIX = "krnl_"

# Group per-launch durations by kernel name (prefix stripped).
kernel_dict = defaultdict(list)
with open(args.input, newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    next(csv_reader)  # skip header row
    for row in csv_reader:
        full_name = row[4]
        # > len(prefix) keeps the original requirement of a non-empty remainder;
        # the duration field is only parsed for rows that actually match.
        if full_name.startswith(KERNEL_PREFIX) and len(full_name) > len(KERNEL_PREFIX):
            kernel_dict[full_name[len(KERNEL_PREFIX):]].append(int(row[14]) / 1000.0)

data = [["name", "time", "stdev"]]
for name, time_list in kernel_dict.items():
    count = len(time_list) // args.runs  # launches of this kernel per run
    mean = statistics.mean(time_list) * count
    # stdev needs at least two samples; a single run yields 0.
    stdev = 0 if args.runs == 1 else statistics.stdev(time_list) * count
    data.append([name, mean, stdev])

# newline='' prevents spurious blank lines in CSV output on \r\n platforms.
with open(args.output, 'w', newline='') as csv_file:
    csv.writer(csv_file).writerows(data)

.github/scripts/profiler_nsys.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""Summarize an `nsys stats cuda_gpu_kern_sum` CSV into (name, time, stdev) rows.

Rows are filtered to kernels whose name (column 8) starts with 'krnl_';
instances (column 2), a time value (column 3) and its stdev (column 7) are
scaled by the average number of launches per run.  The unused `statistics`
import of the original was dropped.
"""
import argparse
import csv

parser = argparse.ArgumentParser()
parser.add_argument('-r', '--runs', type=int, required=True, help='Number of runs')
parser.add_argument('-i', '--input', required=True, help='Input CSV file')
parser.add_argument('-o', '--output', required=True, help='Output CSV file')
args = parser.parse_args()

KERNEL_PREFIX = "krnl_"

# Collected as [name, time, sigma, instances] per matching kernel.
ntsi_list = []
with open(args.input, newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    for _ in range(3):  # nsys prepends banner lines before the data rows
        next(csv_reader)
    for row in csv_reader:
        if not row:
            continue  # skip blank separator lines in the nsys output
        full_name = row[8]
        # > len(prefix) keeps the original requirement of a non-empty remainder.
        if full_name.startswith(KERNEL_PREFIX) and len(full_name) > len(KERNEL_PREFIX):
            instances = int(row[2])
            time = float(row[3])
            sigma = float(row[7])
            ntsi_list.append([full_name[len(KERNEL_PREFIX):], time, sigma, instances])

# Sort by kernel name for a deterministic, diff-friendly output order.
ntsi_list.sort(key=lambda entry: entry[0])

data = [["name", "time", "stdev"]]
for name, time, sigma, instances in ntsi_list:
    count = instances / args.runs  # average launches of this kernel per run
    data.append([name, int(time * count), sigma * count])

# newline='' prevents spurious blank lines in CSV output on \r\n platforms.
with open(args.output, 'w', newline='') as csv_file:
    csv.writer(csv_file).writerows(data)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""Summarize a rocprofv2 CSV into (name, time, stdev) rows.

Kernel name is read from column 13; duration is end minus begin timestamps
(columns 15 and 14) divided by 1000 — presumably ns to microseconds, confirm
against the rocprofv2 output format.  Only kernels named 'krnl_*' are kept;
the prefix and a 3-character suffix (presumably a mangling suffix such as
'.kd' — verify) are stripped.  Totals are scaled by launches per run.
"""
import argparse
import csv
import statistics
from collections import defaultdict

parser = argparse.ArgumentParser()
parser.add_argument('-r', '--runs', type=int, required=True, help='Number of runs')
parser.add_argument('-i', '--input', required=True, help='Input CSV file')
parser.add_argument('-o', '--output', required=True, help='Output CSV file')
args = parser.parse_args()

KERNEL_PREFIX = "krnl_"

# Group per-launch durations by stripped kernel name.
time_dict = defaultdict(list)
with open(args.input, newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    next(csv_reader)  # skip header row
    for row in csv_reader:
        full_name = row[13]
        # > len(prefix) keeps the original requirement of a non-empty remainder;
        # timestamps are only parsed for rows that actually match.
        if full_name.startswith(KERNEL_PREFIX) and len(full_name) > len(KERNEL_PREFIX):
            time = (int(row[15]) - int(row[14])) / 1000.0
            time_dict[full_name[len(KERNEL_PREFIX):-3]].append(time)

data = [["name", "time", "stdev"]]
for name, time_list in time_dict.items():
    count = len(time_list) / args.runs  # average launches of this kernel per run
    mean = int(statistics.mean(time_list) * count)
    # stdev needs at least two samples; a single run yields 0.
    stdev = 0 if args.runs == 1 else statistics.stdev(time_list) * count
    data.append([name, mean, stdev])

# newline='' prevents spurious blank lines in CSV output on \r\n platforms.
with open(args.output, 'w', newline='') as csv_file:
    csv.writer(csv_file).writerows(data)

.github/workflows/standalone-benchmark.yml

Lines changed: 93 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -19,65 +19,126 @@ jobs:
1919
- name: nvidia-h100
2020
runner: cern-nextgen-h100
2121
cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
22-
ca_args: --gpuType CUDA #--RTCTECHloadLaunchBoundsFromFile genGPUArch/nvidia-h100.par
2322
- name: nvidia-l40s
2423
runner: cern-nextgen-l40s
2524
cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
26-
ca_args: --gpuType CUDA #--RTCTECHloadLaunchBoundsFromFile genGPUArch/nvidia-l40s.par
2725
- name: amd-mi300x
2826
runner: cern-nextgen-mi300x
2927
cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
30-
ca_args: --gpuType HIP
3128
- name: amd-w7900
3229
runner: cern-nextgen-w7900
3330
cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
34-
ca_args: --gpuType HIP --RTCTECHloadLaunchBoundsFromFile genGPUArch/amd-w7900.par
31+
32+
env:
33+
WORK_DIR: /cvmfs/alice.cern.ch
34+
ALIBUILD_ARCH_PREFIX: el9-x86_64/Packages
35+
MODULEPATH: /cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles
36+
STANDALONE_DIR: /root/standalone
37+
BENCHMARK_CSV: ${{ matrix.name }}.csv
38+
PROFILER_CSV: results_${{ matrix.name }}.csv
39+
TIMING_CA: ./ca -e 50kHz -g --seed 0 --memSize 15000000000 --sync --debug 1 # Add --PROCdebugMarkdown 1 --runs 42 --runsInit 2 --PROCresetTimers 1 for benchmark runs
40+
LD_LIBRARY_PATH: /usr/local/cuda-13.0/compat
3541

3642
name: ${{ matrix.name }}
3743
steps:
3844
- name: Checkout Repository
39-
uses: actions/checkout@v4
45+
uses: actions/checkout@v6
4046

41-
- name: Build and Run
47+
- name: Download Files
4248
run: |
4349
mkdir -p ${STANDALONE_DIR}
44-
. ${WORK_DIR}/${ALIBUILD_ARCH_PREFIX}/ninja-fortran/fortran-v1.11.1.g9-3/etc/profile.d/init.sh
45-
. ${WORK_DIR}/${ALIBUILD_ARCH_PREFIX}/O2/${O2_REVISION}/etc/profile.d/init.sh
46-
47-
cmake -B ${BUILD_DIR} ${{ matrix.cmake_args }} -DENABLE_OPENCL=0 -DGPUCA_BUILD_EVENT_DISPLAY=0 -DGPUCA_DETERMINISTIC_MODE=GPU -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
48-
cd ${BUILD_DIR}
49-
make install -j8
5050
51-
cd ${STANDALONE_DIR}
52-
mkdir -p ${STANDALONE_DIR}/genGPUArch
53-
curl -v -o ${STANDALONE_DIR}/genGPUArch/${{ matrix.name }}.par https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/genGPUArch/${{ matrix.name }}.par
51+
curl -fL --retry 3 -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
5452
5553
mkdir -p ${STANDALONE_DIR}/events
54+
curl -fL --retry 3 -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
55+
tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
5656
57-
curl -v -o ${STANDALONE_DIR}/events/50kHz.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/50kHz.tar.xz
57+
curl -fL --retry 3 -o ${STANDALONE_DIR}/events/50kHz.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/50kHz.tar.xz
5858
tar -xf ${STANDALONE_DIR}/events/50kHz.tar.xz -C ${STANDALONE_DIR}/events
59-
${STANDALONE_DIR}/ca -e 50kHz -g --seed 0 --memSize 15000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptSpecialCode 1 --debug 1 ${{ matrix.ca_args }} > ${ARTIFACT_FILE}
6059
61-
curl -v -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
62-
tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
63-
${STANDALONE_DIR}/ca -e o2-simple -g --seed 0 --memSize 20000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptSpecialCode 1 --debug 6 ${{ matrix.ca_args }}
60+
- name: Build Deterministic
61+
run: &build |
62+
source /etc/profile.d/modules.sh
63+
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
64+
65+
mkdir -p ${STANDALONE_DIR}
66+
cmake -B ${STANDALONE_DIR}/build ${{ matrix.cmake_args }} -DENABLE_OPENCL=0 -DGPUCA_BUILD_EVENT_DISPLAY=0 -DGPUCA_DETERMINISTIC_MODE=${DETERMINISTIC_MODE} -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
67+
cmake --build ${STANDALONE_DIR}/build --target install -j 8
68+
env:
69+
DETERMINISTIC_MODE: GPU
6470

65-
curl -v -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
71+
- name: Test GPU Track Reconstruction
72+
run: |
73+
source /etc/profile.d/modules.sh
74+
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
75+
cd ${STANDALONE_DIR}
76+
${STANDALONE_DIR}/ca -e o2-simple -g --seed 0 --memSize 20000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptConstexpr 1 --RTCoptSpecialCode 1 --debug 6
6677
cmp ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out
67-
rm -rf ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out
78+
rm -rf ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out ${STANDALONE_DIR}/events/o2-simple ${STANDALONE_DIR}/build
6879
69-
rm -rf ${STANDALONE_DIR}/events
80+
- name: Build Non-Deterministic
81+
run: *build
7082
env:
71-
WORK_DIR: /cvmfs/alice.cern.ch
72-
ALIBUILD_ARCH_PREFIX: el9-x86_64/Packages
73-
O2_REVISION: daily-20260217-0000-1
74-
STANDALONE_DIR: /root/standalone
75-
BUILD_DIR: /root/standalone/build
76-
ARTIFACT_FILE: /root/artifact.txt
77-
LD_LIBRARY_PATH: /usr/local/cuda-13.0/compat
83+
DETERMINISTIC_MODE: OFF
84+
85+
- name: Benchmark GPU Track Reconstruction
86+
run: |
87+
source /etc/profile.d/modules.sh
88+
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
89+
cd ${STANDALONE_DIR}
90+
${TIMING_CA} --debug 1 --runs 42 --runsInit 2 --PROCdebugMarkdown 1 --PROCresetTimers 1 --PROCdebugCSV /root/${BENCHMARK_CSV}
91+
python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py --discard 2 --input /root/${BENCHMARK_CSV} --output /root/${BENCHMARK_CSV}
92+
93+
- name: Profiler - Nsight Compute
94+
if: ${{ matrix.name == 'nvidia-h100' }}
95+
run: |
96+
dnf install -y cuda-nsight-compute-13-1
97+
source /etc/profile.d/modules.sh
98+
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
99+
cd ${STANDALONE_DIR}
100+
ncu --set none --metrics gpu__time_duration.avg --export ${{ matrix.name }} --clock-control none --force-overwrite ${TIMING_CA} --runs 21 --debug 1 --PROCdebugMarkdown 1 # Generates ${{ matrix.name }}.ncu-rep
101+
ncu --import ${STANDALONE_DIR}/${{ matrix.name }}.ncu-rep --print-units base --csv > /root/${PROFILER_CSV}
102+
rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
103+
python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_ncu.py --runs 21 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
104+
105+
- name: Profiler - Nsight Systems
106+
if: ${{ matrix.name == 'nvidia-l40s' }}
107+
run: |
108+
dnf config-manager --add-repo "https://developer.download.nvidia.com/devtools/repos/rhel$(source /etc/os-release; echo ${VERSION_ID%%.*})/$(rpm --eval '%{_arch}' | sed s/aarch/arm/)/"
109+
dnf install --nogpgcheck -y nsight-systems-cli-2026.2.1
110+
source /etc/profile.d/modules.sh
111+
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
112+
cd ${STANDALONE_DIR}
113+
nsys profile -o ${{ matrix.name }} ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1 # Generates ${{ matrix.name }}.nsys-rep
114+
nsys stats --report cuda_gpu_kern_sum --timeunit usec --force-export=true --format csv ${{ matrix.name }}.nsys-rep > /root/${PROFILER_CSV}
115+
rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
116+
python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_nsys.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
117+
118+
- name: Profiler - rocprofv2
119+
if: ${{ matrix.name == 'amd-mi300x' || matrix.name == 'amd-w7900' }}
120+
run: |
121+
source /etc/profile.d/modules.sh
122+
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
123+
cd ${STANDALONE_DIR}
124+
rocprofv2 --output-directory /root --output-file-name ${{ matrix.name }} ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1 # Generates results_${{ matrix.name }}.csv == ${PROFILER_CSV}
125+
rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
126+
python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_rocprofv2.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
78127
79128
- name: Upload Artifact
80-
uses: actions/upload-artifact@v4
129+
uses: actions/upload-artifact@v6
81130
with:
82131
name: ${{ matrix.name }}-artifact
83-
path: /root/artifact.txt
132+
path: "/root/*.csv"
133+
134+
- name: Display table on GitHub web
135+
run: |
136+
source /etc/profile.d/modules.sh
137+
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
138+
mkdir -p ${STANDALONE_DIR}/baseline
139+
curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${PROFILER_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${PROFILER_CSV}
140+
curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${BENCHMARK_CSV}
141+
python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${PROFILER_CSV} --current /root/${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
142+
echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
143+
python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} --current /root/${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}
144+
rm -rf ${STANDALONE_DIR}/baseline

0 commit comments

Comments
 (0)