Skip to content

Commit 6d78e22

Browse files
committed
Add complex-norm benchmark
ChangeLog: * benchtests/Makefile: * benchtests/complex-norm.cpp: New file.
1 parent 7eb6a7a commit 6d78e22

2 files changed

Lines changed: 95 additions & 2 deletions

File tree

benchtests/Makefile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44

55
benchmarks=$(patsubst %.cpp,%,$(wildcard *.cpp))
66
archs=native ivybridge westmere x86-64
7-
CXXFLAGS=-g0 -O2 -std=gnu++26 -Wall -Wextra -Wno-psabi -fmax-errors=2 -DVIR_NEXT_PATCH -DVIR_EXTENSIONS
7+
CXXFLAGS=-g0 -O2 -std=gnu++26 -Wall -Wextra -Wno-psabi -fmax-errors=2 -D VIR_PATCH_MATH -DVIR_EXTENSIONS
88

99
fastmath=-ffast-math
1010
default=
11+
improve_cx1=-DVIR_PATCH_IMPROVE_CX=1
12+
improve_cx2=-DVIR_PATCH_IMPROVE_CX=2
13+
improve_cx3=-DVIR_PATCH_IMPROVE_CX=3
1114

12-
variants=default fastmath
15+
variants=default fastmath improve_cx1 improve_cx2 improve_cx3
1316

1417
all: all-targets
1518

benchtests/complex-norm.cpp

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/* SPDX-License-Identifier: GPL-3.0-or-later */
2+
/* Copyright © 2019–2026 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH
3+
* Matthias Kretz <m.kretz@gsi.de>
4+
*/
5+
6+
#include "bench.h"
7+
#include <climits>
8+
#include <complex>
9+
10+
// Benchmark of norm() on complex-valued inputs.
//
// Written as a partial specialization of Benchmark; the primary template is not
// visible here (presumably declared in "bench.h" -- confirm). Two numbers are
// reported per type, labelled by `info`: one from a chain of dependent norm()
// calls and one from a group of independent norm() calls.
template <int Special>
11+
struct Benchmark<Special>
12+
{
13+
// Type filter: true for types satisfying complex_like, and for types satisfying
// simd_vec_type whose abi_type has a member _S_is_cx_ileav that is usable as a
// constant expression and evaluates to true (nested requirement).
template <typename T>
14+
static constexpr bool accept
15+
= complex_like<T>
16+
or (simd_vec_type<T> and requires { requires T::abi_type::_S_is_cx_ileav; });
17+
18+
// Labels for the two values returned by run(), in the same order.
static constexpr Info<2> info = {"Latency", "Throughput"};
19+
20+
// Builds the next norm() input `d` from the previous norm() result `r`
// (SIMD overload): `r` is concatenated with itself and the result is
// reinterpreted as T via std::bit_cast. fake_modify is applied to `r` before and
// to `d` after the conversion.
// NOTE(review): vir::fake_modify presumably makes the optimizer treat its
// argument as changed without emitting real work -- confirm in the vir headers.
template <simd_vec_type T>
21+
[[gnu::always_inline]]
22+
static void
23+
d_from_r(T& d, simd_vec_type auto& r)
24+
{
25+
vir::fake_modify(r);
26+
d = std::bit_cast<T>(cat(r, r));
27+
vir::fake_modify(d);
28+
}
29+
30+
// Builds the next norm() input `d` from the previous norm() result `r`
// (complex_like overload): `d` is assigned from the braced list {r, r}.
// fake_modify is applied to `r` before and to `d` after the assignment.
template <complex_like T>
31+
[[gnu::always_inline]]
32+
static void
33+
d_from_r(T& d, typename T::value_type& r)
34+
{
35+
vir::fake_modify(r);
36+
d = {r, r};
37+
vir::fake_modify(d);
38+
}
39+
40+
41+
// Runs both measurements for type T and returns them in the order given by
// `info`. Each entry is the value returned by time_median for a lambda, scaled
// by one over the number of norm() calls inside that lambda (1/4 and 1/6).
template <class T>
42+
[[gnu::flatten]]
43+
static Times<info.size()>
44+
run()
45+
{
46+
// TT is value_type_t<T>; TTT is TT's own value_type.
using TT = value_type_t<T>;
47+
using TTT = typename TT::value_type;
48+
49+
constexpr TT init = TT(1, 0);
50+
51+
// `b` is the starting value of the dependent chain: T() + TT(0, 1).
T zerov = T();
52+
T b = zerov + TT(TTT(0), TTT(1));
53+
vir::fake_modify(zerov, b);
54+
55+
// Six separate inputs for the independent-call measurement, each set to
// zerov + init and then passed through fake_modify.
T data[6];
56+
for (T& a : data)
57+
{
58+
a = zerov + init;
59+
vir::fake_modify(a);
60+
}
61+
62+
return {
63+
// First entry: four norm() calls in sequence, where each call's input is
// built from the previous call's result via d_from_r.
0.25 * time_median([&] [[gnu::always_inline]] {
64+
auto d = b;
65+
// Unqualified call below, with std::norm made visible by this using-declaration.
using std::norm;
66+
auto r = norm(d); d_from_r(d, r);
67+
r = norm(d); d_from_r(d, r);
68+
r = norm(d); d_from_r(d, r);
69+
// The last result is written to `b` (captured by reference), which is the
// value copied into `d` at the top of this lambda.
r = norm(d); d_from_r(b, r);
70+
}),
71+
// Second entry: six std::norm() calls on separate inputs; no call consumes
// another call's result. All six results are handed to vir::fake_read.
// NOTE(review): vir::fake_read presumably keeps the results from being
// optimized away -- confirm in the vir headers.
1./6. * time_median([&] [[gnu::always_inline]] {
72+
auto d0 = data[0]; vir::fake_modify(d0); auto r0 = std::norm(d0);
73+
auto d1 = data[1]; vir::fake_modify(d1); auto r1 = std::norm(d1);
74+
auto d2 = data[2]; vir::fake_modify(d2); auto r2 = std::norm(d2);
75+
auto d3 = data[3]; vir::fake_modify(d3); auto r3 = std::norm(d3);
76+
auto d4 = data[4]; vir::fake_modify(d4); auto r4 = std::norm(d4);
77+
auto d5 = data[5]; vir::fake_modify(d5); auto r5 = std::norm(d5);
78+
vir::fake_read(r0, r1, r2, r3, r4, r5);
79+
})
80+
};
81+
}
82+
};
83+
84+
// Invokes bench_all for std::complex<float> and std::complex<double>, in that
// order. The std::float16_t instantiation is present but commented out.
// NOTE(review): bench_all and the caller of bench_main are not visible here;
// presumably both come from "bench.h" -- confirm.
void
85+
bench_main()
86+
{
87+
//bench_all<std::complex<std::float16_t>>();
88+
bench_all<std::complex<float>>();
89+
bench_all<std::complex<double>>();
90+
}

0 commit comments

Comments
 (0)