Skip to content

Commit 5249282

Browse files
authored
Merge pull request #321 from mhoemmen/LWG4302
Implement LWG4302; fix #320
2 parents 3a63e9e + 0e215d4 commit 5249282

8 files changed

Lines changed: 42 additions & 195 deletions

File tree

examples/kokkos-based/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,5 @@ linalg_add_example(dotc_kokkos)
 linalg_add_example(idx_abs_max_kokkos)
 linalg_add_example(vector_norm2_kokkos)
 linalg_add_example(vector_abs_sum_kokkos)
-linalg_add_example(vector_sum_of_squares_kokkos)
 linalg_add_example(scale_kokkos)
 linalg_add_example(matrix_vector_product_kokkos)

examples/kokkos-based/vector_sum_of_squares_kokkos.cpp

Lines changed: 0 additions & 52 deletions
This file was deleted.

include/experimental/__p1673_bits/blas1_vector_norm2.hpp

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
 #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS1_VECTOR_NORM2_HPP_
 #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS1_VECTOR_NORM2_HPP_
 
+#include "abs_if_needed.hpp"
 #include "blas1_vector_sum_of_squares.hpp"
 #include <cmath>
 #include <cstdlib>
@@ -60,18 +61,26 @@ Scalar vector_two_norm(
 mdspan<ElementType, extents<SizeType, ext0>, Layout, Accessor> x,
 Scalar init)
 {
-// Initialize the sum of squares result
-sum_of_squares_result<Scalar> ssq_init;
-ssq_init.scaling_factor = Scalar{};
-// FIXME (Hoemmen 2021/05/27) We'll need separate versions of this
-// for types whose "one" we don't know how to construct.
-ssq_init.scaled_sum_of_squares = 1.0;
-
-// Compute the sum of squares using an algorithm that avoids
-// underflow and overflow by scaling.
-auto ssq_res = vector_sum_of_squares(exec, x, ssq_init);
 using std::sqrt;
-return init + ssq_res.scaling_factor * sqrt(ssq_res.scaled_sum_of_squares);
+
+if constexpr (std::is_floating_point_v<Scalar> && std::is_floating_point_v<typename decltype(x)::value_type>) {
+// Initialize the sum of squares result
+detail::sum_of_squares_result<Scalar> ssq_init;
+ssq_init.scaling_factor = init == Scalar{} ? Scalar{} : impl::abs_if_needed(init);
+ssq_init.scaled_sum_of_squares = init == Scalar{} ? Scalar{} : Scalar{1.0};
+
+// Compute the sum of squares using an algorithm that avoids
+// underflow and overflow by scaling.
+auto ssq_res = detail::vector_sum_of_squares(exec, x, ssq_init);
+return ssq_res.scaling_factor * sqrt(ssq_res.scaled_sum_of_squares);
+}
+else {
+Scalar result = impl::abs_if_needed(init) * impl::abs_if_needed(init);
+for (SizeType i = 0; i < x.extent(0); ++i) {
+result += impl::abs_if_needed(x(i)) * impl::abs_if_needed(x(i));
+}
+return sqrt(result);
+}
 }
 
 template<class ExecutionPolicy,

include/experimental/__p1673_bits/blas1_vector_sum_of_squares.hpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ namespace MDSPAN_IMPL_PROPOSED_NAMESPACE {
 inline namespace __p1673_version_0 {
 namespace linalg {
 
+namespace detail {
+
 // Scaled sum of squares of a vector's elements
 template<class Scalar>
 struct sum_of_squares_result {
@@ -53,19 +55,23 @@ struct is_custom_vector_sum_of_squares_avail<
 && ! impl::is_inline_exec_v<Exec>
 >
 >
-: std::true_type{};
+: std::bool_constant<
+std::is_floating_point_v<typename x_t::value_type> &&
+std::is_floating_point_v<Scalar>
+>
+{};
 
 } // end anonymous namespace
 
 template<class ElementType,
-class SizeType,
+class IndexType,
 ::std::size_t ext0,
 class Layout,
 class Accessor,
 class Scalar>
 sum_of_squares_result<Scalar> vector_sum_of_squares(
 impl::inline_exec_t&& /* exec */,
-mdspan<ElementType, extents<SizeType, ext0>, Layout, Accessor> x,
+mdspan<ElementType, extents<IndexType, ext0>, Layout, Accessor> x,
 sum_of_squares_result<Scalar> init)
 {
 using std::abs;
@@ -79,7 +85,7 @@ sum_of_squares_result<Scalar> vector_sum_of_squares(
 
 Scalar scale = init.scaling_factor;
 Scalar ssq = init.scaled_sum_of_squares;
-for (SizeType i = 0; i < x.extent(0); ++i) {
+for (IndexType i = 0; i < x.extent(0); ++i) {
 if (abs(x(i)) != 0.0) {
 const auto absxi = abs(x(i));
 if (scale < absxi) {
@@ -102,14 +108,14 @@ sum_of_squares_result<Scalar> vector_sum_of_squares(
 
 template<class ExecutionPolicy,
 class ElementType,
-class SizeType,
+class IndexType,
 ::std::size_t ext0,
 class Layout,
 class Accessor,
 class Scalar>
 sum_of_squares_result<Scalar> vector_sum_of_squares(
 ExecutionPolicy&& exec,
-mdspan<ElementType, extents<SizeType, ext0>, Layout, Accessor> v,
+mdspan<ElementType, extents<IndexType, ext0>, Layout, Accessor> v,
 sum_of_squares_result<Scalar> init)
 {
 constexpr bool use_custom = is_custom_vector_sum_of_squares_avail<
@@ -125,18 +131,19 @@ sum_of_squares_result<Scalar> vector_sum_of_squares(
 }
 
 template<class ElementType,
-class SizeType,
+class IndexType,
 ::std::size_t ext0,
 class Layout,
 class Accessor,
 class Scalar>
 sum_of_squares_result<Scalar> vector_sum_of_squares(
-mdspan<ElementType, extents<SizeType, ext0>, Layout, Accessor> v,
+mdspan<ElementType, extents<IndexType, ext0>, Layout, Accessor> v,
 sum_of_squares_result<Scalar> init)
 {
 return vector_sum_of_squares(impl::default_exec_t{}, v, init);
 }
 
+} // end namespace detail
 
 } // end namespace linalg
 } // end inline namespace __p1673_version_0

tests/kokkos-based/CMakeLists.txt

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,6 @@ linalg_add_test_kokkos(
 linalg_add_test_kokkos(
 vector_norm2_kokkos
 "vector_norm2: kokkos impl")
-linalg_add_test_kokkos(
-vector_sum_of_squares_kokkos
-"vector_sum_of_squares: kokkos impl")
 
 linalg_add_test_kokkos(
 vector_abs_sum_kokkos

tests/kokkos-based/vector_sum_of_squares_kokkos.cpp

Lines changed: 0 additions & 111 deletions
This file was deleted.

tests/native/norm2.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,14 @@ namespace {
 std::vector<scalar_t> storage(vectorSize);
 vector_t x(storage.data(), vectorSize);
 
-// Testing for absolute equality
 const auto normResult = vector_two_norm(x, mag_t{});
 static_assert( std::is_same_v<std::remove_const_t<decltype(normResult)>, mag_t> );
 const mag_t expectedNormResult{};
 EXPECT_EQ( expectedNormResult, normResult );
 
-// Make sure that init always gets added to the result.
-const mag_t normResultPlusOne = vector_two_norm(x, mag_t(1.0));
-EXPECT_EQ( expectedNormResult + mag_t(1.0), normResultPlusOne );
+const mag_t normResultPlusOne = vector_two_norm(x, mag_t(3.0));
+const mag_t expectedNormResultPlusOne = mag_t(3.0);
+EXPECT_EQ( expectedNormResultPlusOne, normResultPlusOne );
 
 // Test 'auto' overload.
 const auto normResultAuto = vector_two_norm(x);
@@ -63,15 +62,14 @@ namespace {
 
 x[0] = -3;
 
-// Testing for absolute equality
 const auto normResult = vector_two_norm(x, mag_t{});
 static_assert( std::is_same_v<std::remove_const_t<decltype(normResult)>, mag_t> );
 const mag_t expectedNormResult = abs( x[0] );
 EXPECT_EQ( expectedNormResult, normResult );
 
-// Make sure that init always gets added to the result.
-const mag_t normResultPlusOne = vector_two_norm(x, mag_t(1.0));
-EXPECT_EQ( expectedNormResult + mag_t(1.0), normResultPlusOne );
+const mag_t normResultPlusOne = vector_two_norm(x, mag_t(4.0));
+const mag_t expectedNormResultPlusOne = mag_t(5.0);
+EXPECT_EQ( expectedNormResultPlusOne, normResultPlusOne );
 
 // Test 'auto' overload.
 const auto normResultAuto = vector_two_norm(x);

tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_vector_sum_of_squares_kk.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
 
 #include "signal_kokkos_impl_called.hpp"
 
-namespace KokkosKernelsSTD {
+namespace KokkosKernelsSTD::detail {
 
 template<class ExecSpace,
 class ElementType,

0 commit comments

Comments
 (0)