|
52 | 52 | #include <cstddef> |
53 | 53 | #include <type_traits> |
54 | 54 |
|
| 55 | +#include "exec_policy_wrapper_hpx.hpp" |
55 | 56 | #include "signal_hpx_impl_called.hpp" |
56 | 57 |
|
57 | 58 | namespace HPXKernelsSTD { |
@@ -84,49 +85,48 @@ void add_rank_1(ExPolicy&& policy, |
84 | 85 | y.static_extent(0) == std::experimental::dynamic_extent || |
85 | 86 | x.static_extent(0) == y.static_extent(0)); |
86 | 87 |
|
87 | | -#if defined(HPX_HAVE_DATAPAR) |
88 | | - using mdspan_x_t = std::experimental::mdspan<ElementType_x, |
89 | | - std::experimental::extents<SizeType_x, ext_x>, Layout_x, Accessor_x>; |
90 | | - using mdspan_y_t = std::experimental::mdspan<ElementType_y, |
91 | | - std::experimental::extents<SizeType_y, ext_y>, Layout_y, Accessor_y>; |
92 | | - using mdspan_z_t = std::experimental::mdspan<ElementType_z, |
93 | | - std::experimental::extents<SizeType_z, ext_z>, Layout_z, Accessor_z>; |
94 | | - |
95 | | - constexpr bool allow_explicit_vectorization = |
96 | | - std::is_arithmetic_v<ElementType_x> && |
97 | | - std::is_arithmetic_v<ElementType_y> && |
98 | | - std::is_arithmetic_v<ElementType_z> && |
99 | | - mdspan_x_t::is_always_contiguous() && |
100 | | - mdspan_y_t::is_always_contiguous() && |
101 | | - mdspan_z_t::is_always_contiguous() && |
102 | | - (hpx::is_vectorpack_execution_policy_v<ExPolicy> || |
103 | | - hpx::is_unsequenced_execution_policy_v<ExPolicy>); |
104 | | - |
105 | | - if constexpr (allow_explicit_vectorization) |
| 88 | + if constexpr (supports_vectorization_v<ExPolicy>) |
106 | 89 | { |
107 | | - // vectorize only if the arrays are contiguous and not strided |
108 | | - if (x.is_contiguous() && x.stride(0) == 1 && y.is_contiguous() && |
109 | | - y.stride(0) == 1 && z.is_contiguous() && z.stride(0) == 1) |
| 90 | + using mdspan_x_t = std::experimental::mdspan<ElementType_x, |
| 91 | + std::experimental::extents<SizeType_x, ext_x>, Layout_x, |
| 92 | + Accessor_x>; |
| 93 | + using mdspan_y_t = std::experimental::mdspan<ElementType_y, |
| 94 | + std::experimental::extents<SizeType_y, ext_y>, Layout_y, |
| 95 | + Accessor_y>; |
| 96 | + using mdspan_z_t = std::experimental::mdspan<ElementType_z, |
| 97 | + std::experimental::extents<SizeType_z, ext_z>, Layout_z, |
| 98 | + Accessor_z>; |
| 99 | + |
| 100 | + if constexpr (allow_vectorization_v<mdspan_x_t> && |
| 101 | + allow_vectorization_v<mdspan_y_t> && |
| 102 | + allow_vectorization_v<mdspan_z_t>) |
110 | 103 | { |
111 | | - auto zip = hpx::util::make_zip_iterator(x.data(), y.data()); |
112 | | - hpx::transform(policy, zip, zip + x.extent(0), z.data(), |
113 | | - [&](auto v) { return hpx::get<0>(v) + hpx::get<1>(v); }); |
| 104 | + // vectorize only if the arrays are contiguous and not strided |
| 105 | + if (x.is_contiguous() && x.stride(0) == 1 && y.is_contiguous() && |
| 106 | + y.stride(0) == 1 && z.is_contiguous() && z.stride(0) == 1) |
| 107 | + { |
| 108 | + auto zip = hpx::util::make_zip_iterator(x.data(), y.data()); |
| 109 | + hpx::transform(policy, zip, zip + x.extent(0), z.data(), |
| 110 | + [&](auto v) { return hpx::get<0>(v) + hpx::get<1>(v); }); |
| 111 | + } |
| 112 | + else |
| 113 | + { |
| 114 | + // fall back to the underlying base policy |
| 115 | + hpx::experimental::for_loop( |
| 116 | + hpx::execution::experimental::to_non_simd(policy), |
| 117 | + SizeType_z(0), x.extent(0), |
| 118 | + [&](auto i) { z(i) = x(i) + y(i); }); |
| 119 | + } |
114 | 120 | } |
115 | 121 | else |
116 | 122 | { |
117 | | - // fall back to the underlying base policy |
118 | | - hpx::experimental::for_loop(policy.base_policy(), SizeType_z(0), |
119 | | - x.extent(0), [&](auto i) { z(i) = x(i) + y(i); }); |
| 123 | + hpx::experimental::for_loop( |
| 124 | + hpx::execution::experimental::to_non_simd(policy), |
| 125 | + SizeType_z(0), z.extent(0), |
| 126 | + [&](auto i) { z(i) = x(i) + y(i); }); |
120 | 127 | } |
121 | 128 | } |
122 | 129 | else |
123 | | - if constexpr (hpx::is_vectorpack_execution_policy_v<ExPolicy>) |
124 | | - { |
125 | | - hpx::experimental::for_loop(policy.base_policy(), SizeType_z(0), |
126 | | - z.extent(0), [&](auto i) { z(i) = x(i) + y(i); }); |
127 | | - } |
128 | | - else |
129 | | -#endif |
130 | 130 | { |
131 | 131 | hpx::experimental::for_loop(policy, SizeType_z(0), z.extent(0), |
132 | 132 | [&](auto i) { z(i) = x(i) + y(i); }); |
|
0 commit comments