Skip to content

Commit db57de0

Browse files
1 parent 6acc20f commit db57de0

5 files changed

Lines changed: 107 additions & 24 deletions

File tree

benchmarks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,7 @@ add_benchmark(shuffle src/shuffle.cpp)
140140
add_benchmark(std_copy src/std_copy.cpp)
141141
add_benchmark(sv_equal src/sv_equal.cpp)
142142
add_benchmark(swap_ranges src/swap_ranges.cpp)
143+
add_benchmark(uninitialized_copy src/uninitialized_copy.cpp)
143144
add_benchmark(unique src/unique.cpp)
144145
add_benchmark(vector_bool_copy src/vector_bool_copy.cpp)
145146
add_benchmark(vector_bool_copy_n src/vector_bool_copy_n.cpp)
benchmarks/src/uninitialized_copy.cpp

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
4+
#include <benchmark/benchmark.h>
5+
#include <cstddef>
6+
#include <cstdint>
7+
#include <cstring>
8+
#include <memory>
9+
10+
#include "skewed_allocator.hpp"
11+
12+
using namespace std;
13+
14+
// Benchmark body: repeatedly runs uninitialized_copy over N elements of type T,
// copying from one Padder<T[N]>-wrapped array into another.
//
// Template parameters:
//   N      - number of elements copied per iteration.
//   T      - element type (the buffers are filled with memset, so T is treated as raw bytes here).
//   Padder - wrapper template exposing the array as `.value`; presumably controls the
//            buffer's alignment/offset (see skewed_allocator.hpp) - TODO confirm.
template <size_t N, class T, template <class> class Padder>
15+
void bm_uninitialized_copy(benchmark::State& state) {
16+
// Source buffer: every byte set to 'a'.
Padder<T[N]> padded_a;
17+
auto a = &padded_a.value[0];
18+
memset(a, 'a', sizeof(T) * N);
19+
// Destination buffer: every byte set to 'b', so it starts out different from the source.
Padder<T[N]> padded_b;
20+
auto b = &padded_b.value[0];
21+
memset(b, 'b', sizeof(T) * N);
22+
23+
// Timed loop. DoNotOptimize on the source pointer before the copy and on the destination
// pointer after it keeps the optimizer from eliding the copy as dead code.
for (auto _ : state) {
24+
benchmark::DoNotOptimize(a);
25+
// Unqualified call resolves to std::uninitialized_copy via the file-scope using-directive.
uninitialized_copy(a, a + N, b);
26+
benchmark::DoNotOptimize(b);
27+
}
28+
}
29+
30+
// Register bm_uninitialized_copy for uint8_t elements at element counts from 1 to 9000,
// using the highly_aligned padder.
// NOTE(review): highly_aligned / not_highly_aligned are not defined in this file; presumably
// they come from skewed_allocator.hpp - confirm their exact alignment guarantees there.
BENCHMARK(bm_uninitialized_copy<1, uint8_t, highly_aligned>);
31+
BENCHMARK(bm_uninitialized_copy<5, uint8_t, highly_aligned>);
32+
BENCHMARK(bm_uninitialized_copy<15, uint8_t, highly_aligned>);
33+
BENCHMARK(bm_uninitialized_copy<26, uint8_t, highly_aligned>);
34+
BENCHMARK(bm_uninitialized_copy<32, uint8_t, highly_aligned>);
35+
BENCHMARK(bm_uninitialized_copy<38, uint8_t, highly_aligned>);
36+
BENCHMARK(bm_uninitialized_copy<60, uint8_t, highly_aligned>);
37+
BENCHMARK(bm_uninitialized_copy<64, uint8_t, highly_aligned>);
38+
BENCHMARK(bm_uninitialized_copy<125, uint8_t, highly_aligned>);
39+
BENCHMARK(bm_uninitialized_copy<800, uint8_t, highly_aligned>);
40+
BENCHMARK(bm_uninitialized_copy<3000, uint8_t, highly_aligned>);
41+
BENCHMARK(bm_uninitialized_copy<9000, uint8_t, highly_aligned>);
42+
43+
// Same element counts again, this time with the not_highly_aligned padder, so the two
// groups can be compared size-for-size.
BENCHMARK(bm_uninitialized_copy<1, uint8_t, not_highly_aligned>);
44+
BENCHMARK(bm_uninitialized_copy<5, uint8_t, not_highly_aligned>);
45+
BENCHMARK(bm_uninitialized_copy<15, uint8_t, not_highly_aligned>);
46+
BENCHMARK(bm_uninitialized_copy<26, uint8_t, not_highly_aligned>);
47+
BENCHMARK(bm_uninitialized_copy<32, uint8_t, not_highly_aligned>);
48+
BENCHMARK(bm_uninitialized_copy<38, uint8_t, not_highly_aligned>);
49+
BENCHMARK(bm_uninitialized_copy<60, uint8_t, not_highly_aligned>);
50+
BENCHMARK(bm_uninitialized_copy<64, uint8_t, not_highly_aligned>);
51+
BENCHMARK(bm_uninitialized_copy<125, uint8_t, not_highly_aligned>);
52+
BENCHMARK(bm_uninitialized_copy<800, uint8_t, not_highly_aligned>);
53+
BENCHMARK(bm_uninitialized_copy<3000, uint8_t, not_highly_aligned>);
54+
BENCHMARK(bm_uninitialized_copy<9000, uint8_t, not_highly_aligned>);
55+
56+
BENCHMARK_MAIN();

stl/inc/memory

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ _NoThrowFwdIt uninitialized_copy_n(const _InIt _First, const _Diff _Count_raw, _
144144
auto _UFirst = _STD _Get_unwrapped_n(_First, _Count);
145145
auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count);
146146
if constexpr (_Iter_copy_cat<decltype(_UFirst), decltype(_UDest)>::_Bitcopy_constructible) {
147-
_UDest = _STD _Copy_memmove_n(_UFirst, static_cast<size_t>(_Count), _UDest);
147+
_UDest = _STD _Copy_memcpy_n(_UFirst, static_cast<size_t>(_Count), _UDest);
148148
} else {
149149
_Uninitialized_backout<decltype(_UDest)> _Backout{_UDest};
150150

@@ -295,7 +295,7 @@ pair<_InIt, _NoThrowFwdIt> uninitialized_move_n(_InIt _First, const _Diff _Count
295295
auto _UFirst = _STD _Get_unwrapped_n(_First, _Count);
296296
auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count);
297297
if constexpr (_Iter_move_cat<decltype(_UFirst), decltype(_UDest)>::_Bitcopy_constructible) {
298-
_UDest = _STD _Copy_memmove_n(_UFirst, static_cast<size_t>(_Count), _UDest);
298+
_UDest = _STD _Copy_memcpy_n(_UFirst, static_cast<size_t>(_Count), _UDest);
299299
_UFirst += _Count;
300300
} else {
301301
_Uninitialized_backout<decltype(_UDest)> _Backout{_UDest};
@@ -2286,7 +2286,7 @@ template <class _Ty, size_t _Size>
22862286
void _Uninitialized_copy_multidimensional(const _Ty (&_In)[_Size], _Ty (&_Out)[_Size]) {
22872287
using _Item = remove_all_extents_t<_Ty>;
22882288
if constexpr (conjunction_v<is_trivially_copy_constructible<_Item>, is_trivially_destructible<_Item>>) {
2289-
_STD _Copy_memmove_n(_In, _Size, _Out);
2289+
_STD _Copy_memcpy_n(_In, _Size, _Out);
22902290
} else if constexpr (is_array_v<_Ty>) {
22912291
_Reverse_destroy_multidimensional_n_guard<_Ty> _Guard{_Out, 0};
22922292
for (size_t& _Idx = _Guard._Index; _Idx < _Size; ++_Idx) {
@@ -2651,7 +2651,7 @@ void _Uninitialized_copy_multidimensional_al(const _Ty (&_In)[_Size], _Ty (&_Out
26512651
using _Item = remove_all_extents_t<_Ty>;
26522652
if constexpr (conjunction_v<is_trivially_copy_constructible<_Item>, is_trivially_destructible<_Item>,
26532653
_Uses_default_construct<_Alloc, _Item*, const _Item&>>) {
2654-
_STD _Copy_memmove_n(_In, _Size, _Out);
2654+
_STD _Copy_memcpy_n(_In, _Size, _Out);
26552655
} else if constexpr (is_array_v<_Ty>) {
26562656
_Reverse_destroy_multidimensional_n_al_guard<_Ty, _Alloc> _Guard{_Out, 0, _Al};
26572657
for (size_t& _Idx = _Guard._Index; _Idx < _Size; ++_Idx) {

stl/inc/xmemory

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1688,7 +1688,7 @@ _NoThrowFwdIt _Uninitialized_move_unchecked(_InIt _First, const _InIt _Last, _No
16881688
if (!_STD is_constant_evaluated())
16891689
#endif // _HAS_CXX26
16901690
{
1691-
return _STD _Copy_memmove(_First, _Last, _Dest);
1691+
return _STD _Copy_memcpy(_First, _Last, _Dest);
16921692
}
16931693
}
16941694
_Uninitialized_backout<_NoThrowFwdIt> _Backout{_Dest};
@@ -1901,20 +1901,20 @@ _CONSTEXPR20 _Alloc_ptr_t<_Alloc> _Uninitialized_copy(
19011901
auto _ULast = _STD _Get_unwrapped(_STD move(_Last));
19021902
#endif // ^^^ !_HAS_CXX20 ^^^
19031903

1904-
constexpr bool _Can_memmove = _Sent_copy_cat<decltype(_UFirst), decltype(_ULast), _Ptrval>::_Bitcopy_constructible
1905-
&& _Uses_default_construct<_Alloc, _Ptrval, decltype(*_UFirst)>::value;
1904+
constexpr bool _Can_memcpy = _Sent_copy_cat<decltype(_UFirst), decltype(_ULast), _Ptrval>::_Bitcopy_constructible
1905+
&& _Uses_default_construct<_Alloc, _Ptrval, decltype(*_UFirst)>::value;
19061906

1907-
if constexpr (_Can_memmove) {
1907+
if constexpr (_Can_memcpy) {
19081908
#if _HAS_CXX20
19091909
if (!_STD is_constant_evaluated())
19101910
#endif // _HAS_CXX20
19111911
{
19121912
if constexpr (is_same_v<decltype(_UFirst), decltype(_ULast)>) {
1913-
_STD _Copy_memmove(_STD _To_address(_UFirst), _STD _To_address(_ULast), _STD _Unfancy(_Dest));
1913+
_STD _Copy_memcpy(_STD _To_address(_UFirst), _STD _To_address(_ULast), _STD _Unfancy(_Dest));
19141914
_Dest += _ULast - _UFirst;
19151915
} else {
19161916
const auto _Count = static_cast<size_t>(_STD _Contiguous_iter_distance(_UFirst, _ULast));
1917-
_STD _Copy_memmove_n(_STD _To_address(_UFirst), _Count, _STD _Unfancy(_Dest));
1917+
_STD _Copy_memcpy_n(_STD _To_address(_UFirst), _Count, _STD _Unfancy(_Dest));
19181918
_Dest += _Count;
19191919
}
19201920
return _Dest;
@@ -1942,16 +1942,16 @@ _CONSTEXPR20 _Alloc_ptr_t<_Alloc> _Uninitialized_copy_n(
19421942
auto _UFirst = _STD _Get_unwrapped(_STD move(_First));
19431943
#endif // ^^^ No checking ^^^
19441944

1945-
constexpr bool _Can_memmove =
1945+
constexpr bool _Can_memcpy =
19461946
conjunction_v<bool_constant<_Iter_copy_cat<decltype(_UFirst), _Ptrval>::_Bitcopy_constructible>,
19471947
_Uses_default_construct<_Alloc, _Ptrval, decltype(*_UFirst)>>;
19481948

1949-
if constexpr (_Can_memmove) {
1949+
if constexpr (_Can_memcpy) {
19501950
#if _HAS_CXX20
19511951
if (!_STD is_constant_evaluated())
19521952
#endif // _HAS_CXX20
19531953
{
1954-
_STD _Copy_memmove_n(_UFirst, _Count, _STD _Unfancy(_Dest));
1954+
_STD _Copy_memcpy_n(_UFirst, _Count, _STD _Unfancy(_Dest));
19551955
_Dest += _Count;
19561956
return _Dest;
19571957
}
@@ -1973,7 +1973,7 @@ _NoThrowFwdIt _Uninitialized_copy_unchecked(_InIt _First, const _InIt _Last, _No
19731973
if (!_STD is_constant_evaluated())
19741974
#endif // _HAS_CXX26
19751975
{
1976-
return _STD _Copy_memmove(_First, _Last, _Dest);
1976+
return _STD _Copy_memcpy(_First, _Last, _Dest);
19771977
}
19781978
}
19791979

@@ -2013,7 +2013,7 @@ _CONSTEXPR20 _Alloc_ptr_t<_Alloc> _Uninitialized_move(
20132013
if (!_STD is_constant_evaluated())
20142014
#endif // _HAS_CXX20
20152015
{
2016-
_STD _Copy_memmove(_UFirst, _ULast, _STD _Unfancy(_Dest));
2016+
_STD _Copy_memcpy(_UFirst, _ULast, _STD _Unfancy(_Dest));
20172017
return _Dest + (_ULast - _UFirst);
20182018
}
20192019
}

stl/inc/xutility

Lines changed: 35 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4937,14 +4937,20 @@ _CONSTEXPR20 void _Verify_ranges_do_not_overlap(const _Iter1& _First1, const _Se
49374937
#endif // _ITERATOR_DEBUG_LEVEL != 2 ^^^
49384938
}
49394939

4940-
template <class _OutCtgIt>
4941-
_OutCtgIt _Copy_memmove_tail(
4940+
template <bool _Use_memcpy, class _OutCtgIt>
4941+
_OutCtgIt _Impl_copy_memmeow_tail(
49424942
const char* const _First_ch, const _OutCtgIt _Dest, const size_t _Byte_count, const size_t _Object_count) {
49434943
_STL_INTERNAL_CHECK(_Byte_count == _Object_count * sizeof(*_Dest));
49444944
// (pre-verified contiguous iterator)
49454945
const auto _Dest_ptr = _STD _To_address(_Dest);
49464946
const auto _Dest_ch = const_cast<char*>(reinterpret_cast<const volatile char*>(_Dest_ptr));
4947-
_CSTD memmove(_Dest_ch, _First_ch, _Byte_count);
4947+
4948+
if constexpr (_Use_memcpy) {
4949+
_CSTD memcpy(_Dest_ch, _First_ch, _Byte_count);
4950+
} else {
4951+
_CSTD memmove(_Dest_ch, _First_ch, _Byte_count);
4952+
}
4953+
49484954
if constexpr (is_pointer_v<_OutCtgIt>) {
49494955
(void) _Object_count;
49504956
// CodeQL [SM02986] This cast is correct: we're bypassing pointer arithmetic for performance.
@@ -4954,8 +4960,8 @@ _OutCtgIt _Copy_memmove_tail(
49544960
}
49554961
}
49564962

4957-
template <class _CtgIt, class _OutCtgIt>
4958-
_OutCtgIt _Copy_memmove(_CtgIt _First, _CtgIt _Last, _OutCtgIt _Dest) {
4963+
template <bool _Use_memcpy, class _CtgIt, class _OutCtgIt>
4964+
_OutCtgIt _Impl_copy_memmeow(_CtgIt _First, _CtgIt _Last, _OutCtgIt _Dest) {
49594965
_STL_INTERNAL_CHECK(_First <= _Last);
49604966
const auto _First_ptr = _STD _To_address(_First);
49614967
const auto _Last_ptr = _STD _To_address(_Last);
@@ -4965,17 +4971,37 @@ _OutCtgIt _Copy_memmove(_CtgIt _First, _CtgIt _Last, _OutCtgIt _Dest) {
49654971
const auto _Last_ch = const_cast<const char*>(reinterpret_cast<const volatile char*>(_Last_ptr));
49664972
const auto _Byte_count = static_cast<size_t>(_Last_ch - _First_ch);
49674973
_STD _Contiguous_iter_verify(_Dest, static_cast<_Iter_diff_t<_OutCtgIt>>(_Ptr_diff));
4968-
return _STD _Copy_memmove_tail(_First_ch, _STD move(_Dest), _Byte_count, _Object_count);
4974+
return _STD _Impl_copy_memmeow_tail<_Use_memcpy>(_First_ch, _STD move(_Dest), _Byte_count, _Object_count);
49694975
}
49704976

4971-
template <class _CtgIt, class _OutCtgIt>
4972-
_OutCtgIt _Copy_memmove_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _Dest) {
4977+
// Counted-range core shared by the memcpy and memmove entry points: copies _Object_count
// objects starting at _First to _Dest as raw bytes.
// _Use_memcpy is passed through unchanged to _Impl_copy_memmeow_tail, which uses it to
// choose between _CSTD memcpy and _CSTD memmove.
template <bool _Use_memcpy, class _CtgIt, class _OutCtgIt>
4978+
_OutCtgIt _Impl_copy_memmeow_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _Dest) {
49734979
// Both the source and the destination iterators are verified against _Object_count.
_STD _Contiguous_iter_verify(_First, static_cast<_Iter_diff_t<_CtgIt>>(_Object_count));
49744980
_STD _Contiguous_iter_verify(_Dest, static_cast<_Iter_diff_t<_OutCtgIt>>(_Object_count));
49754981
const auto _First_ptr = _STD _To_address(_First);
49764982
// View the source as const char* (dropping any volatile qualifier) for the byte-wise copy.
const auto _First_ch = const_cast<const char*>(reinterpret_cast<const volatile char*>(_First_ptr));
49774983
// Byte count is derived from the source element size.
const auto _Byte_count = _Object_count * sizeof(*_First_ptr);
4978-
return _STD _Copy_memmove_tail(_First_ch, _STD move(_Dest), _Byte_count, _Object_count);
4984+
return _STD _Impl_copy_memmeow_tail<_Use_memcpy>(_First_ch, _STD move(_Dest), _Byte_count, _Object_count);
4985+
}
4986+
4987+
// Copies [_First, _Last) to _Dest through _Impl_copy_memmeow<true>, i.e. the variant whose
// byte copy is performed with _CSTD memcpy.
// memcpy has undefined behavior when source and destination overlap, so this is only
// suitable for callers that know the ranges are disjoint.
// Returns the result of _Impl_copy_memmeow (presumably the end of the destination range).
template <class _CtgIt, class _OutCtgIt>
4988+
_OutCtgIt _Copy_memcpy(_CtgIt _First, _CtgIt _Last, _OutCtgIt _Dest) {
4989+
return _STD _Impl_copy_memmeow<true>(_First, _Last, _Dest);
4990+
}
4991+
4992+
// Copies _Object_count objects starting at _First to _Dest through _Impl_copy_memmeow_n<true>,
// i.e. the variant whose byte copy is performed with _CSTD memcpy.
// memcpy has undefined behavior when source and destination overlap, so this is only
// suitable for callers that know the ranges are disjoint.
// Returns the result of _Impl_copy_memmeow_n (presumably the end of the destination range).
template <class _CtgIt, class _OutCtgIt>
4993+
_OutCtgIt _Copy_memcpy_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _Dest) {
4994+
return _STD _Impl_copy_memmeow_n<true>(_First, _Object_count, _Dest);
4995+
}
4996+
4997+
// Copies [_First, _Last) to _Dest through _Impl_copy_memmeow<false>, i.e. the variant whose
// byte copy is performed with _CSTD memmove (which tolerates overlapping ranges).
// Keeps the pre-existing _Copy_memmove name and signature as a thin wrapper.
// Returns the result of _Impl_copy_memmeow (presumably the end of the destination range).
template <class _CtgIt, class _OutCtgIt>
4998+
_OutCtgIt _Copy_memmove(_CtgIt _First, _CtgIt _Last, _OutCtgIt _Dest) {
4999+
return _STD _Impl_copy_memmeow<false>(_First, _Last, _Dest);
5000+
}
5001+
5002+
// Copies _Object_count objects starting at _First to _Dest through _Impl_copy_memmeow_n<false>,
// i.e. the variant whose byte copy is performed with _CSTD memmove (which tolerates
// overlapping ranges).
// Keeps the pre-existing _Copy_memmove_n name and signature as a thin wrapper.
// Returns the result of _Impl_copy_memmeow_n (presumably the end of the destination range).
template <class _CtgIt, class _OutCtgIt>
5003+
_OutCtgIt _Copy_memmove_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _Dest) {
5004+
return _STD _Impl_copy_memmeow_n<false>(_First, _Object_count, _Dest);
49795005
}
49805006

49815007
template <class _Fn>

0 commit comments

Comments
 (0)