From 1a11a5038761d383bafcd1306d0f648756af52fc Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 3 Mar 2026 22:53:42 +0000 Subject: [PATCH 1/5] Add CHERI-safe memmove implementation New attempt at custom memmove, succeeding PR #593 which was reverted due to fuzzer-found bugs (off-by-one in reverse copy loop with size_t underflow) and CHERI incompatibility (byte-at-a-time reverse copy destroys capability tags). Key changes from the original PR #593: - Per-Arch move() and forward_move() methods following the existing copy() pattern, instead of a single generic byte-by-byte reverse. - Three-way overlap detection: non-overlapping uses optimized Arch::copy(), dst > src overlap uses Arch::move() (reverse), and dst < src overlap uses Arch::forward_move() (forward without the copy_end trick that re-reads already-overwritten bytes). - block_reverse_copy operates at register width (16 bytes on x86-64/PPC64/CHERI) instead of byte-by-byte, with byte-by-byte only for the sub-register remainder. - GenericStrictProvenance (CHERI) move() and forward_move() preserve capability tags by using pointer-pair (Ptr2) operations on aligned regions, with byte-by-byte only for sub-pointer head/tail padding where no aligned capabilities can exist. - Comprehensive tests: overlapping copies at various sizes (1-2048) and overlap amounts, exhaustive offset x length testing for small buffers (2-64), alignment boundary tests, bounds checking, and direct snmalloc::memmove unchecked path tests. - Re-enables memmove fuzz tests (simple_memmove, forward_memmove, backward_memmove) in snmalloc-fuzzer.cpp. Co-authored-by: Claude --- fuzzing/snmalloc-fuzzer.cpp | 6 - src/snmalloc/global/memcpy.h | 379 +++++++++++++++++++++++++ src/snmalloc/override/memcpy.cc | 9 + src/test/func/memcpy/func-memcpy.cc | 412 ++++++++++++++++++++++++++++ 4 files changed, 800 insertions(+), 6 deletions(-) diff --git a/fuzzing/snmalloc-fuzzer.cpp b/fuzzing/snmalloc-fuzzer.cpp index 5ee624c3c..441e4f134 100644 --- a/fuzzing/snmalloc-fuzzer.cpp +++ b/fuzzing/snmalloc-fuzzer.cpp @@ -41,8 +41,6 @@ void memcpy_with_align_offset( ::operator delete(dst_, std::align_val_t{dest_alignment}); } -/* - * disable memmove tests for now void simple_memmove(std::vector data) { std::vector dest(data.size()); @@ -76,7 +74,6 @@ void backward_memmove(std::string data, size_t offset) std::string_view(to_move.data(), after_move)) abort(); } -*/ constexpr static size_t size_limit = 16384; @@ -213,12 +210,9 @@ void snmalloc_random_walk( } FUZZ_TEST(snmalloc_fuzzing, simple_memcpy); -/* - * disable memmove tests for now FUZZ_TEST(snmalloc_fuzzing, simple_memmove); FUZZ_TEST(snmalloc_fuzzing, forward_memmove); FUZZ_TEST(snmalloc_fuzzing, backward_memmove); -*/ FUZZ_TEST(snmalloc_fuzzing, memcpy_with_align_offset) .WithDomains( fuzztest::InRange(0, 6), diff --git a/src/snmalloc/global/memcpy.h b/src/snmalloc/global/memcpy.h index c1600d517..e7436e3a0 100644 --- a/src/snmalloc/global/memcpy.h +++ b/src/snmalloc/global/memcpy.h @@ -43,6 +43,23 @@ namespace snmalloc } } + /** + * Reverse-copy a block using the specified chunk size. Copies as many + * complete chunks of `Size` bytes as possible from end to start. + * After the loop, bytes [0, len % Size) remain uncopied. + */ + template + SNMALLOC_FAST_PATH_INLINE void + block_reverse_copy(void* dst, const void* src, size_t len) + { + size_t i = len; + while (i >= Size) + { + i -= Size; + copy_one(pointer_offset(dst, i), pointer_offset(src, i)); + } + } + /** * Perform an overlapping copy of the end. This will copy one (potentially * unaligned) `T` from the end of the source to the end of the destination. @@ -177,6 +194,41 @@ namespace snmalloc } return orig_dst; } + + /** + * Forward copy for overlapping memmove where dst < src. + * Uses block_copy without copy_end to avoid re-reading overwritten + * bytes. Caller guarantees len > 0 and buffers overlap. + */ + static void* forward_move(void* dst, const void* src, size_t len) + { + auto orig_dst = dst; + block_copy(dst, src, len); + size_t remainder = len % LargestRegisterSize; + if (remainder > 0) + { + size_t offset = len - remainder; + block_copy<1>( + pointer_offset(dst, offset), + pointer_offset(src, offset), + remainder); + } + return orig_dst; + } + + /** + * Reverse copy for overlapping memmove where dst > src. + * Caller guarantees len > 0 and dst != src. + */ + static void* move(void* dst, const void* src, size_t len) + { + auto orig_dst = dst; + block_reverse_copy(dst, src, len); + size_t remainder = len % LargestRegisterSize; + if (remainder > 0) + block_reverse_copy<1>(dst, src, remainder); + return orig_dst; + } }; /** @@ -309,6 +361,226 @@ namespace snmalloc } return orig_dst; } + + /** + * Forward copy for overlapping memmove where dst < src. + * Uses the same three-case structure as copy() but avoids copy_end + * and unaligned_start tricks that re-read already-written bytes. + * Caller guarantees len > 0 and buffers overlap. + */ + static void* forward_move(void* dst, const void* src, size_t len) + { + auto orig_dst = dst; + + /* Tiny case: no aligned pointer can fit, byte-by-byte is safe. */ + if ( + len < sizeof(void*) + + (static_cast(-static_cast(address_cast(src))) & + (alignof(void*) - 1))) + { + block_copy<1>(dst, src, len); + } + /* Equally-misaligned: use pointer-width forward operations. */ + else if ( + address_misalignment(address_cast(src)) == + address_misalignment(address_cast(dst))) + { + size_t src_misalign = + address_misalignment(address_cast(src)); + size_t head_pad = + src_misalign == 0 ? 0 : (alignof(void*) - src_misalign); + size_t tail_pad = (len - head_pad) % alignof(void*); + size_t aligned_len = len - head_pad - tail_pad; + + /* Copy head sub-pointer bytes forward (byte-by-byte). */ + if (head_pad > 0) + block_copy<1>(dst, src, head_pad); + + /* Forward copy aligned middle using pointer-pair operations. */ + if (aligned_len > 0) + { + struct Ptr2 + { + void* p[2]; + }; + + void* aligned_dst = pointer_offset(dst, head_pad); + const void* aligned_src = pointer_offset(src, head_pad); + + /* Forward copy pairs of pointers */ + size_t i = 0; + for (; i + sizeof(Ptr2) <= aligned_len; i += sizeof(Ptr2)) + { + auto* dp = + static_cast(pointer_offset(aligned_dst, i)); + auto* sp = + static_cast(pointer_offset(aligned_src, i)); + *dp = *sp; + } + + /* Handle a remaining single pointer */ + if (i + sizeof(void*) <= aligned_len) + { + auto* dp = + static_cast(pointer_offset(aligned_dst, i)); + auto* sp = + static_cast(pointer_offset(aligned_src, i)); + *dp = *sp; + } + } + + /* Copy tail sub-pointer bytes forward (byte-by-byte). */ + if (tail_pad > 0) + { + size_t tail_off = len - tail_pad; + block_copy<1>( + pointer_offset(dst, tail_off), + pointer_offset(src, tail_off), + tail_pad); + } + } + /* Differently misaligned: integer-only forward copy is safe. */ + else + { + block_copy(dst, src, len); + size_t remainder = len % LargestRegisterSize; + if (remainder > 0) + { + size_t offset = len - remainder; + block_copy<1>( + pointer_offset(dst, offset), + pointer_offset(src, offset), + remainder); + } + } + return orig_dst; + } + + /** + * Reverse copy for overlapping memmove where dst > src. + * Caller guarantees len > 0 and dst != src. + * + * Three cases, mirroring the forward copy() structure: + * + * 1. Tiny: buffer can't contain an aligned pointer, so byte-by-byte + * reverse is safe. + * + * 2. Equally misaligned: pointers could be at aligned positions. + * Use pointer-sized operations on the aligned interior, and + * byte-by-byte for the unaligned head/tail. + * + * 3. Differently misaligned: no pointer-aligned address in one maps + * to a pointer-aligned address in the other, so integer-only + * reverse copy is safe. + */ + static void* move(void* dst, const void* src, size_t len) + { + auto orig_dst = dst; + + /* + * Tiny case: the buffer cannot contain a pointer-aligned, pointer- + * sized region, so byte-by-byte reverse is safe. + */ + if ( + len < sizeof(void*) + + (static_cast(-static_cast(address_cast(src))) & + (alignof(void*) - 1))) + { + block_reverse_copy<1>(dst, src, len); + } + /* + * Equally-misaligned: pointers could be hiding at aligned positions. + * Must use pointer-width operations on the aligned interior. + */ + else if ( + address_misalignment(address_cast(src)) == + address_misalignment(address_cast(dst))) + { + /* + * Compute alignment boundaries. head_pad is the number of bytes + * before the first aligned position; tail_pad is the number of + * bytes after the last aligned position. + */ + size_t src_misalign = + address_misalignment(address_cast(src)); + size_t head_pad = + src_misalign == 0 ? 0 : (alignof(void*) - src_misalign); + size_t tail_pad = (len - head_pad) % alignof(void*); + size_t aligned_len = len - head_pad - tail_pad; + + /* + * Reverse copy the tail sub-pointer bytes (byte-by-byte). + * These are past the last aligned position so no pointers here. + */ + if (tail_pad > 0) + { + size_t tail_off = len - tail_pad; + block_reverse_copy<1>( + pointer_offset(dst, tail_off), + pointer_offset(src, tail_off), + tail_pad); + } + + /* + * Reverse copy the aligned middle using pointer-pair operations + * to preserve capability tags. + */ + if (aligned_len > 0) + { + struct Ptr2 + { + void* p[2]; + }; + + void* aligned_dst = pointer_offset(dst, head_pad); + const void* aligned_src = pointer_offset(src, head_pad); + + /* Reverse copy pairs of pointers */ + size_t i = aligned_len; + while (i >= sizeof(Ptr2)) + { + i -= sizeof(Ptr2); + auto* dp = static_cast(pointer_offset(aligned_dst, i)); + auto* sp = + static_cast(pointer_offset(aligned_src, i)); + *dp = *sp; + } + + /* Handle a remaining single pointer if odd alignment */ + if (i >= sizeof(void*)) + { + i -= sizeof(void*); + auto* dp = + static_cast(pointer_offset(aligned_dst, i)); + auto* sp = + static_cast(pointer_offset(aligned_src, i)); + *dp = *sp; + } + } + + /* + * Reverse copy the head sub-pointer bytes (byte-by-byte). + * These are before the first aligned position so no pointers. + */ + if (head_pad > 0) + { + block_reverse_copy<1>(dst, src, head_pad); + } + } + /* + * Differently misaligned: no pointer-aligned address in src maps to + * a pointer-aligned address in dst, so integer-only operations are + * safe (no capability tags to preserve). + */ + else + { + block_reverse_copy(dst, src, len); + size_t remainder = len % LargestRegisterSize; + if (remainder > 0) + block_reverse_copy<1>(dst, src, remainder); + } + return orig_dst; + } }; #if defined(__x86_64__) || defined(_M_X64) @@ -374,6 +646,40 @@ namespace snmalloc } return orig_dst; } + + /** + * Forward copy for overlapping memmove where dst < src. + */ + static void* forward_move(void* dst, const void* src, size_t len) + { + auto orig_dst = dst; + block_copy(dst, src, len); + size_t remainder = len % LargestRegisterSize; + if (remainder > 0) + { + size_t offset = len - remainder; + block_copy<1>( + pointer_offset(dst, offset), + pointer_offset(src, offset), + remainder); + } + return orig_dst; + } + + /** + * Reverse copy for overlapping memmove where dst > src. + * No rep movsb in reverse (ERMS doesn't support DF=1), so use + * SSE-width block_reverse_copy. + */ + static void* move(void* dst, const void* src, size_t len) + { + auto orig_dst = dst; + block_reverse_copy(dst, src, len); + size_t remainder = len % LargestRegisterSize; + if (remainder > 0) + block_reverse_copy<1>(dst, src, remainder); + return orig_dst; + } }; #endif @@ -413,6 +719,38 @@ namespace snmalloc } return orig_dst; } + + /** + * Forward copy for overlapping memmove where dst < src. + */ + static void* forward_move(void* dst, const void* src, size_t len) + { + auto orig_dst = dst; + block_copy(dst, src, len); + size_t remainder = len % LargestRegisterSize; + if (remainder > 0) + { + size_t offset = len - remainder; + block_copy<1>( + pointer_offset(dst, offset), + pointer_offset(src, offset), + remainder); + } + return orig_dst; + } + + /** + * Reverse copy for overlapping memmove where dst > src. + */ + static void* move(void* dst, const void* src, size_t len) + { + auto orig_dst = dst; + block_reverse_copy(dst, src, len); + size_t remainder = len % LargestRegisterSize; + if (remainder > 0) + block_reverse_copy<1>(dst, src, remainder); + return orig_dst; + } }; #endif @@ -454,4 +792,45 @@ namespace snmalloc [&]() { return Arch::copy(dst, src, len); }); }); } + + /** + * Snmalloc checked memmove. Handles overlapping source and destination + * by selecting forward copy (Arch::copy) or reverse copy (Arch::move) + * as appropriate. + */ + template< + bool Checked, + bool ReadsChecked = CheckReads, + typename Arch = DefaultArch> + SNMALLOC_FAST_PATH_INLINE void* + memmove(void* dst, const void* src, size_t len) + { + if (SNMALLOC_UNLIKELY(len == 0 || dst == src)) + return dst; + + return check_bound<(Checked && ReadsChecked)>( + src, + len, + "memmove with source out of bounds of heap allocation", + [&]() { + return check_bound( + dst, + len, + "memmove with destination out of bounds of heap allocation", + [&]() { + auto dst_addr = address_cast(dst); + auto src_addr = address_cast(src); + if (dst_addr > src_addr) + { + if ((dst_addr - src_addr) >= len) + return Arch::copy(dst, src, len); // no overlap + return Arch::move(dst, src, len); // reverse copy + } + // dst_addr < src_addr (dst == src already handled) + if ((src_addr - dst_addr) >= len) + return Arch::copy(dst, src, len); // no overlap + return Arch::forward_move(dst, src, len); // safe forward + }); + }); + } } // namespace snmalloc diff --git a/src/snmalloc/override/memcpy.cc b/src/snmalloc/override/memcpy.cc index c6053ae02..de26263a6 100644 --- a/src/snmalloc/override/memcpy.cc +++ b/src/snmalloc/override/memcpy.cc @@ -10,4 +10,13 @@ extern "C" { return snmalloc::memcpy(dst, src, len); } + + /** + * Snmalloc checked memmove. + */ + SNMALLOC_EXPORT void* + SNMALLOC_NAME_MANGLE(memmove)(void* dst, const void* src, size_t len) + { + return snmalloc::memmove(dst, src, len); + } } diff --git a/src/test/func/memcpy/func-memcpy.cc b/src/test/func/memcpy/func-memcpy.cc index eda741c83..6eae3896a 100644 --- a/src/test/func/memcpy/func-memcpy.cc +++ b/src/test/func/memcpy/func-memcpy.cc @@ -109,6 +109,392 @@ void check_size(size_t size) my_free(d); } +/** + * Check that memmove works correctly for overlapping regions. + * Allocates a buffer, fills it with a pattern, and tests memmove + * with various overlap scenarios. + */ +void check_overlaps(size_t size) +{ + START_TEST("checking {}-byte memmove overlaps", size); + + // We need a buffer large enough for the overlap tests. + // Use 2*size so we can slide src/dst within it. + size_t bufsize = 2 * size + 1; + auto* buf = static_cast(my_malloc(bufsize)); + auto* ref = static_cast(my_malloc(bufsize)); + + for (size_t overlap_amount : + {size_t(1), + size / 4 + 1, + size / 2 + 1, + size - (size > 1 ? 1 : 0), + size}) + { + if (overlap_amount > size || overlap_amount == 0) + continue; + + size_t copy_len = size; + size_t offset = copy_len - overlap_amount; + + // Forward overlap: dst > src, overlap by overlap_amount bytes + // src = buf, dst = buf + offset + if (offset > 0 && offset + copy_len <= bufsize) + { + // Fill buffer with pattern + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast(i & 0xff); + // Save reference copy of source before move + for (size_t i = 0; i < copy_len; ++i) + ref[i] = buf[i]; + + void* ret = my_memmove(buf + offset, buf, copy_len); + EXPECT( + ret == buf + offset, + "Forward memmove return value should be dst"); + for (size_t i = 0; i < copy_len; ++i) + { + EXPECT( + buf[offset + i] == ref[i], + "Forward memmove mismatch at index {}, size {}, overlap {}", + i, + size, + overlap_amount); + } + } + + // Backward overlap: dst < src, overlap by overlap_amount bytes + // src = buf + offset, dst = buf + if (offset > 0 && offset + copy_len <= bufsize) + { + // Re-fill buffer with pattern + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast(i & 0xff); + // Save reference of source (which starts at offset) + for (size_t i = 0; i < copy_len; ++i) + ref[i] = buf[offset + i]; + + void* ret = my_memmove(buf, buf + offset, copy_len); + EXPECT( + ret == buf, "Backward memmove return value should be dst"); + for (size_t i = 0; i < copy_len; ++i) + { + EXPECT( + buf[i] == ref[i], + "Backward memmove mismatch at index {}, size {}, overlap {}", + i, + size, + overlap_amount); + } + } + } + + // Non-overlapping memmove should work like memcpy + { + auto* s = static_cast(my_malloc(size + 1)); + auto* d = static_cast(my_malloc(size + 1)); + for (size_t i = 0; i < size; ++i) + s[i] = static_cast(i & 0xff); + for (size_t i = 0; i < size; ++i) + d[i] = 0; + + void* ret = my_memmove(d, s, size); + EXPECT(ret == d, "Non-overlapping memmove return value should be dst"); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + d[i] == static_cast(i & 0xff), + "Non-overlapping memmove mismatch at {}", + i); + } + my_free(s); + my_free(d); + } + + // Zero-length and dst==src edge cases + { + auto* p = static_cast(my_malloc(size + 1)); + for (size_t i = 0; i < size; ++i) + p[i] = static_cast(i & 0xff); + + // Zero-length should be a no-op + void* ret = my_memmove(p, p + 1, 0); + EXPECT(ret == p, "Zero-length memmove should return dst"); + + // dst == src should be a no-op + if (size > 0) + { + ret = my_memmove(p, p, size); + EXPECT(ret == p, "dst==src memmove should return dst"); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + p[i] == static_cast(i & 0xff), + "dst==src memmove should not corrupt data"); + } + } + my_free(p); + } + + my_free(buf); + my_free(ref); +} + +/** + * Exhaustive overlap test: for a given buffer size, test memmove for + * every possible (offset, copy_len) combination within the buffer. + * This catches subtle off-by-one and alignment issues. + */ +void check_exhaustive_overlaps(size_t bufsize) +{ + START_TEST("exhaustive memmove overlaps for bufsize {}", bufsize); + + auto* buf = static_cast(my_malloc(bufsize)); + auto* ref = static_cast(my_malloc(bufsize)); + + for (size_t offset = 1; offset < bufsize; offset++) + { + for (size_t copy_len = 1; copy_len + offset <= bufsize; copy_len++) + { + // Forward overlap test: dst > src + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 7 + 13) & 0xff); + for (size_t i = 0; i < copy_len; ++i) + ref[i] = buf[i]; + + my_memmove(buf + offset, buf, copy_len); + for (size_t i = 0; i < copy_len; ++i) + { + EXPECT( + buf[offset + i] == ref[i], + "Exhaustive fwd mismatch: bufsize {}, offset {}, len {}, idx {}", + bufsize, + offset, + copy_len, + i); + } + + // Backward overlap test: dst < src + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 7 + 13) & 0xff); + for (size_t i = 0; i < copy_len; ++i) + ref[i] = buf[offset + i]; + + my_memmove(buf, buf + offset, copy_len); + for (size_t i = 0; i < copy_len; ++i) + { + EXPECT( + buf[i] == ref[i], + "Exhaustive bwd mismatch: bufsize {}, offset {}, len {}, idx {}", + bufsize, + offset, + copy_len, + i); + } + } + } + + my_free(buf); + my_free(ref); +} + +/** + * Test memmove at alignment boundary sizes. These sizes are chosen to + * exercise transitions between different code paths in the Arch + * implementations (small_copies, block_copy, rep movsb thresholds). + */ +void check_alignment_boundary_overlaps() +{ + START_TEST("memmove alignment boundary tests"); + + // Sizes near Arch path thresholds + static const size_t boundary_sizes[] = { + 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 31, 32, + 33, 48, 63, 64, 65, 127, 128, 129, 255, 256, 257, 511, + 512, 513, 768, 1023, 1024, 1025, 2048, 4096}; + + for (auto size : boundary_sizes) + { + size_t bufsize = 2 * size + 64; + auto* buf = static_cast(my_malloc(bufsize)); + auto* ref = static_cast(my_malloc(bufsize)); + + // Test with 1-byte offset (maximum overlap) + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 11 + 3) & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[i]; + my_memmove(buf + 1, buf, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[1 + i] == ref[i], + "Boundary fwd+1 mismatch at size {}, idx {}", + size, + i); + } + + // Test with 1-byte offset backward + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 11 + 3) & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[1 + i]; + my_memmove(buf, buf + 1, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[i] == ref[i], + "Boundary bwd+1 mismatch at size {}, idx {}", + size, + i); + } + + // Test with pointer-sized offset (alignof(void*)) + size_t ptr_off = sizeof(void*); + if (ptr_off < size) + { + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 11 + 3) & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[i]; + my_memmove(buf + ptr_off, buf, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[ptr_off + i] == ref[i], + "Boundary fwd+ptr mismatch at size {}, idx {}", + size, + i); + } + + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 11 + 3) & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[ptr_off + i]; + my_memmove(buf, buf + ptr_off, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[i] == ref[i], + "Boundary bwd+ptr mismatch at size {}, idx {}", + size, + i); + } + } + + // Test with misaligned start (offset 3 from allocation start) + if (size + 3 + size <= bufsize) + { + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 11 + 3) & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[3 + i]; + my_memmove(buf + 3 + 1, buf + 3, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[3 + 1 + i] == ref[i], + "Boundary misaligned fwd mismatch at size {}, idx {}", + size, + i); + } + } + + my_free(buf); + my_free(ref); + } +} + +/** + * Test memmove via direct snmalloc::memmove (unchecked) to exercise + * arch-specific code paths without bounds checking interference. + */ +void check_direct_memmove(size_t size) +{ + START_TEST("direct snmalloc::memmove for size {}", size); + + size_t bufsize = 2 * size + 1; + auto* buf = static_cast(my_malloc(bufsize)); + auto* ref = static_cast(my_malloc(bufsize)); + + // Forward overlap (dst > src) with offset 1 + if (size > 0) + { + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast(i & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[i]; + + snmalloc::memmove(buf + 1, buf, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[1 + i] == ref[i], + "Direct fwd mismatch at size {}, idx {}", + size, + i); + } + } + + // Reverse overlap (dst < src) with offset 1 + if (size > 0) + { + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast(i & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[1 + i]; + + snmalloc::memmove(buf, buf + 1, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[i] == ref[i], + "Direct bwd mismatch at size {}, idx {}", + size, + i); + } + } + + my_free(buf); + my_free(ref); +} + +void check_memmove_bounds(size_t size, size_t out_of_bounds) +{ + START_TEST( + "memmove bounds, size {}, {} bytes out of bounds", + size, + out_of_bounds); + auto* s = static_cast(my_malloc(size)); + auto* d = static_cast(my_malloc(size)); + for (size_t i = 0; i < size; ++i) + { + s[i] = static_cast(i); + } + for (size_t i = 0; i < size; ++i) + { + d[i] = 0; + } + bool bounds_error = false; + can_longjmp = true; + if (setjmp(jmp) == 0) + { + my_memmove(d, s, size + out_of_bounds); + } + else + { + bounds_error = true; + } + can_longjmp = false; + EXPECT( + bounds_error == (out_of_bounds > 0), + "bounds error: {}, out_of_bounds: {}", + bounds_error, + out_of_bounds); + my_free(s); + my_free(d); +} + void check_bounds(size_t size, size_t out_of_bounds) { START_TEST( @@ -165,9 +551,35 @@ int main() // Check one object out of bounds check_bounds(sz, sz); } + for (auto sz : sizes) + { + // Check in bounds + check_memmove_bounds(sz, 0); + // Check one byte out + check_memmove_bounds(sz, 1); + // Check one object out of bounds + check_memmove_bounds(sz, sz); + } for (size_t x = 0; x < 2048; x++) { check_size(x); } + // Test memmove with various sizes covering all Arch path thresholds + for (size_t x = 1; x < 2048; x++) + { + check_overlaps(x); + } + // Exhaustive overlap testing for small sizes (every offset * length combo) + for (size_t x = 2; x <= 64; x++) + { + check_exhaustive_overlaps(x); + } + // Alignment boundary tests + check_alignment_boundary_overlaps(); + // Direct snmalloc::memmove tests (unchecked path) + for (size_t x = 0; x < 2048; x++) + { + check_direct_memmove(x); + } } #endif \ No newline at end of file From 3355c8a3cd58e2e7426da600b88d0ba4cef4336f Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 3 Mar 2026 23:12:56 +0000 Subject: [PATCH 2/5] Run clang-format --- fuzzing/snmalloc-fuzzer.cpp | 11 +++++++--- src/snmalloc/global/memcpy.h | 32 ++++++++--------------------- src/test/func/memcpy/func-memcpy.cc | 28 ++++++++----------------- 3 files changed, 25 insertions(+), 46 deletions(-) diff --git a/fuzzing/snmalloc-fuzzer.cpp b/fuzzing/snmalloc-fuzzer.cpp index 441e4f134..cb19487ba 100644 --- a/fuzzing/snmalloc-fuzzer.cpp +++ b/fuzzing/snmalloc-fuzzer.cpp @@ -110,12 +110,17 @@ struct Result char* ptr; size_t size; - Result(char filler, char* ptr, size_t size) : filler(filler), ptr(ptr), size(size) {} - Result(Result&& other) noexcept : filler(other.filler), ptr(other.ptr), size(other.size) + Result(char filler, char* ptr, size_t size) + : filler(filler), ptr(ptr), size(size) + {} + + Result(Result&& other) noexcept + : filler(other.filler), ptr(other.ptr), size(other.size) { other.ptr = nullptr; } - Result &operator=(Result&& other) noexcept + + Result& operator=(Result&& other) noexcept { if (this != &other) { diff --git a/src/snmalloc/global/memcpy.h b/src/snmalloc/global/memcpy.h index e7436e3a0..f90cd00ec 100644 --- a/src/snmalloc/global/memcpy.h +++ b/src/snmalloc/global/memcpy.h @@ -209,9 +209,7 @@ namespace snmalloc { size_t offset = len - remainder; block_copy<1>( - pointer_offset(dst, offset), - pointer_offset(src, offset), - remainder); + pointer_offset(dst, offset), pointer_offset(src, offset), remainder); } return orig_dst; } @@ -411,18 +409,15 @@ namespace snmalloc size_t i = 0; for (; i + sizeof(Ptr2) <= aligned_len; i += sizeof(Ptr2)) { - auto* dp = - static_cast(pointer_offset(aligned_dst, i)); - auto* sp = - static_cast(pointer_offset(aligned_src, i)); + auto* dp = static_cast(pointer_offset(aligned_dst, i)); + auto* sp = static_cast(pointer_offset(aligned_src, i)); *dp = *sp; } /* Handle a remaining single pointer */ if (i + sizeof(void*) <= aligned_len) { - auto* dp = - static_cast(pointer_offset(aligned_dst, i)); + auto* dp = static_cast(pointer_offset(aligned_dst, i)); auto* sp = static_cast(pointer_offset(aligned_src, i)); *dp = *sp; @@ -541,8 +536,7 @@ namespace snmalloc { i -= sizeof(Ptr2); auto* dp = static_cast(pointer_offset(aligned_dst, i)); - auto* sp = - static_cast(pointer_offset(aligned_src, i)); + auto* sp = static_cast(pointer_offset(aligned_src, i)); *dp = *sp; } @@ -550,8 +544,7 @@ namespace snmalloc if (i >= sizeof(void*)) { i -= sizeof(void*); - auto* dp = - static_cast(pointer_offset(aligned_dst, i)); + auto* dp = static_cast(pointer_offset(aligned_dst, i)); auto* sp = static_cast(pointer_offset(aligned_src, i)); *dp = *sp; @@ -659,9 +652,7 @@ namespace snmalloc { size_t offset = len - remainder; block_copy<1>( - pointer_offset(dst, offset), - pointer_offset(src, offset), - remainder); + pointer_offset(dst, offset), pointer_offset(src, offset), remainder); } return orig_dst; } @@ -732,9 +723,7 @@ namespace snmalloc { size_t offset = len - remainder; block_copy<1>( - pointer_offset(dst, offset), - pointer_offset(src, offset), - remainder); + pointer_offset(dst, offset), pointer_offset(src, offset), remainder); } return orig_dst; } @@ -809,10 +798,7 @@ namespace snmalloc return dst; return check_bound<(Checked && ReadsChecked)>( - src, - len, - "memmove with source out of bounds of heap allocation", - [&]() { + src, len, "memmove with source out of bounds of heap allocation", [&]() { return check_bound( dst, len, diff --git a/src/test/func/memcpy/func-memcpy.cc b/src/test/func/memcpy/func-memcpy.cc index 6eae3896a..728857635 100644 --- a/src/test/func/memcpy/func-memcpy.cc +++ b/src/test/func/memcpy/func-memcpy.cc @@ -125,11 +125,7 @@ void check_overlaps(size_t size) auto* ref = static_cast(my_malloc(bufsize)); for (size_t overlap_amount : - {size_t(1), - size / 4 + 1, - size / 2 + 1, - size - (size > 1 ? 1 : 0), - size}) + {size_t(1), size / 4 + 1, size / 2 + 1, size - (size > 1 ? 1 : 0), size}) { if (overlap_amount > size || overlap_amount == 0) continue; @@ -149,9 +145,7 @@ void check_overlaps(size_t size) ref[i] = buf[i]; void* ret = my_memmove(buf + offset, buf, copy_len); - EXPECT( - ret == buf + offset, - "Forward memmove return value should be dst"); + EXPECT(ret == buf + offset, "Forward memmove return value should be dst"); for (size_t i = 0; i < copy_len; ++i) { EXPECT( @@ -175,8 +169,7 @@ void check_overlaps(size_t size) ref[i] = buf[offset + i]; void* ret = my_memmove(buf, buf + offset, copy_len); - EXPECT( - ret == buf, "Backward memmove return value should be dst"); + EXPECT(ret == buf, "Backward memmove return value should be dst"); for (size_t i = 0; i < copy_len; ++i) { EXPECT( @@ -309,9 +302,9 @@ void check_alignment_boundary_overlaps() // Sizes near Arch path thresholds static const size_t boundary_sizes[] = { - 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 31, 32, - 33, 48, 63, 64, 65, 127, 128, 129, 255, 256, 257, 511, - 512, 513, 768, 1023, 1024, 1025, 2048, 4096}; + 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 31, + 32, 33, 48, 63, 64, 65, 127, 128, 129, 255, 256, + 257, 511, 512, 513, 768, 1023, 1024, 1025, 2048, 4096}; for (auto size : boundary_sizes) { @@ -448,10 +441,7 @@ void check_direct_memmove(size_t size) for (size_t i = 0; i < size; ++i) { EXPECT( - buf[i] == ref[i], - "Direct bwd mismatch at size {}, idx {}", - size, - i); + buf[i] == ref[i], "Direct bwd mismatch at size {}, idx {}", size, i); } } @@ -462,9 +452,7 @@ void check_direct_memmove(size_t size) void check_memmove_bounds(size_t size, size_t out_of_bounds) { START_TEST( - "memmove bounds, size {}, {} bytes out of bounds", - size, - out_of_bounds); + "memmove bounds, size {}, {} bytes out of bounds", size, out_of_bounds); auto* s = static_cast(my_malloc(size)); auto* d = static_cast(my_malloc(size)); for (size_t i = 0; i < size; ++i) From d9b22aace5483b0ea6548342a628dfba842c26ed Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 3 Mar 2026 23:27:04 +0000 Subject: [PATCH 3/5] Fix ASan memcpy-param-overlap in memmove paths Add copy_one_move that always uses struct-copy instead of __builtin_memcpy_inline, which ASan treats as memcpy and flags overlapping src/dst as an error. Use it in block_reverse_copy and a new block_copy_move for all forward_move/move overlap paths. --- src/snmalloc/global/memcpy.h | 64 ++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 14 deletions(-) diff --git a/src/snmalloc/global/memcpy.h b/src/snmalloc/global/memcpy.h index f90cd00ec..f2453ba15 100644 --- a/src/snmalloc/global/memcpy.h +++ b/src/snmalloc/global/memcpy.h @@ -29,6 +29,26 @@ namespace snmalloc #endif } + /** + * Copy a single element where source and destination may overlap. + * Unlike copy_one, this always uses the struct-copy path because + * __builtin_memcpy_inline is treated as memcpy by sanitizers and + * will flag overlapping src/dst as an error. The struct copy is + * semantically safe for overlap (load into temporary, then store). + */ + template + SNMALLOC_FAST_PATH_INLINE void copy_one_move(void* dst, const void* src) + { + struct Block + { + char data[Size]; + }; + + auto* d = static_cast(dst); + auto* s = static_cast(src); + *d = *s; + } + /** * Copy a block using the specified size. This copies as many complete * chunks of size `Size` as are possible from `len`. @@ -43,10 +63,25 @@ namespace snmalloc } } + /** + * Copy a block where source and destination may overlap, using the + * overlap-safe copy_one_move. + */ + template + SNMALLOC_FAST_PATH_INLINE void + block_copy_move(void* dst, const void* src, size_t len) + { + for (size_t i = 0; (i + Size) <= len; i += Size) + { + copy_one_move(pointer_offset(dst, i), pointer_offset(src, i)); + } + } + /** * Reverse-copy a block using the specified chunk size. Copies as many * complete chunks of `Size` bytes as possible from end to start. * After the loop, bytes [0, len % Size) remain uncopied. + * Uses copy_one_move because source and destination overlap. */ template SNMALLOC_FAST_PATH_INLINE void @@ -56,7 +91,7 @@ namespace snmalloc while (i >= Size) { i -= Size; - copy_one(pointer_offset(dst, i), pointer_offset(src, i)); + copy_one_move(pointer_offset(dst, i), pointer_offset(src, i)); } } @@ -197,18 +232,19 @@ namespace snmalloc /** * Forward copy for overlapping memmove where dst < src. - * Uses block_copy without copy_end to avoid re-reading overwritten - * bytes. Caller guarantees len > 0 and buffers overlap. + * Uses block_copy_move (overlap-safe) without copy_end to avoid + * re-reading overwritten bytes. + * Caller guarantees len > 0 and buffers overlap. */ static void* forward_move(void* dst, const void* src, size_t len) { auto orig_dst = dst; - block_copy(dst, src, len); + block_copy_move(dst, src, len); size_t remainder = len % LargestRegisterSize; if (remainder > 0) { size_t offset = len - remainder; - block_copy<1>( + block_copy_move<1>( pointer_offset(dst, offset), pointer_offset(src, offset), remainder); } return orig_dst; @@ -376,7 +412,7 @@ namespace snmalloc (static_cast(-static_cast(address_cast(src))) & (alignof(void*) - 1))) { - block_copy<1>(dst, src, len); + block_copy_move<1>(dst, src, len); } /* Equally-misaligned: use pointer-width forward operations. */ else if ( @@ -392,7 +428,7 @@ namespace snmalloc /* Copy head sub-pointer bytes forward (byte-by-byte). */ if (head_pad > 0) - block_copy<1>(dst, src, head_pad); + block_copy_move<1>(dst, src, head_pad); /* Forward copy aligned middle using pointer-pair operations. */ if (aligned_len > 0) @@ -428,7 +464,7 @@ namespace snmalloc if (tail_pad > 0) { size_t tail_off = len - tail_pad; - block_copy<1>( + block_copy_move<1>( pointer_offset(dst, tail_off), pointer_offset(src, tail_off), tail_pad); @@ -437,12 +473,12 @@ namespace snmalloc /* Differently misaligned: integer-only forward copy is safe. */ else { - block_copy(dst, src, len); + block_copy_move(dst, src, len); size_t remainder = len % LargestRegisterSize; if (remainder > 0) { size_t offset = len - remainder; - block_copy<1>( + block_copy_move<1>( pointer_offset(dst, offset), pointer_offset(src, offset), remainder); @@ -646,12 +682,12 @@ namespace snmalloc static void* forward_move(void* dst, const void* src, size_t len) { auto orig_dst = dst; - block_copy(dst, src, len); + block_copy_move(dst, src, len); size_t remainder = len % LargestRegisterSize; if (remainder > 0) { size_t offset = len - remainder; - block_copy<1>( + block_copy_move<1>( pointer_offset(dst, offset), pointer_offset(src, offset), remainder); } return orig_dst; @@ -717,12 +753,12 @@ namespace snmalloc static void* forward_move(void* dst, const void* src, size_t len) { auto orig_dst = dst; - block_copy(dst, src, len); + block_copy_move(dst, src, len); size_t remainder = len % LargestRegisterSize; if (remainder > 0) { size_t offset = len - remainder; - block_copy<1>( + block_copy_move<1>( pointer_offset(dst, offset), pointer_offset(src, offset), remainder); } return orig_dst; From 942d7055a1db018e9ac6101f13ca51eebdf45118 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 3 Mar 2026 23:46:48 +0000 Subject: [PATCH 4/5] Separate memmove tests from memcpy tests Move all memmove tests into a new src/test/func/memmove/ directory so they build as func-memmove-fast and func-memmove-check, separate from func-memcpy-fast and func-memcpy-check. This makes it clear which test is failing when CI reports errors. --- src/test/func/memcpy/func-memcpy.cc | 400 ---------------------- src/test/func/memmove/func-memmove.cc | 473 ++++++++++++++++++++++++++ 2 files changed, 473 insertions(+), 400 deletions(-) create mode 100644 src/test/func/memmove/func-memmove.cc diff --git a/src/test/func/memcpy/func-memcpy.cc b/src/test/func/memcpy/func-memcpy.cc index 728857635..eda741c83 100644 --- a/src/test/func/memcpy/func-memcpy.cc +++ b/src/test/func/memcpy/func-memcpy.cc @@ -109,380 +109,6 @@ void check_size(size_t size) my_free(d); } -/** - * Check that memmove works correctly for overlapping regions. - * Allocates a buffer, fills it with a pattern, and tests memmove - * with various overlap scenarios. - */ -void check_overlaps(size_t size) -{ - START_TEST("checking {}-byte memmove overlaps", size); - - // We need a buffer large enough for the overlap tests. - // Use 2*size so we can slide src/dst within it. - size_t bufsize = 2 * size + 1; - auto* buf = static_cast(my_malloc(bufsize)); - auto* ref = static_cast(my_malloc(bufsize)); - - for (size_t overlap_amount : - {size_t(1), size / 4 + 1, size / 2 + 1, size - (size > 1 ? 1 : 0), size}) - { - if (overlap_amount > size || overlap_amount == 0) - continue; - - size_t copy_len = size; - size_t offset = copy_len - overlap_amount; - - // Forward overlap: dst > src, overlap by overlap_amount bytes - // src = buf, dst = buf + offset - if (offset > 0 && offset + copy_len <= bufsize) - { - // Fill buffer with pattern - for (size_t i = 0; i < bufsize; ++i) - buf[i] = static_cast(i & 0xff); - // Save reference copy of source before move - for (size_t i = 0; i < copy_len; ++i) - ref[i] = buf[i]; - - void* ret = my_memmove(buf + offset, buf, copy_len); - EXPECT(ret == buf + offset, "Forward memmove return value should be dst"); - for (size_t i = 0; i < copy_len; ++i) - { - EXPECT( - buf[offset + i] == ref[i], - "Forward memmove mismatch at index {}, size {}, overlap {}", - i, - size, - overlap_amount); - } - } - - // Backward overlap: dst < src, overlap by overlap_amount bytes - // src = buf + offset, dst = buf - if (offset > 0 && offset + copy_len <= bufsize) - { - // Re-fill buffer with pattern - for (size_t i = 0; i < bufsize; ++i) - buf[i] = static_cast(i & 0xff); - // Save reference of source (which starts at offset) - for (size_t i = 0; i < copy_len; ++i) - ref[i] = buf[offset + i]; - - void* ret = my_memmove(buf, buf + offset, copy_len); - EXPECT(ret == buf, "Backward memmove return value should be dst"); - for (size_t i = 0; i < copy_len; ++i) - { - EXPECT( - buf[i] == ref[i], - "Backward memmove mismatch at index {}, size {}, overlap {}", - i, - size, - overlap_amount); - } - } - } - - // Non-overlapping memmove should work like memcpy - { - auto* s = static_cast(my_malloc(size + 1)); - auto* d = static_cast(my_malloc(size + 1)); - for (size_t i = 0; i < size; ++i) - s[i] = static_cast(i & 0xff); - for (size_t i = 0; i < size; ++i) - d[i] = 0; - - void* ret = my_memmove(d, s, size); - EXPECT(ret == d, "Non-overlapping memmove return value should be dst"); - for (size_t i = 0; i < size; ++i) - { - EXPECT( - d[i] == static_cast(i & 0xff), - "Non-overlapping memmove mismatch at {}", - i); - } - my_free(s); - my_free(d); - } - - // Zero-length and dst==src edge cases - { - auto* p = static_cast(my_malloc(size + 1)); - for (size_t i = 0; i < size; ++i) - p[i] = static_cast(i & 0xff); - - // Zero-length should be a no-op - void* ret = my_memmove(p, p + 1, 0); - EXPECT(ret == p, "Zero-length memmove should return dst"); - - // dst == src should be a no-op - if (size > 0) - { - ret = my_memmove(p, p, size); - EXPECT(ret == p, "dst==src memmove should return dst"); - for (size_t i = 0; i < size; ++i) - { - EXPECT( - p[i] == static_cast(i & 0xff), - "dst==src memmove should not corrupt data"); - } - } - my_free(p); - } - - my_free(buf); - my_free(ref); -} - -/** - * Exhaustive overlap test: for a given buffer size, test memmove for - * every possible (offset, copy_len) combination within the buffer. - * This catches subtle off-by-one and alignment issues. - */ -void check_exhaustive_overlaps(size_t bufsize) -{ - START_TEST("exhaustive memmove overlaps for bufsize {}", bufsize); - - auto* buf = static_cast(my_malloc(bufsize)); - auto* ref = static_cast(my_malloc(bufsize)); - - for (size_t offset = 1; offset < bufsize; offset++) - { - for (size_t copy_len = 1; copy_len + offset <= bufsize; copy_len++) - { - // Forward overlap test: dst > src - for (size_t i = 0; i < bufsize; ++i) - buf[i] = static_cast((i * 7 + 13) & 0xff); - for (size_t i = 0; i < copy_len; ++i) - ref[i] = buf[i]; - - my_memmove(buf + offset, buf, copy_len); - for (size_t i = 0; i < copy_len; ++i) - { - EXPECT( - buf[offset + i] == ref[i], - "Exhaustive fwd mismatch: bufsize {}, offset {}, len {}, idx {}", - bufsize, - offset, - copy_len, - i); - } - - // Backward overlap test: dst < src - for (size_t i = 0; i < bufsize; ++i) - buf[i] = static_cast((i * 7 + 13) & 0xff); - for (size_t i = 0; i < copy_len; ++i) - ref[i] = buf[offset + i]; - - my_memmove(buf, buf + offset, copy_len); - for (size_t i = 0; i < copy_len; ++i) - { - EXPECT( - buf[i] == ref[i], - "Exhaustive bwd mismatch: bufsize {}, offset {}, len {}, idx {}", - bufsize, - offset, - copy_len, - i); - } - } - } - - my_free(buf); - my_free(ref); -} - -/** - * Test memmove at alignment boundary sizes. These sizes are chosen to - * exercise transitions between different code paths in the Arch - * implementations (small_copies, block_copy, rep movsb thresholds). - */ -void check_alignment_boundary_overlaps() -{ - START_TEST("memmove alignment boundary tests"); - - // Sizes near Arch path thresholds - static const size_t boundary_sizes[] = { - 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 31, - 32, 33, 48, 63, 64, 65, 127, 128, 129, 255, 256, - 257, 511, 512, 513, 768, 1023, 1024, 1025, 2048, 4096}; - - for (auto size : boundary_sizes) - { - size_t bufsize = 2 * size + 64; - auto* buf = static_cast(my_malloc(bufsize)); - auto* ref = static_cast(my_malloc(bufsize)); - - // Test with 1-byte offset (maximum overlap) - for (size_t i = 0; i < bufsize; ++i) - buf[i] = static_cast((i * 11 + 3) & 0xff); - for (size_t i = 0; i < size; ++i) - ref[i] = buf[i]; - my_memmove(buf + 1, buf, size); - for (size_t i = 0; i < size; ++i) - { - EXPECT( - buf[1 + i] == ref[i], - "Boundary fwd+1 mismatch at size {}, idx {}", - size, - i); - } - - // Test with 1-byte offset backward - for (size_t i = 0; i < bufsize; ++i) - buf[i] = static_cast((i * 11 + 3) & 0xff); - for (size_t i = 0; i < size; ++i) - ref[i] = buf[1 + i]; - my_memmove(buf, buf + 1, size); - for (size_t i = 0; i < size; ++i) - { - EXPECT( - buf[i] == ref[i], - "Boundary bwd+1 mismatch at size {}, idx {}", - size, - i); - } - - // Test with pointer-sized offset (alignof(void*)) - size_t ptr_off = sizeof(void*); - if (ptr_off < size) - { - for (size_t i = 0; i < bufsize; ++i) - buf[i] = static_cast((i * 11 + 3) & 0xff); - for (size_t i = 0; i < size; ++i) - ref[i] = buf[i]; - my_memmove(buf + ptr_off, buf, size); - for (size_t i = 0; i < size; ++i) - { - EXPECT( - buf[ptr_off + i] == ref[i], - "Boundary fwd+ptr mismatch at size {}, idx {}", - size, - i); - } - - for (size_t i = 0; i < bufsize; ++i) - buf[i] = static_cast((i * 11 + 3) & 0xff); - for (size_t i = 0; i < size; ++i) - ref[i] = buf[ptr_off + i]; - my_memmove(buf, buf + ptr_off, size); - for (size_t i = 0; i < size; ++i) - { - EXPECT( - buf[i] == ref[i], - "Boundary bwd+ptr mismatch at size {}, idx {}", - size, - i); - } - } - - // Test with misaligned start (offset 3 from allocation start) - if (size + 3 + size <= bufsize) - { - for (size_t i = 0; i < bufsize; ++i) - buf[i] = static_cast((i * 11 + 3) & 0xff); - for (size_t i = 0; i < size; ++i) - ref[i] = buf[3 + i]; - my_memmove(buf + 3 + 1, buf + 3, size); - for (size_t i = 0; i < size; ++i) - { - EXPECT( - buf[3 + 1 + i] == ref[i], - "Boundary misaligned fwd mismatch at size {}, idx {}", - size, - i); - } - } - - my_free(buf); - my_free(ref); - } -} - -/** - * Test memmove via direct snmalloc::memmove (unchecked) to exercise - * arch-specific code paths without bounds checking interference. - */ -void check_direct_memmove(size_t size) -{ - START_TEST("direct snmalloc::memmove for size {}", size); - - size_t bufsize = 2 * size + 1; - auto* buf = static_cast(my_malloc(bufsize)); - auto* ref = static_cast(my_malloc(bufsize)); - - // Forward overlap (dst > src) with offset 1 - if (size > 0) - { - for (size_t i = 0; i < bufsize; ++i) - buf[i] = static_cast(i & 0xff); - for (size_t i = 0; i < size; ++i) - ref[i] = buf[i]; - - snmalloc::memmove(buf + 1, buf, size); - for (size_t i = 0; i < size; ++i) - { - EXPECT( - buf[1 + i] == ref[i], - "Direct fwd mismatch at size {}, idx {}", - size, - i); - } - } - - // Reverse overlap (dst < src) with offset 1 - if (size > 0) - { - for (size_t i = 0; i < bufsize; ++i) - buf[i] = static_cast(i & 0xff); - for (size_t i = 0; i < size; ++i) - ref[i] = buf[1 + i]; - - snmalloc::memmove(buf, buf + 1, size); - for (size_t i = 0; i < size; ++i) - { - EXPECT( - buf[i] == ref[i], "Direct bwd mismatch at size {}, idx {}", size, i); - } - } - - my_free(buf); - my_free(ref); -} - -void check_memmove_bounds(size_t size, size_t out_of_bounds) -{ - START_TEST( - "memmove bounds, size {}, {} bytes out of bounds", size, out_of_bounds); - auto* s = static_cast(my_malloc(size)); - auto* d = static_cast(my_malloc(size)); - for (size_t i = 0; i < size; ++i) - { - s[i] = static_cast(i); - } - for (size_t i = 0; i < size; ++i) - { - d[i] = 0; - } - bool bounds_error = false; - can_longjmp = true; - if (setjmp(jmp) == 0) - { - my_memmove(d, s, size + out_of_bounds); - } - else - { - bounds_error = true; - } - can_longjmp = false; - EXPECT( - bounds_error == (out_of_bounds > 0), - "bounds error: {}, out_of_bounds: {}", - bounds_error, - out_of_bounds); - my_free(s); - my_free(d); -} - void check_bounds(size_t size, size_t out_of_bounds) { START_TEST( @@ -539,35 +165,9 @@ int main() // Check one object out of bounds check_bounds(sz, sz); } - for (auto sz : sizes) - { - // Check in bounds - check_memmove_bounds(sz, 0); - // Check one byte out - check_memmove_bounds(sz, 1); - // Check one object out of bounds - check_memmove_bounds(sz, sz); - } for (size_t x = 0; x < 2048; x++) { check_size(x); } - // Test memmove with various sizes covering all Arch path thresholds - for (size_t x = 1; x < 2048; x++) - { - check_overlaps(x); - } - // Exhaustive overlap testing for small sizes (every offset * length combo) - for (size_t x = 2; x <= 64; x++) - { - check_exhaustive_overlaps(x); - } - // Alignment boundary tests - check_alignment_boundary_overlaps(); - // Direct snmalloc::memmove tests (unchecked path) - for (size_t x = 0; x < 2048; x++) - { - check_direct_memmove(x); - } } #endif \ No newline at end of file diff --git a/src/test/func/memmove/func-memmove.cc b/src/test/func/memmove/func-memmove.cc new file mode 100644 index 000000000..9da4e2d02 --- /dev/null +++ b/src/test/func/memmove/func-memmove.cc @@ -0,0 +1,473 @@ +// Windows doesn't like changing the linkage spec of abort. +#if defined(_MSC_VER) +int main() +{ + return 0; +} +#else +// QEMU user mode does not support the code that generates backtraces and so we +// also need to skip this test if we are doing a debug build and targeting +// QEMU. +# if defined(SNMALLOC_QEMU_WORKAROUND) && defined(SNMALLOC_BACKTRACE_HEADER) +# undef SNMALLOC_BACKTRACE_HEADER +# endif +# ifdef SNMALLOC_STATIC_LIBRARY_PREFIX +# undef SNMALLOC_STATIC_LIBRARY_PREFIX +# endif +# ifdef SNMALLOC_FAIL_FAST +# undef SNMALLOC_FAIL_FAST +# endif +# define SNMALLOC_FAIL_FAST false +# define SNMALLOC_STATIC_LIBRARY_PREFIX my_ +# include "snmalloc/override/malloc.cc" +# include "snmalloc/override/memcpy.cc" +# include "test/helpers.h" + +# include +# include +# include +# include +# include +# include + +using namespace snmalloc; + +/** + * Jump buffer used to jump out of `abort()` for recoverable errors. + */ +static std::jmp_buf jmp; + +/** + * Flag indicating whether `jmp` is valid. If this is set then calls to + * `abort` will jump to the jump buffer, rather than exiting. + */ +static bool can_longjmp; + +/** + * Replacement for the C standard `abort` that returns to the `setjmp` call for + * recoverable errors. + */ +extern "C" void abort() +{ + if (can_longjmp) + { + longjmp(jmp, 1); + } +# if __has_builtin(__builtin_trap) + __builtin_trap(); +# endif + exit(-1); +} + +/** + * Check that memmove works correctly for overlapping regions. + * Allocates a buffer, fills it with a pattern, and tests memmove + * with various overlap scenarios. + */ +void check_overlaps(size_t size) +{ + START_TEST("checking {}-byte memmove overlaps", size); + + // We need a buffer large enough for the overlap tests. + // Use 2*size so we can slide src/dst within it. + size_t bufsize = 2 * size + 1; + auto* buf = static_cast(my_malloc(bufsize)); + auto* ref = static_cast(my_malloc(bufsize)); + + for (size_t overlap_amount : + {size_t(1), size / 4 + 1, size / 2 + 1, size - (size > 1 ? 1 : 0), size}) + { + if (overlap_amount > size || overlap_amount == 0) + continue; + + size_t copy_len = size; + size_t offset = copy_len - overlap_amount; + + // Forward overlap: dst > src, overlap by overlap_amount bytes + // src = buf, dst = buf + offset + if (offset > 0 && offset + copy_len <= bufsize) + { + // Fill buffer with pattern + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast(i & 0xff); + // Save reference copy of source before move + for (size_t i = 0; i < copy_len; ++i) + ref[i] = buf[i]; + + void* ret = my_memmove(buf + offset, buf, copy_len); + EXPECT(ret == buf + offset, "Forward memmove return value should be dst"); + for (size_t i = 0; i < copy_len; ++i) + { + EXPECT( + buf[offset + i] == ref[i], + "Forward memmove mismatch at index {}, size {}, overlap {}", + i, + size, + overlap_amount); + } + } + + // Backward overlap: dst < src, overlap by overlap_amount bytes + // src = buf + offset, dst = buf + if (offset > 0 && offset + copy_len <= bufsize) + { + // Re-fill buffer with pattern + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast(i & 0xff); + // Save reference of source (which starts at offset) + for (size_t i = 0; i < copy_len; ++i) + ref[i] = buf[offset + i]; + + void* ret = my_memmove(buf, buf + offset, copy_len); + EXPECT(ret == buf, "Backward memmove return value should be dst"); + for (size_t i = 0; i < copy_len; ++i) + { + EXPECT( + buf[i] == ref[i], + "Backward memmove mismatch at index {}, size {}, overlap {}", + i, + size, + overlap_amount); + } + } + } + + // Non-overlapping memmove should work like memcpy + { + auto* s = static_cast(my_malloc(size + 1)); + auto* d = static_cast(my_malloc(size + 1)); + for (size_t i = 0; i < size; ++i) + s[i] = static_cast(i & 0xff); + for (size_t i = 0; i < size; ++i) + d[i] = 0; + + void* ret = my_memmove(d, s, size); + EXPECT(ret == d, "Non-overlapping memmove return value should be dst"); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + d[i] == static_cast(i & 0xff), + "Non-overlapping memmove mismatch at {}", + i); + } + my_free(s); + my_free(d); + } + + // Zero-length and dst==src edge cases + { + auto* p = static_cast(my_malloc(size + 1)); + for (size_t i = 0; i < size; ++i) + p[i] = static_cast(i & 0xff); + + // Zero-length should be a no-op + void* ret = my_memmove(p, p + 1, 0); + EXPECT(ret == p, "Zero-length memmove should return dst"); + + // dst == src should be a no-op + if (size > 0) + { + ret = my_memmove(p, p, size); + EXPECT(ret == p, "dst==src memmove should return dst"); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + p[i] == static_cast(i & 0xff), + "dst==src memmove should not corrupt data"); + } + } + my_free(p); + } + + my_free(buf); + my_free(ref); +} + +/** + * Exhaustive overlap test: for a given buffer size, test memmove for + * every possible (offset, copy_len) combination within the buffer. + * This catches subtle off-by-one and alignment issues. + */ +void check_exhaustive_overlaps(size_t bufsize) +{ + START_TEST("exhaustive memmove overlaps for bufsize {}", bufsize); + + auto* buf = static_cast(my_malloc(bufsize)); + auto* ref = static_cast(my_malloc(bufsize)); + + for (size_t offset = 1; offset < bufsize; offset++) + { + for (size_t copy_len = 1; copy_len + offset <= bufsize; copy_len++) + { + // Forward overlap test: dst > src + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 7 + 13) & 0xff); + for (size_t i = 0; i < copy_len; ++i) + ref[i] = buf[i]; + + my_memmove(buf + offset, buf, copy_len); + for (size_t i = 0; i < copy_len; ++i) + { + EXPECT( + buf[offset + i] == ref[i], + "Exhaustive fwd mismatch: bufsize {}, offset {}, len {}, idx {}", + bufsize, + offset, + copy_len, + i); + } + + // Backward overlap test: dst < src + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 7 + 13) & 0xff); + for (size_t i = 0; i < copy_len; ++i) + ref[i] = buf[offset + i]; + + my_memmove(buf, buf + offset, copy_len); + for (size_t i = 0; i < copy_len; ++i) + { + EXPECT( + buf[i] == ref[i], + "Exhaustive bwd mismatch: bufsize {}, offset {}, len {}, idx {}", + bufsize, + offset, + copy_len, + i); + } + } + } + + my_free(buf); + my_free(ref); +} + +/** + * Test memmove at alignment boundary sizes. These sizes are chosen to + * exercise transitions between different code paths in the Arch + * implementations (small_copies, block_copy, rep movsb thresholds). + */ +void check_alignment_boundary_overlaps() +{ + START_TEST("memmove alignment boundary tests"); + + // Sizes near Arch path thresholds + static const size_t boundary_sizes[] = { + 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 31, + 32, 33, 48, 63, 64, 65, 127, 128, 129, 255, 256, + 257, 511, 512, 513, 768, 1023, 1024, 1025, 2048, 4096}; + + for (auto size : boundary_sizes) + { + size_t bufsize = 2 * size + 64; + auto* buf = static_cast(my_malloc(bufsize)); + auto* ref = static_cast(my_malloc(bufsize)); + + // Test with 1-byte offset (maximum overlap) + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 11 + 3) & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[i]; + my_memmove(buf + 1, buf, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[1 + i] == ref[i], + "Boundary fwd+1 mismatch at size {}, idx {}", + size, + i); + } + + // Test with 1-byte offset backward + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 11 + 3) & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[1 + i]; + my_memmove(buf, buf + 1, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[i] == ref[i], + "Boundary bwd+1 mismatch at size {}, idx {}", + size, + i); + } + + // Test with pointer-sized offset (alignof(void*)) + size_t ptr_off = sizeof(void*); + if (ptr_off < size) + { + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 11 + 3) & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[i]; + my_memmove(buf + ptr_off, buf, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[ptr_off + i] == ref[i], + "Boundary fwd+ptr mismatch at size {}, idx {}", + size, + i); + } + + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 11 + 3) & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[ptr_off + i]; + my_memmove(buf, buf + ptr_off, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[i] == ref[i], + "Boundary bwd+ptr mismatch at size {}, idx {}", + size, + i); + } + } + + // Test with misaligned start (offset 3 from allocation start) + if (size + 3 + size <= bufsize) + { + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast((i * 11 + 3) & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[3 + i]; + my_memmove(buf + 3 + 1, buf + 3, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[3 + 1 + i] == ref[i], + "Boundary misaligned fwd mismatch at size {}, idx {}", + size, + i); + } + } + + my_free(buf); + my_free(ref); + } +} + +/** + * Test memmove via direct snmalloc::memmove (unchecked) to exercise + * arch-specific code paths without bounds checking interference. + */ +void check_direct_memmove(size_t size) +{ + START_TEST("direct snmalloc::memmove for size {}", size); + + size_t bufsize = 2 * size + 1; + auto* buf = static_cast(my_malloc(bufsize)); + auto* ref = static_cast(my_malloc(bufsize)); + + // Forward overlap (dst > src) with offset 1 + if (size > 0) + { + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast(i & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[i]; + + snmalloc::memmove(buf + 1, buf, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[1 + i] == ref[i], + "Direct fwd mismatch at size {}, idx {}", + size, + i); + } + } + + // Reverse overlap (dst < src) with offset 1 + if (size > 0) + { + for (size_t i = 0; i < bufsize; ++i) + buf[i] = static_cast(i & 0xff); + for (size_t i = 0; i < size; ++i) + ref[i] = buf[1 + i]; + + snmalloc::memmove(buf, buf + 1, size); + for (size_t i = 0; i < size; ++i) + { + EXPECT( + buf[i] == ref[i], "Direct bwd mismatch at size {}, idx {}", size, i); + } + } + + my_free(buf); + my_free(ref); +} + +void check_memmove_bounds(size_t size, size_t out_of_bounds) +{ + START_TEST( + "memmove bounds, size {}, {} bytes out of bounds", size, out_of_bounds); + auto* s = static_cast(my_malloc(size)); + auto* d = static_cast(my_malloc(size)); + for (size_t i = 0; i < size; ++i) + { + s[i] = static_cast(i); + } + for (size_t i = 0; i < size; ++i) + { + d[i] = 0; + } + bool bounds_error = false; + can_longjmp = true; + if (setjmp(jmp) == 0) + { + my_memmove(d, s, size + out_of_bounds); + } + else + { + bounds_error = true; + } + can_longjmp = false; + EXPECT( + bounds_error == (out_of_bounds > 0), + "bounds error: {}, out_of_bounds: {}", + bounds_error, + out_of_bounds); + my_free(s); + my_free(d); +} + +int main() +{ + static constexpr size_t min_class_size = + sizeclass_to_size(size_to_sizeclass(MIN_ALLOC_SIZE)); + + std::initializer_list sizes = {min_class_size, 1024, 2 * 1024 * 1024}; + static_assert( + MIN_ALLOC_SIZE < 1024, + "Can't detect overflow except at sizeclass boundaries"); + + for (auto sz : sizes) + { + // Check in bounds + check_memmove_bounds(sz, 0); + // Check one byte out + check_memmove_bounds(sz, 1); + // Check one object out of bounds + check_memmove_bounds(sz, sz); + } + // Test memmove with various sizes covering all Arch path thresholds + for (size_t x = 1; x < 2048; x++) + { + check_overlaps(x); + } + // Exhaustive overlap testing for small sizes (every offset * length combo) + for (size_t x = 2; x <= 64; x++) + { + check_exhaustive_overlaps(x); + } + // Alignment boundary tests + check_alignment_boundary_overlaps(); + // Direct snmalloc::memmove tests (unchecked path) + for (size_t x = 0; x < 2048; x++) + { + check_direct_memmove(x); + } +} +#endif From 5fca460f211f9f4ce9fa2b0f778560e1a2e411d7 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 4 Mar 2026 00:05:35 +0000 Subject: [PATCH 5/5] Use __builtin_memmove in copy_one_move to fix ASan overlap error The struct copy (*d = *s) in copy_one_move was being lowered by the compiler into a memcpy call, which ASan then flagged as memcpy-param-overlap for memmove's overlapping buffers. Switch to __builtin_memmove which correctly handles overlap and still optimizes to register-width loads/stores. Add a byte-by-byte fallback for compilers lacking __builtin_memmove. --- src/snmalloc/global/memcpy.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/snmalloc/global/memcpy.h b/src/snmalloc/global/memcpy.h index f2453ba15..56496b31e 100644 --- a/src/snmalloc/global/memcpy.h +++ b/src/snmalloc/global/memcpy.h @@ -31,22 +31,25 @@ namespace snmalloc /** * Copy a single element where source and destination may overlap. - * Unlike copy_one, this always uses the struct-copy path because - * __builtin_memcpy_inline is treated as memcpy by sanitizers and - * will flag overlapping src/dst as an error. The struct copy is - * semantically safe for overlap (load into temporary, then store). + * Uses __builtin_memmove which the compiler can optimize to register-width + * loads/stores while correctly handling overlap. We cannot use + * __builtin_memcpy_inline (ASan treats it as memcpy and flags overlap) + * or struct copy (compiler lowers *d = *s to a memcpy call, same problem). */ template SNMALLOC_FAST_PATH_INLINE void copy_one_move(void* dst, const void* src) { - struct Block - { - char data[Size]; - }; - - auto* d = static_cast(dst); - auto* s = static_cast(src); - *d = *s; +#if __has_builtin(__builtin_memmove) + __builtin_memmove(dst, src, Size); +#else + // Fallback: byte-by-byte copy through a temporary buffer to avoid + // the compiler generating a memcpy call for struct assignment. + char tmp[Size]; + for (size_t i = 0; i < Size; ++i) + tmp[i] = static_cast(src)[i]; + for (size_t i = 0; i < Size; ++i) + static_cast(dst)[i] = tmp[i]; +#endif } /**