From e1680755785f3f68c7573df7f1416299145d566a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 12 May 2026 15:52:27 -0400 Subject: [PATCH 1/4] Add boost::hash_combine style hashing --- include/boost/int128.hpp | 1 + include/boost/int128/hash.hpp | 48 +++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 include/boost/int128/hash.hpp diff --git a/include/boost/int128.hpp b/include/boost/int128.hpp index d55e4166..7d996bd7 100644 --- a/include/boost/int128.hpp +++ b/include/boost/int128.hpp @@ -16,5 +16,6 @@ #include #include #include +#include #endif // BOOST_INT128_HPP diff --git a/include/boost/int128/hash.hpp b/include/boost/int128/hash.hpp new file mode 100644 index 00000000..403dbce9 --- /dev/null +++ b/include/boost/int128/hash.hpp @@ -0,0 +1,48 @@ +// Copyright 2026 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_INT128_HASH_HPP +#define BOOST_INT128_HASH_HPP + +#include + +#ifndef BOOST_INT128_BUILD_MODULE + +#include +#include +#include + +#endif + +namespace std { + +template <> +struct hash +{ + auto operator()(const boost::int128::int128_t v) const noexcept -> std::size_t + { + const std::size_t low_hash {std::hash{}(v.low)}; + const std::size_t high_hash {std::hash{}(v.high)}; + + // boost::hash_combine style mixing + return low_hash ^ (high_hash + static_cast(0x9e3779b9) + (low_hash << 6) + (low_hash >> 2)); + } +}; + +template <> +struct hash +{ + auto operator()(const boost::int128::uint128_t v) const noexcept -> std::size_t + { + const std::size_t low_hash {std::hash{}(v.low)}; + const std::size_t high_hash {std::hash{}(v.high)}; + + // boost::hash_combine style mixing + return low_hash ^ (high_hash + static_cast(0x9e3779b9) + (low_hash << 6) + (low_hash >> 2)); + } +}; + +} // namespace std + +#endif // BOOST_INT128_HASH_HPP From 11851a5ae758007a541868bdb3ed0b67a7917045 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: 
Tue, 12 May 2026 15:52:35 -0400 Subject: [PATCH 2/4] Add testing of hashing --- test/Jamfile | 2 + test/compile_tests/hash_compile.cpp | 10 ++ test/test_hash.cpp | 200 ++++++++++++++++++++++++++++ 3 files changed, 212 insertions(+) create mode 100644 test/compile_tests/hash_compile.cpp create mode 100644 test/test_hash.cpp diff --git a/test/Jamfile b/test/Jamfile index 357c0b7c..e41a70f1 100644 --- a/test/Jamfile +++ b/test/Jamfile @@ -89,6 +89,7 @@ run test_div.cpp ; run test_num_digits.cpp ; run test_spaceship_operator.cpp ; run test_to_string.cpp ; +run test_hash.cpp ; # Make sure we run the examples as well run ../examples/construction.cpp ; @@ -123,6 +124,7 @@ compile compile_tests/charconv_compile.cpp ; compile compile_tests/climits_compile.cpp ; compile compile_tests/cstdlib_compile.cpp ; compile compile_tests/format_compile.cpp ; +compile compile_tests/hash_compile.cpp ; compile compile_tests/int128_compile.cpp ; compile compile_tests/iostream_compile.cpp ; compile compile_tests/limits_compile.cpp ; diff --git a/test/compile_tests/hash_compile.cpp b/test/compile_tests/hash_compile.cpp new file mode 100644 index 00000000..bf248891 --- /dev/null +++ b/test/compile_tests/hash_compile.cpp @@ -0,0 +1,10 @@ +// Copyright 2026 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +int main() +{ + return 0; +} diff --git a/test/test_hash.cpp b/test/test_hash.cpp new file mode 100644 index 00000000..a3b54c5e --- /dev/null +++ b/test/test_hash.cpp @@ -0,0 +1,200 @@ +// Copyright 2026 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#ifndef BOOST_INT128_BUILD_MODULE + +#include +#include + +#else + +import boost.int128; + +#endif + +#include +#include +#include +#include +#include + +void test_uint128_equivalent_hashes() +{ + using boost::int128::uint128_t; + + std::hash hasher {}; + + // Same value built different ways must hash identically + const uint128_t a {UINT64_C(0xDEADBEEF), UINT64_C(0xCAFEBABE)}; + const uint128_t b {UINT64_C(0xDEADBEEF), UINT64_C(0xCAFEBABE)}; + BOOST_TEST_EQ(hasher(a), hasher(b)); + + // Equality across assignment + uint128_t c {}; + c = a; + BOOST_TEST_EQ(hasher(a), hasher(c)); + + // Zero hashes to a stable value + const uint128_t zero1 {}; + const uint128_t zero2 {0}; + BOOST_TEST_EQ(hasher(zero1), hasher(zero2)); + + // Values constructed from a small integer match equivalent two-word form + const uint128_t small_a {42}; + const uint128_t small_b {0, 42}; + BOOST_TEST_EQ(hasher(small_a), hasher(small_b)); +} + +void test_int128_equivalent_hashes() +{ + using boost::int128::int128_t; + + std::hash hasher {}; + + const int128_t a {INT64_C(-1), UINT64_C(0xCAFEBABE)}; + const int128_t b {INT64_C(-1), UINT64_C(0xCAFEBABE)}; + BOOST_TEST_EQ(hasher(a), hasher(b)); + + int128_t c {}; + c = a; + BOOST_TEST_EQ(hasher(a), hasher(c)); + + const int128_t zero1 {}; + const int128_t zero2 {0}; + BOOST_TEST_EQ(hasher(zero1), hasher(zero2)); + + const int128_t pos_a {42}; + const int128_t pos_b {0, 42}; + BOOST_TEST_EQ(hasher(pos_a), hasher(pos_b)); +} + +void test_int128_negative_no_collision_with_absolute() +{ + using boost::int128::int128_t; + + std::hash hasher {}; + + // hash(-x) must not equal hash(x) for non-zero x + for (std::int64_t i {1}; i <= 1024; ++i) + { + const int128_t pos {i}; + const int128_t neg {-i}; + BOOST_TEST_NE(hasher(pos), hasher(neg)); + } + + // Larger magnitudes including values that span beyond 64 bits + const int128_t big_pos {INT64_C(0x0000000100000000), UINT64_C(0)}; + const int128_t 
big_neg {-big_pos}; + BOOST_TEST_NE(hasher(big_pos), hasher(big_neg)); + + // Values whose low word matches their negation's low word arithmetic (mod 2^64) are still distinct + const int128_t one {1}; + const int128_t minus_one {-1}; + BOOST_TEST_NE(hasher(one), hasher(minus_one)); + + // INT128_MIN and a value with the same (zero) low word but the bitwise-complement high word + const int128_t edge_pos {INT64_MAX, UINT64_C(0)}; + const int128_t edge_neg {INT64_MIN, UINT64_C(0)}; + BOOST_TEST_NE(hasher(edge_pos), hasher(edge_neg)); +} + +void test_hash_distribution_uint128() +{ + using boost::int128::uint128_t; + + std::hash hasher {}; + std::unordered_set seen {}; + + // A few thousand random values should produce nearly distinct hashes + std::mt19937_64 rng {42}; + constexpr int num_samples {4096}; + for (int i {0}; i < num_samples; ++i) + { + const uint128_t v {rng(), rng()}; + seen.insert(hasher(v)); + } + + // Allow some collisions (size_t may be 32-bit on some platforms) but require high uniqueness + BOOST_TEST_GT(seen.size(), static_cast(num_samples - 16)); +} + +void test_hash_distribution_int128() +{ + using boost::int128::int128_t; + + std::hash hasher {}; + std::unordered_set seen {}; + + std::mt19937_64 rng {123}; + constexpr int num_samples {4096}; + for (int i {0}; i < num_samples; ++i) + { + const int128_t v {static_cast(rng()), rng()}; + seen.insert(hasher(v)); + } + + BOOST_TEST_GT(seen.size(), static_cast(num_samples - 16)); +} + +void test_use_in_unordered_map_uint128() +{ + using boost::int128::uint128_t; + + std::unordered_map 
map {}; + map[int128_t{0, 1}] = 1; + map[int128_t{-1, UINT64_MAX}] = 2; + map[int128_t{1, 0}] = 3; + map[int128_t{-1, 0}] = 4; + + BOOST_TEST_EQ((map[int128_t{0, 1}]), 1); + BOOST_TEST_EQ((map[int128_t{-1, UINT64_MAX}]), 2); + BOOST_TEST_EQ((map[int128_t{1, 0}]), 3); + BOOST_TEST_EQ((map[int128_t{-1, 0}]), 4); + BOOST_TEST_EQ(map.size(), (std::size_t {4})); +} + +void test_high_low_swap_not_colliding() +{ + using boost::int128::uint128_t; + + std::hash hasher {}; + + // hash({a, b}) should not equal hash({b, a}) in general + const uint128_t a {UINT64_C(0x1111), UINT64_C(0x2222)}; + const uint128_t b {UINT64_C(0x2222), UINT64_C(0x1111)}; + BOOST_TEST_NE(hasher(a), hasher(b)); +} + +int main() +{ + test_uint128_equivalent_hashes(); + test_int128_equivalent_hashes(); + test_int128_negative_no_collision_with_absolute(); + test_hash_distribution_uint128(); + test_hash_distribution_int128(); + test_use_in_unordered_map_uint128(); + test_use_in_unordered_map_int128(); + test_high_low_swap_not_colliding(); + + return boost::report_errors(); +} From a120020d0a3ffcc96f3b6a4e3d7f6aa582ed83ae Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 12 May 2026 16:10:26 -0400 Subject: [PATCH 3/4] Add hash docs --- doc/modules/ROOT/nav.adoc | 2 + doc/modules/ROOT/pages/api_reference.adoc | 23 ++++++++ doc/modules/ROOT/pages/file_structure.adoc | 3 + doc/modules/ROOT/pages/hash.adoc | 68 ++++++++++++++++++++++ 4 files changed, 96 insertions(+) create mode 100644 doc/modules/ROOT/pages/hash.adoc diff --git a/doc/modules/ROOT/nav.adoc b/doc/modules/ROOT/nav.adoc index 892f4502..ceeee540 100644 --- a/doc/modules/ROOT/nav.adoc +++ b/doc/modules/ROOT/nav.adoc @@ -23,6 +23,7 @@ *** xref:api_reference.adoc#api_cstdlib[``] *** xref:api_reference.adoc#api_charconv[``] *** xref:api_reference.adoc#api_cmath[``] +*** xref:api_reference.adoc#api_functional[``] *** xref:api_reference.adoc#api_iostream[``] *** xref:api_reference.adoc#api_ios[``] *** xref:api_reference.adoc#api_numeric[``] @@ 
-54,6 +55,7 @@ * xref:cstdlib.adoc[] * xref:charconv.adoc[] * xref:stream.adoc[] +* xref:hash.adoc[] * xref:numeric.adoc[] * xref:string.adoc[] * xref:utilities.adoc[] diff --git a/doc/modules/ROOT/pages/api_reference.adoc b/doc/modules/ROOT/pages/api_reference.adoc index faec48a9..2963d1a4 100644 --- a/doc/modules/ROOT/pages/api_reference.adoc +++ b/doc/modules/ROOT/pages/api_reference.adoc @@ -72,6 +72,12 @@ https://www.boost.org/LICENSE_1_0.txt | https://en.cppreference.com/w/cpp/types/numeric_limits[`std::numeric_limits`] | Numeric limits specialization for `int128_t` +| xref:hash.adoc[`std::hash`] +| Hash specialization for `uint128_t` + +| xref:hash.adoc[`std::hash`] +| Hash specialization for `int128_t` + | xref:cstdlib.adoc#div_structs[`u128div_t`] | Result type for `div(uint128_t, uint128_t)` @@ -164,6 +170,20 @@ Listed by analogous STL header. | Computes quotient and remainder simultaneously |=== +[#api_functional] +=== xref:hash.adoc[``] + +[cols="1,2", options="header"] +|=== +| Specialization | Description + +| xref:hash.adoc[`std::hash`] +| Enables `uint128_t` as a key in unordered associative containers + +| xref:hash.adoc[`std::hash`] +| Enables `int128_t` as a key in unordered associative containers +|=== + [#api_formatting] === xref:format.adoc[Formatting] @@ -367,6 +387,9 @@ Listed by analogous STL header. 
| xref:format.adoc#std_format[``] | Formatting integration for pass:[C++20] `` +| xref:hash.adoc[``] +| `std::hash` specializations for `int128_t` and `uint128_t` + | `` | The xref:uint128_t.adoc[`uint128_t`] and xref:int128_t.adoc[`int128_t`] types diff --git a/doc/modules/ROOT/pages/file_structure.adoc b/doc/modules/ROOT/pages/file_structure.adoc index 84c6765f..1abb1c4b 100644 --- a/doc/modules/ROOT/pages/file_structure.adoc +++ b/doc/modules/ROOT/pages/file_structure.adoc @@ -39,6 +39,9 @@ The entire library can be consumed via ``, or by independently | xref:format.adoc[``] | C++20 `std::format` support +| xref:hash.adoc[``] +| `std::hash` specializations for `int128_t` and `uint128_t` + | `` | Core type definitions (`uint128_t`, `int128_t`) diff --git a/doc/modules/ROOT/pages/hash.adoc b/doc/modules/ROOT/pages/hash.adoc new file mode 100644 index 00000000..cdc639f5 --- /dev/null +++ b/doc/modules/ROOT/pages/hash.adoc @@ -0,0 +1,68 @@ +//// +Copyright 2026 Matt Borland +Distributed under the Boost Software License, Version 1.0. +https://www.boost.org/LICENSE_1_0.txt +//// + +[#hash] += Hashing +:idprefix: hash_ + +The `` header provides specializations of `std::hash` for `uint128_t` and `int128_t`, allowing the library types to be used as keys in `std::unordered_map`, `std::unordered_set`, and any other container that relies on `std::hash`. + +[source, c++] +---- +#include +---- + +[#hash_specializations] +== Specializations + +[source, c++] +---- +namespace std { + +template <> +struct hash +{ + std::size_t operator()(boost::int128::int128_t v) const noexcept; +}; + +template <> +struct hash +{ + std::size_t operator()(boost::int128::uint128_t v) const noexcept; +}; + +} // namespace std +---- + +Both specializations hash the high and low 64-bit halves of the value individually and combine the two results using the same mixing function as `boost::hash_combine`. 
+The signed specialization hashes its high word as `std::int64_t`, which prevents a negative value from colliding with the positive value that shares the same low word. + +[#hash_guarantees] +== Guarantees + +* Two values comparing equal under `operator==` produce the same hash. +* Negating a value alters at least one of its 64-bit halves, so `v` and `-v` are expected to hash differently; this is a statistical property rather than a strict guarantee, and `-v == v` for zero and for the value with only bit 127 set (the minimum `int128_t`). +* The mixing function is asymmetric, so `{high, low}` and `{low, high}` do not collide except by chance. +* The hash value is implementation-defined and may differ across platforms, compilers, or library versions. Do not persist hash values across runs. + +[#hash_example] +== Example + +[source, c++] +---- +#include +#include +#include + +int main() +{ + std::unordered_map counts {}; + counts[boost::int128::uint128_t{1, 0}] = 1; + counts[boost::int128::uint128_t{0, 1}] = 2; + + return 0; +} +---- From eccb7aa924a36c763600c5c703af29f092a4c367 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 12 May 2026 16:22:03 -0400 Subject: [PATCH 4/4] Use mixer to work with 32 bit platforms --- doc/modules/ROOT/pages/hash.adoc | 5 +++-- include/boost/int128/hash.hpp | 32 ++++++++++++++++++++++++++------ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/doc/modules/ROOT/pages/hash.adoc b/doc/modules/ROOT/pages/hash.adoc index cdc639f5..a03ef14c 100644 --- a/doc/modules/ROOT/pages/hash.adoc +++ b/doc/modules/ROOT/pages/hash.adoc @@ -37,8 +37,9 @@ struct hash } // namespace std ---- -Both specializations hash the high and low 64-bit halves of the value individually and combine the two results using the same mixing function as `boost::hash_combine`. -The signed specialization hashes its high word as `std::int64_t`, which prevents a negative value from colliding with the positive value that shares the same low word. +Each 64-bit half of the value is first run through a SplitMix64 finalizer so that every input bit influences the lower bits of the result.
+This is necessary because `std::hash` is permitted to truncate to `std::size_t`, which would lose the upper 32 bits on 32-bit platforms and cause distinct 128-bit values to collide. +The two finalized halves are then combined with the `boost::hash_combine` mixing formula. [#hash_guarantees] == Guarantees diff --git a/include/boost/int128/hash.hpp b/include/boost/int128/hash.hpp index 403dbce9..2bed5b58 100644 --- a/include/boost/int128/hash.hpp +++ b/include/boost/int128/hash.hpp @@ -15,6 +15,26 @@ #endif +namespace boost { +namespace int128 { +namespace detail { + +// splitmix64 finalizer: mixes all 64 input bits into the result before any narrowing to size_t. +// This matters on platforms where size_t is 32 bits; the explicit cast below marks that truncation as intentional +inline std::size_t hash_finalize_64(std::uint64_t v) noexcept +{ + v ^= v >> 30; + v *= UINT64_C(0xbf58476d1ce4e5b9); + v ^= v >> 27; + v *= UINT64_C(0x94d049bb133111eb); + v ^= v >> 31; + return static_cast<std::size_t>(v); +} + +} // namespace detail +} // namespace int128 +} // namespace boost + namespace std { template <> @@ -22,10 +42,10 @@ struct hash { auto operator()(const boost::int128::int128_t v) const noexcept -> std::size_t { - const std::size_t low_hash {std::hash{}(v.low)}; - const std::size_t high_hash {std::hash{}(v.high)}; + const std::size_t low_hash {boost::int128::detail::hash_finalize_64(v.low)}; + const std::size_t high_hash {boost::int128::detail::hash_finalize_64(static_cast(v.high))}; - // boost::hash_combine style mixing + // boost::hash_combine style mixing of the two finalized halves return low_hash ^ (high_hash + static_cast(0x9e3779b9) + (low_hash << 6) + (low_hash >> 2)); } }; @@ -35,10 +55,10 @@ struct hash { auto operator()(const boost::int128::uint128_t v) const noexcept -> std::size_t { - const std::size_t low_hash {std::hash{}(v.low)}; - const std::size_t high_hash {std::hash{}(v.high)}; + const std::size_t low_hash {boost::int128::detail::hash_finalize_64(v.low)}; + const std::size_t high_hash 
{boost::int128::detail::hash_finalize_64(v.high)}; - // boost::hash_combine style mixing + // boost::hash_combine style mixing of the two finalized halves return low_hash ^ (high_hash + static_cast(0x9e3779b9) + (low_hash << 6) + (low_hash >> 2)); } };