From 03f71edb86d599cd08eea04540129de17b19495e Mon Sep 17 00:00:00 2001 From: monthdev Date: Thu, 5 Mar 2026 23:10:24 -0700 Subject: [PATCH 1/5] Add XXH32() and XXH3() SQL functions --- mysql-test/main/func_xxh.result | 30 +++++++++++++ mysql-test/main/func_xxh.test | 12 ++++++ sql/item_create.cc | 56 +++++++++++++++++++++++++ sql/item_strfunc.cc | 39 +++++++++++++++++ sql/item_strfunc.h | 74 +++++++++++++++++++++++++++++++++ 5 files changed, 211 insertions(+) create mode 100644 mysql-test/main/func_xxh.result create mode 100644 mysql-test/main/func_xxh.test diff --git a/mysql-test/main/func_xxh.result b/mysql-test/main/func_xxh.result new file mode 100644 index 0000000000000..fb85e5c5d0ce2 --- /dev/null +++ b/mysql-test/main/func_xxh.result @@ -0,0 +1,30 @@ +SELECT XXH32('abc') = 852579327 AS xxh32_expected; +xxh32_expected +1 +SELECT XXH32(11223344) = 408040606 AS xxh32_expected; +xxh32_expected +1 +SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq; +xxh32_eq +1 +SELECT XXH32(NULL) IS NULL AS xxh32_null; +xxh32_null +1 +SELECT XXH32('') IS NULL AS xxh32_null; +xxh32_null +1 +SELECT XXH3('abc') = 8696274497037089104 AS xxh3_expected; +xxh3_expected +1 +SELECT XXH3(11223344) = 9329478082249925753 AS xxh3_expected; +xxh3_expected +1 +SELECT XXH3('abc') = XXH32('abc') AS xxh3_eq; +xxh3_eq +0 +SELECT XXH3(NULL) IS NULL AS xxh3_null; +xxh3_null +1 +SELECT XXH3('') IS NULL AS xxh3_null; +xxh3_null +1 diff --git a/mysql-test/main/func_xxh.test b/mysql-test/main/func_xxh.test new file mode 100644 index 0000000000000..000d0e50ad8bf --- /dev/null +++ b/mysql-test/main/func_xxh.test @@ -0,0 +1,12 @@ +# Testing XXH SQL functions +SELECT XXH32('abc') = 852579327 AS xxh32_expected; +SELECT XXH32(11223344) = 408040606 AS xxh32_expected; +SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq; +SELECT XXH32(NULL) IS NULL AS xxh32_null; +SELECT XXH32('') IS NULL AS xxh32_null; + +SELECT XXH3('abc') = 8696274497037089104 AS xxh3_expected; +SELECT XXH3(11223344) = 9329478082249925753 AS xxh3_expected; +SELECT XXH3('abc') = XXH32('abc') AS xxh3_eq; +SELECT XXH3(NULL) IS NULL AS xxh3_null; +SELECT XXH3('') IS NULL AS xxh3_null; diff --git a/sql/item_create.cc b/sql/item_create.cc index f707607e1e84a..a83abd29b1423 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -633,6 +633,27 @@ class Create_func_crc32c : public Create_native_func virtual ~Create_func_crc32c() = default; }; +class Create_func_xxh32 : public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) override; + static Create_func_xxh32 s_singleton; +protected: + Create_func_xxh32() = default; + ~Create_func_xxh32() override = default; +}; + +class Create_func_xxh3 : public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) override; + static Create_func_xxh3 s_singleton; +protected: + Create_func_xxh3() = default; + ~Create_func_xxh3() override = default; +}; class Create_func_datediff : public Create_func_arg2 { @@ -3682,6 +3703,39 @@ Create_func_crc32c::create_native(THD *thd, const LEX_CSTRING *name, : new (thd->mem_root) Item_func_crc32(thd, true, arg1); } +Create_func_xxh32 Create_func_xxh32::s_singleton; + +Item *Create_func_xxh32::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int argc= item_list ? item_list->elements : 0; + if (unlikely(argc != 1)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return nullptr; + } + + Item *arg1= item_list->pop(); + DBUG_ASSERT(!arg1->is_explicit_name()); + return new (thd->mem_root) Item_func_xxh32(thd, arg1); +} + +Create_func_xxh3 Create_func_xxh3::s_singleton; + +Item *Create_func_xxh3::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int argc= item_list ? item_list->elements : 0; + if (unlikely(argc != 1)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return nullptr; + } + + Item *arg1= item_list->pop(); + DBUG_ASSERT(!arg1->is_explicit_name()); + return new (thd->mem_root) Item_func_xxh3_64(thd, arg1); +} Create_func_datediff Create_func_datediff::s_singleton; @@ -6338,6 +6392,8 @@ const Native_func_registry func_array[] = { { STRING_WITH_LEN("COT") }, BUILDER(Create_func_cot)}, { { STRING_WITH_LEN("CRC32") }, BUILDER(Create_func_crc32)}, { { STRING_WITH_LEN("CRC32C") }, BUILDER(Create_func_crc32c)}, + { { STRING_WITH_LEN("XXH32") }, BUILDER(Create_func_xxh32) }, +{ { STRING_WITH_LEN("XXH3") }, BUILDER(Create_func_xxh3) }, { { STRING_WITH_LEN("DATABASE") }, BUILDER(Create_func_database)}, { { STRING_WITH_LEN("DATEDIFF") }, BUILDER(Create_func_datediff)}, { { STRING_WITH_LEN("DATE_FORMAT") }, BUILDER(Create_func_date_format)}, diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index d4328284b6bde..5b66812213d45 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -43,6 +43,7 @@ // my_make_scrambled_password_323 #include #include +#include "../mysys/xxhash.h" C_MODE_START #include "../mysys/my_static.h" // For soundex_map C_MODE_END @@ -4570,6 +4571,44 @@ longlong Item_func_crc32::val_int() (ulonglong{crc_func(uint32_t(crc), res->ptr(), res->length())}); } +#include "../mysys/xxhash.h" + +longlong Item_func_xxh32::val_int() +{ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(arg_count == 1); + + null_value= 0; + String *Result= args[0]->val_str(&m_Value); + if (!Result || Result->length() == 0) + { + null_value= 1; + return 0; + } + + const uint32_t Hash= + XXH32(static_cast(Result->ptr()), Result->length(), 0); + return static_cast(static_cast(Hash)); +} + +longlong Item_func_xxh3_64::val_int() +{ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(arg_count == 1); + + null_value= 0; + String *Result= args[0]->val_str(&m_Value); + if (!Result || Result->length() == 0) + { + null_value= 1; + return 0; + } + + const uint64_t Hash= + XXH3_64bits(static_cast(Result->ptr()), Result->length()); + return static_cast(static_cast(Hash)); +} + #ifdef HAVE_COMPRESS #include "zlib.h" diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 4383f86178971..680dc022ab5ef 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -2317,6 +2317,80 @@ class Item_func_crc32 :public Item_long_func { return get_item_copy(thd, this); } }; +class Item_func_xxh32 : public Item_long_func +{ + bool check_arguments() const override + { + return args[0]->check_type_can_return_str(func_name_cstring()); + } + + String m_Value; + +public: + Item_func_xxh32(THD *Thd, Item *Arg) + : Item_long_func(Thd, Arg) + { + unsigned_flag= 1; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING Name= { STRING_WITH_LEN("XXH32") }; + return Name; + } + + bool fix_length_and_dec(THD *) override + { + max_length= 10; + set_maybe_null(); + return false; + } + + longlong val_int() override; + + Item *shallow_copy(THD *Thd) const override + { + return get_item_copy(Thd, this); + } +}; + +class Item_func_xxh3_64 : public Item_longlong_func +{ + bool check_arguments() const override + { + return args[0]->check_type_can_return_str(func_name_cstring()); + } + + String m_Value; + +public: + Item_func_xxh3_64(THD *Thd, Item *Arg) + : Item_longlong_func(Thd, Arg) + { + unsigned_flag= 1; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING Name= { STRING_WITH_LEN("XXH3") }; + return Name; + } + + bool fix_length_and_dec(THD *) override + { + max_length= 20; + set_maybe_null(); + return false; + } + + longlong val_int() override; + + Item *shallow_copy(THD *Thd) const override + { + return get_item_copy(Thd, this); + } +}; + class Item_func_uncompressed_length : public Item_long_func_length { String value; From 3f2389082d0a48f5838c6c990705c0edbd4ad32d Mon Sep 17 00:00:00 2001 From: monthdev Date: Fri, 6 Mar 2026 22:28:58 -0700 Subject: [PATCH 2/5] Apply feedback --- mysql-test/main/func_xxh.result | 40 ++++++++--- mysql-test/main/func_xxh.test | 23 +++++-- sql/item_create.cc | 31 ++++++++- sql/item_strfunc.cc | 113 +++++++++++++++++++++++++------- sql/item_strfunc.h | 90 ++++++++++++++----------- 5 files changed, 218 insertions(+), 79 deletions(-) diff --git a/mysql-test/main/func_xxh.result b/mysql-test/main/func_xxh.result index fb85e5c5d0ce2..92813c1a83a5f 100644 --- a/mysql-test/main/func_xxh.result +++ b/mysql-test/main/func_xxh.result @@ -1,7 +1,7 @@ -SELECT XXH32('abc') = 852579327 AS xxh32_expected; +SELECT XXH32('abc') = '32d153ff' AS xxh32_expected; xxh32_expected 1 -SELECT XXH32(11223344) = 408040606 AS xxh32_expected; +SELECT XXH32(11223344) = '1852349e' AS xxh32_expected; xxh32_expected 1 SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq; @@ -10,13 +10,13 @@ xxh32_eq SELECT XXH32(NULL) IS NULL AS xxh32_null; xxh32_null 1 -SELECT XXH32('') IS NULL AS xxh32_null; -xxh32_null +SELECT XXH32('') = '02cc5d05' AS xxh32_empty; +xxh32_empty 1 -SELECT XXH3('abc') = 8696274497037089104 AS xxh3_expected; +SELECT XXH3('abc') = '78af5f94892f3950' AS xxh3_expected; xxh3_expected 1 -SELECT XXH3(11223344) = 9329478082249925753 AS xxh3_expected; +SELECT XXH3(11223344) = '8178f6e2d84ca479' AS xxh3_expected; xxh3_expected 1 SELECT XXH3('abc') = XXH32('abc') AS xxh3_eq; @@ -25,6 +25,30 @@ xxh3_eq SELECT XXH3(NULL) IS NULL AS xxh3_null; xxh3_null 1 -SELECT XXH3('') IS NULL AS xxh3_null; -xxh3_null +SELECT XXH3('') = '2d06800538d394c2' AS xxh3_empty; +xxh3_empty +1 +SELECT XXH3_128('abc') = '06b05ab6733a618578af5f94892f3950' AS xxh3_128_expected; +xxh3_128_expected +1 +SELECT XXH3_128(11223344) = '4a18cfb1c6fc9ebb0869a1f5a72ff851' AS xxh3_128_num; +xxh3_128_num +1 +SELECT XXH3_128('abc') = XXH3_128('abc') AS xxh3_128_eq; +xxh3_128_eq +1 +SELECT XXH3_128(NULL) IS NULL AS xxh3_128_null; +xxh3_128_null +1 +SELECT XXH3_128('') = '99aa06d3014798d86001c324468d497f' AS xxh3_128_empty; +xxh3_128_empty +1 +SELECT XXH32(_latin1'abc') = XXH32(_utf8mb4'abc') AS xxh32_charset_eq; +xxh32_charset_eq +1 +SELECT XXH3(_latin1'abc') = XXH3(_utf8mb4'abc') AS xxh3_charset_eq; +xxh3_charset_eq +1 +SELECT XXH3_128(_latin1'abc') = XXH3_128(_utf8mb4'abc') AS xxh3_128_charset_eq; +xxh3_128_charset_eq 1 diff --git a/mysql-test/main/func_xxh.test b/mysql-test/main/func_xxh.test index 000d0e50ad8bf..4bd6d7cb8243f 100644 --- a/mysql-test/main/func_xxh.test +++ b/mysql-test/main/func_xxh.test @@ -1,12 +1,21 @@ -# Testing XXH SQL functions -SELECT XXH32('abc') = 852579327 AS xxh32_expected; -SELECT XXH32(11223344) = 408040606 AS xxh32_expected; +SELECT XXH32('abc') = '32d153ff' AS xxh32_expected; +SELECT XXH32(11223344) = '1852349e' AS xxh32_expected; SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq; SELECT XXH32(NULL) IS NULL AS xxh32_null; -SELECT XXH32('') IS NULL AS xxh32_null; +SELECT XXH32('') = '02cc5d05' AS xxh32_empty; -SELECT XXH3('abc') = 8696274497037089104 AS xxh3_expected; -SELECT XXH3(11223344) = 9329478082249925753 AS xxh3_expected; +SELECT XXH3('abc') = '78af5f94892f3950' AS xxh3_expected; +SELECT XXH3(11223344) = '8178f6e2d84ca479' AS xxh3_expected; SELECT XXH3('abc') = XXH32('abc') AS xxh3_eq; SELECT XXH3(NULL) IS NULL AS xxh3_null; -SELECT XXH3('') IS NULL AS xxh3_null; +SELECT XXH3('') = '2d06800538d394c2' AS xxh3_empty; + +SELECT XXH3_128('abc') = '06b05ab6733a618578af5f94892f3950' AS xxh3_128_expected; +SELECT XXH3_128(11223344) = '4a18cfb1c6fc9ebb0869a1f5a72ff851' AS xxh3_128_num; +SELECT XXH3_128('abc') = XXH3_128('abc') AS xxh3_128_eq; +SELECT XXH3_128(NULL) IS NULL AS xxh3_128_null; +SELECT XXH3_128('') = '99aa06d3014798d86001c324468d497f' AS xxh3_128_empty; + +SELECT XXH32(_latin1'abc') = XXH32(_utf8mb4'abc') AS xxh32_charset_eq; +SELECT XXH3(_latin1'abc') = XXH3(_utf8mb4'abc') AS xxh3_charset_eq; +SELECT XXH3_128(_latin1'abc') = XXH3_128(_utf8mb4'abc') AS xxh3_128_charset_eq; diff --git a/sql/item_create.cc b/sql/item_create.cc index a83abd29b1423..7c27edfeab966 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -655,6 +655,17 @@ class Create_func_xxh3 : public Create_native_func ~Create_func_xxh3() override = default; }; +class Create_func_xxh3_128 : public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) override; + static Create_func_xxh3_128 s_singleton; +protected: + Create_func_xxh3_128() = default; + ~Create_func_xxh3_128() override = default; +}; + class Create_func_datediff : public Create_func_arg2 { public: @@ -3734,7 +3745,24 @@ Item *Create_func_xxh3::create_native(THD *thd, const LEX_CSTRING *name, Item *arg1= item_list->pop(); DBUG_ASSERT(!arg1->is_explicit_name()); - return new (thd->mem_root) Item_func_xxh3_64(thd, arg1); + return new (thd->mem_root) Item_func_xxh3(thd, arg1); +} + +Create_func_xxh3_128 Create_func_xxh3_128::s_singleton; + +Item *Create_func_xxh3_128::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int argc= item_list ? item_list->elements : 0; + if (unlikely(argc != 1)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return nullptr; + } + + Item *arg1= item_list->pop(); + DBUG_ASSERT(!arg1->is_explicit_name()); + return new (thd->mem_root) Item_func_xxh3_128(thd, arg1); } Create_func_datediff Create_func_datediff::s_singleton; @@ -6394,6 +6422,7 @@ const Native_func_registry func_array[] = { { STRING_WITH_LEN("CRC32C") }, BUILDER(Create_func_crc32c)}, { { STRING_WITH_LEN("XXH32") }, BUILDER(Create_func_xxh32) }, { { STRING_WITH_LEN("XXH3") }, BUILDER(Create_func_xxh3) }, + { { STRING_WITH_LEN("XXH3_128") }, BUILDER(Create_func_xxh3_128) }, { { STRING_WITH_LEN("DATABASE") }, BUILDER(Create_func_database)}, { { STRING_WITH_LEN("DATEDIFF") }, BUILDER(Create_func_datediff)}, { { STRING_WITH_LEN("DATE_FORMAT") }, BUILDER(Create_func_date_format)}, diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 5b66812213d45..23594a87480fb 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -4571,42 +4571,109 @@ longlong Item_func_crc32::val_int() (ulonglong{crc_func(uint32_t(crc), res->ptr(), res->length())}); } -#include "../mysys/xxhash.h" +namespace +{ +constexpr const CHARSET_INFO *XxhCharset= &my_charset_utf8mb4_bin; -longlong Item_func_xxh32::val_int() +void BytesToHexLower(const unsigned char *digest, size_t length, String *to) { - DBUG_ASSERT(fixed()); - DBUG_ASSERT(arg_count == 1); + static const char hex[]= "0123456789abcdef"; + char buffer[32 * 2]; - null_value= 0; - String *Result= args[0]->val_str(&m_Value); - if (!Result || Result->length() == 0) + DBUG_ASSERT(length * 2 <= sizeof(buffer)); + + for (size_t i= 0; i < length; ++i) { - null_value= 1; - return 0; + buffer[i * 2]= hex[digest[i] >> 4]; + buffer[i * 2 + 1]= hex[digest[i] & 0x0f]; } - const uint32_t Hash= - XXH32(static_cast(Result->ptr()), Result->length(), 0); - return static_cast(static_cast(Hash)); + to->copy(buffer, length * 2, &my_charset_latin1); } -longlong Item_func_xxh3_64::val_int() +String *GetStableXxhInput(Item *arg, String *value, String *converted_value, + bool *null_value) { - DBUG_ASSERT(fixed()); - DBUG_ASSERT(arg_count == 1); + String *input= arg->val_str(value); + if (!input) + { + *null_value= true; + return nullptr; + } - null_value= 0; - String *Result= args[0]->val_str(&m_Value); - if (!Result || Result->length() == 0) + *null_value= false; + + if (input->charset() == XxhCharset) + return input; + + uint errors= 0; + converted_value->length(0); + if (converted_value->copy(input->ptr(), input->length(), + input->charset(), XxhCharset, &errors)) { - null_value= 1; - return 0; + *null_value= true; + return nullptr; } - const uint64_t Hash= - XXH3_64bits(static_cast(Result->ptr()), Result->length()); - return static_cast(static_cast(Hash)); + return converted_value; +} +} + +String *Item_func_xxh32::val_str_ascii(String *to) +{ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(arg_count == 1); + + String *input= + GetStableXxhInput(args[0], &value, &converted_value, &null_value); + if (!input) + return nullptr; + + const XXH32_hash_t hash= + XXH32(reinterpret_cast(input->ptr()), input->length(), 0); + + XXH32_canonical_t canonical; + XXH32_canonicalFromHash(&canonical, hash); + BytesToHexLower(canonical.digest, sizeof(canonical.digest), to); + return to; +} + +String *Item_func_xxh3::val_str_ascii(String *to) +{ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(arg_count == 1); + + String *input= + GetStableXxhInput(args[0], &value, &converted_value, &null_value); + if (!input) + return nullptr; + + const XXH64_hash_t hash= + XXH3_64bits(reinterpret_cast(input->ptr()), input->length()); + + XXH64_canonical_t canonical; + XXH64_canonicalFromHash(&canonical, hash); + BytesToHexLower(canonical.digest, sizeof(canonical.digest), to); + return to; +} + +String *Item_func_xxh3_128::val_str_ascii(String *to) +{ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(arg_count == 1); + + String *input= + GetStableXxhInput(args[0], &value, &converted_value, &null_value); + if (!input) + return nullptr; + + const XXH128_hash_t hash= + XXH3_128bits(reinterpret_cast(input->ptr()), input->length()); + + XXH128_canonical_t canonical; + XXH128_canonicalFromHash(&canonical, hash); + BytesToHexLower(canonical.digest, sizeof(canonical.digest), to); + return to; } #ifdef HAVE_COMPRESS diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 680dc022ab5ef..4027ef2f154b7 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -2317,77 +2317,87 @@ class Item_func_crc32 :public Item_long_func { return get_item_copy(thd, this); } }; -class Item_func_xxh32 : public Item_long_func +class Item_func_xxh32 : public Item_str_ascii_checksum_func { - bool check_arguments() const override - { - return args[0]->check_type_can_return_str(func_name_cstring()); - } - - String m_Value; - + String value; + String converted_value; public: - Item_func_xxh32(THD *Thd, Item *Arg) - : Item_long_func(Thd, Arg) - { - unsigned_flag= 1; - } + Item_func_xxh32(THD *thd, Item *arg) + : Item_str_ascii_checksum_func(thd, arg) {} - LEX_CSTRING func_name_cstring() const override - { - static LEX_CSTRING Name= { STRING_WITH_LEN("XXH32") }; - return Name; - } + String *val_str_ascii(String *to) override; bool fix_length_and_dec(THD *) override { - max_length= 10; - set_maybe_null(); + fix_length_and_charset(8, default_charset()); return false; } - longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= { STRING_WITH_LEN("XXH32") }; + return name; + } - Item *shallow_copy(THD *Thd) const override + Item *shallow_copy(THD *thd) const override { - return get_item_copy(Thd, this); + return get_item_copy(thd, this); } }; -class Item_func_xxh3_64 : public Item_longlong_func +class Item_func_xxh3 : public Item_str_ascii_checksum_func { - bool check_arguments() const override - { - return args[0]->check_type_can_return_str(func_name_cstring()); - } + String value; + String converted_value; +public: + Item_func_xxh3(THD *thd, Item *arg) + : Item_str_ascii_checksum_func(thd, arg) {} - String m_Value; + String *val_str_ascii(String *to) override; -public: - Item_func_xxh3_64(THD *Thd, Item *Arg) - : Item_longlong_func(Thd, Arg) + bool fix_length_and_dec(THD *) override { - unsigned_flag= 1; + fix_length_and_charset(16, default_charset()); + return false; } LEX_CSTRING func_name_cstring() const override { - static LEX_CSTRING Name= { STRING_WITH_LEN("XXH3") }; - return Name; + static LEX_CSTRING name= { STRING_WITH_LEN("XXH3") }; + return name; } + Item *shallow_copy(THD *thd) const override + { + return get_item_copy(thd, this); + } +}; + +class Item_func_xxh3_128 : public Item_str_ascii_checksum_func +{ + String value; + String converted_value; +public: + Item_func_xxh3_128(THD *thd, Item *arg) + : Item_str_ascii_checksum_func(thd, arg) {} + + String *val_str_ascii(String *to) override; + bool fix_length_and_dec(THD *) override { - max_length= 20; - set_maybe_null(); + fix_length_and_charset(32, default_charset()); return false; } - longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= { STRING_WITH_LEN("XXH3_128") }; + return name; + } - Item *shallow_copy(THD *Thd) const override + Item *shallow_copy(THD *thd) const override { - return get_item_copy(Thd, this); + return get_item_copy(thd, this); } }; From d7e1fa28d0f43a48677089851c18e54fa76829c5 Mon Sep 17 00:00:00 2001 From: monthdev Date: Thu, 12 Mar 2026 05:24:56 -0700 Subject: [PATCH 3/5] Apply feedback --- mysql-test/main/func_xxh.result | 27 +++++++---- mysql-test/main/func_xxh.test | 18 +++++--- sql/item_strfunc.cc | 80 ++++++++++++++++++++++++--------- 3 files changed, 89 insertions(+), 36 deletions(-) diff --git a/mysql-test/main/func_xxh.result b/mysql-test/main/func_xxh.result index 92813c1a83a5f..3a47e3d1b7a9e 100644 --- a/mysql-test/main/func_xxh.result +++ b/mysql-test/main/func_xxh.result @@ -1,8 +1,8 @@ -SELECT XXH32('abc') = '32d153ff' AS xxh32_expected; +SELECT XXH32('abc') = '5f739703' AS xxh32_expected; xxh32_expected 1 -SELECT XXH32(11223344) = '1852349e' AS xxh32_expected; -xxh32_expected +SELECT XXH32(11223344) = '75a5123c' AS xxh32_num; +xxh32_num 1 SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq; xxh32_eq @@ -13,11 +13,11 @@ xxh32_null SELECT XXH32('') = '02cc5d05' AS xxh32_empty; xxh32_empty 1 -SELECT XXH3('abc') = '78af5f94892f3950' AS xxh3_expected; +SELECT XXH3('abc') = 'cffd0d343257b2de' AS xxh3_expected; xxh3_expected 1 -SELECT XXH3(11223344) = '8178f6e2d84ca479' AS xxh3_expected; -xxh3_expected +SELECT XXH3(11223344) = 'b51c44cc5c739944' AS xxh3_num; +xxh3_num 1 SELECT XXH3('abc') = XXH32('abc') AS xxh3_eq; xxh3_eq @@ -25,13 +25,13 @@ xxh3_eq SELECT XXH3(NULL) IS NULL AS xxh3_null; xxh3_null 1 -SELECT XXH3('') = '2d06800538d394c2' AS xxh3_empty; +SELECT XXH3('') = '0000000000000000' AS xxh3_empty; xxh3_empty 1 -SELECT XXH3_128('abc') = '06b05ab6733a618578af5f94892f3950' AS xxh3_128_expected; +SELECT XXH3_128('abc') = '4a992abde7402c1f2fe8608253a6bd5e' AS xxh3_128_expected; xxh3_128_expected 1 -SELECT XXH3_128(11223344) = '4a18cfb1c6fc9ebb0869a1f5a72ff851' AS xxh3_128_num; +SELECT XXH3_128(11223344) = 'a8dc85a76425b0ddb5cb2c19157e2435' AS xxh3_128_num; xxh3_128_num 1 SELECT XXH3_128('abc') = XXH3_128('abc') AS xxh3_128_eq; @@ -52,3 +52,12 @@ xxh3_charset_eq SELECT XXH3_128(_latin1'abc') = XXH3_128(_utf8mb4'abc') AS xxh3_128_charset_eq; xxh3_128_charset_eq 1 +SELECT XXH32(' ') = XXH32(' ') AS xxh32_space_eq; +xxh32_space_eq +1 +SELECT XXH3(' ') = XXH3(' ') AS xxh3_space_eq; +xxh3_space_eq +1 +SELECT XXH3_128(' ') = XXH3_128(' ') AS xxh3_128_space_eq; +xxh3_128_space_eq +1 diff --git a/mysql-test/main/func_xxh.test b/mysql-test/main/func_xxh.test index 4bd6d7cb8243f..0929cb3704c1e 100644 --- a/mysql-test/main/func_xxh.test +++ b/mysql-test/main/func_xxh.test @@ -1,17 +1,17 @@ -SELECT XXH32('abc') = '32d153ff' AS xxh32_expected; -SELECT XXH32(11223344) = '1852349e' AS xxh32_expected; +SELECT XXH32('abc') = '5f739703' AS xxh32_expected; +SELECT XXH32(11223344) = '75a5123c' AS xxh32_num; SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq; SELECT XXH32(NULL) IS NULL AS xxh32_null; SELECT XXH32('') = '02cc5d05' AS xxh32_empty; -SELECT XXH3('abc') = '78af5f94892f3950' AS xxh3_expected; -SELECT XXH3(11223344) = '8178f6e2d84ca479' AS xxh3_expected; +SELECT XXH3('abc') = 'cffd0d343257b2de' AS xxh3_expected; +SELECT XXH3(11223344) = 'b51c44cc5c739944' AS xxh3_num; SELECT XXH3('abc') = XXH32('abc') AS xxh3_eq; SELECT XXH3(NULL) IS NULL AS xxh3_null; -SELECT XXH3('') = '2d06800538d394c2' AS xxh3_empty; +SELECT XXH3('') = '0000000000000000' AS xxh3_empty; -SELECT XXH3_128('abc') = '06b05ab6733a618578af5f94892f3950' AS xxh3_128_expected; -SELECT XXH3_128(11223344) = '4a18cfb1c6fc9ebb0869a1f5a72ff851' AS xxh3_128_num; +SELECT XXH3_128('abc') = '4a992abde7402c1f2fe8608253a6bd5e' AS xxh3_128_expected; +SELECT XXH3_128(11223344) = 'a8dc85a76425b0ddb5cb2c19157e2435' AS xxh3_128_num; SELECT XXH3_128('abc') = XXH3_128('abc') AS xxh3_128_eq; SELECT XXH3_128(NULL) IS NULL AS xxh3_128_null; SELECT XXH3_128('') = '99aa06d3014798d86001c324468d497f' AS xxh3_128_empty; @@ -19,3 +19,7 @@ SELECT XXH3_128('') = '99aa06d3014798d86001c324468d497f' AS xxh3_128_empty; SELECT XXH32(_latin1'abc') = XXH32(_utf8mb4'abc') AS xxh32_charset_eq; SELECT XXH3(_latin1'abc') = XXH3(_utf8mb4'abc') AS xxh3_charset_eq; SELECT XXH3_128(_latin1'abc') = XXH3_128(_utf8mb4'abc') AS xxh3_128_charset_eq; + +SELECT XXH32(' ') = XXH32(' ') AS xxh32_space_eq; +SELECT XXH3(' ') = XXH3(' ') AS xxh3_space_eq; +SELECT XXH3_128(' ') = XXH3_128(' ') AS xxh3_128_space_eq; diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 23594a87480fb..a505b65a197b0 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -4573,9 +4573,9 @@ longlong Item_func_crc32::val_int() namespace { -constexpr const CHARSET_INFO *XxhCharset= &my_charset_utf8mb4_bin; +constexpr CHARSET_INFO *xxh_charset= &my_charset_utf8mb4_general_ci; -void BytesToHexLower(const unsigned char *digest, size_t length, String *to) +void bytes_to_hex_lower(const unsigned char *digest, size_t length, String *to) { static const char hex[]= "0123456789abcdef"; char buffer[32 * 2]; @@ -4591,8 +4591,8 @@ void BytesToHexLower(const unsigned char *digest, size_t length, String *to) to->copy(buffer, length * 2, &my_charset_latin1); } -String *GetStableXxhInput(Item *arg, String *value, String *converted_value, - bool *null_value) +String *get_xxh_input(Item *arg, String *value, String *converted_value, + bool *null_value) { String *input= arg->val_str(value); if (!input) @@ -4603,13 +4603,13 @@ String *GetStableXxhInput(Item *arg, String *value, String *converted_value, *null_value= false; - if (input->charset() == XxhCharset) + if (input->charset() == xxh_charset) return input; uint errors= 0; converted_value->length(0); - if (converted_value->copy(input->ptr(), input->length(), - input->charset(), XxhCharset, &errors)) + if (converted_value->copy(input->ptr(), input->length(), input->charset(), + xxh_charset, &errors)) { *null_value= true; return nullptr; @@ -4617,24 +4617,56 @@ String *GetStableXxhInput(Item *arg, String *value, String *converted_value, return converted_value; } + +static void xxh3_128_hash_str(my_hasher_st *hasher, const uchar *key, + size_t len) +{ + XXH3_128bits_update((XXH3_state_t *) hasher->m_specific, key, len); } +static void xxh3_128_hash_byte(my_hasher_st *hasher, uchar value) +{ + XXH3_128bits_update((XXH3_state_t *) hasher->m_specific, &value, 1); +} + +static my_hasher_st my_hasher_xxh3_128_local() +{ + my_hasher_st tmp{}; + + tmp.m_nr1= 1; + tmp.m_nr2= 4; + tmp.m_nr= 0; + tmp.m_streaming= FALSE; + tmp.m_hash_str= xxh3_128_hash_str; + tmp.m_hash_byte= xxh3_128_hash_byte; + tmp.m_hash_num= nullptr; + tmp.m_finalize= nullptr; + tmp.m_specific= (void *) XXH3_createState(); + + XXH3_128bits_reset((XXH3_state_t *) tmp.m_specific); + return tmp; +} +} // namespace + String *Item_func_xxh32::val_str_ascii(String *to) { DBUG_ASSERT(fixed()); DBUG_ASSERT(arg_count == 1); - String *input= - GetStableXxhInput(args[0], &value, &converted_value, &null_value); + String *input= get_xxh_input(args[0], &value, &converted_value, &null_value); if (!input) return nullptr; - const XXH32_hash_t hash= - XXH32(reinterpret_cast(input->ptr()), input->length(), 0); + my_hasher_st hasher= my_hasher_xxh32(); + xxh_charset->hash_sort( + &hasher, reinterpret_cast(input->ptr()), input->length()); + + const XXH32_hash_t hash= XXH32_digest((XXH32_state_t *) hasher.m_specific); + XXH32_freeState((XXH32_state_t *) hasher.m_specific); XXH32_canonical_t canonical; XXH32_canonicalFromHash(&canonical, hash); - BytesToHexLower(canonical.digest, sizeof(canonical.digest), to); + bytes_to_hex_lower(canonical.digest, sizeof(canonical.digest), to); return to; } @@ -4643,17 +4675,21 @@ String *Item_func_xxh3::val_str_ascii(String *to) DBUG_ASSERT(fixed()); DBUG_ASSERT(arg_count == 1); - String *input= - GetStableXxhInput(args[0], &value, &converted_value, &null_value); + String *input= get_xxh_input(args[0], &value, &converted_value, &null_value); if (!input) return nullptr; + my_hasher_st hasher= my_hasher_xxh3(); + xxh_charset->hash_sort( + &hasher, reinterpret_cast(input->ptr()), input->length()); + const XXH64_hash_t hash= - XXH3_64bits(reinterpret_cast(input->ptr()), input->length()); + XXH3_64bits_digest((XXH3_state_t *) hasher.m_specific); + XXH3_freeState((XXH3_state_t *) hasher.m_specific); XXH64_canonical_t canonical; XXH64_canonicalFromHash(&canonical, hash); - BytesToHexLower(canonical.digest, sizeof(canonical.digest), to); + bytes_to_hex_lower(canonical.digest, sizeof(canonical.digest), to); return to; } @@ -4662,17 +4698,21 @@ String *Item_func_xxh3_128::val_str_ascii(String *to) DBUG_ASSERT(fixed()); DBUG_ASSERT(arg_count == 1); - String *input= - GetStableXxhInput(args[0], &value, &converted_value, &null_value); + String *input= get_xxh_input(args[0], &value, &converted_value, &null_value); if (!input) return nullptr; + my_hasher_st hasher= my_hasher_xxh3_128_local(); + xxh_charset->hash_sort( + &hasher, reinterpret_cast(input->ptr()), input->length()); + const XXH128_hash_t hash= - XXH3_128bits(reinterpret_cast(input->ptr()), input->length()); + XXH3_128bits_digest((XXH3_state_t *) hasher.m_specific); + XXH3_freeState((XXH3_state_t *) hasher.m_specific); XXH128_canonical_t canonical; XXH128_canonicalFromHash(&canonical, hash); - BytesToHexLower(canonical.digest, sizeof(canonical.digest), to); + bytes_to_hex_lower(canonical.digest, sizeof(canonical.digest), to); return to; } From 4ab693362f41b8f2f714c0de2119c20bc0ba5561 Mon Sep 17 00:00:00 2001 From: monthdev Date: Thu, 12 Mar 2026 08:04:20 -0700 Subject: [PATCH 4/5] Fix bug --- mysql-test/main/func_xxh.result | 23 +----------- mysql-test/main/func_xxh.test | 10 +----- sql/item_create.cc | 29 --------------- sql/item_strfunc.cc | 63 +++------------------------------ strings/hasher-xxh.c | 2 ++ 5 files changed, 8 insertions(+), 119 deletions(-) diff --git a/mysql-test/main/func_xxh.result b/mysql-test/main/func_xxh.result index 3a47e3d1b7a9e..f841ce7239721 100644 --- a/mysql-test/main/func_xxh.result +++ b/mysql-test/main/func_xxh.result @@ -10,7 +10,7 @@ xxh32_eq SELECT XXH32(NULL) IS NULL AS xxh32_null; xxh32_null 1 -SELECT XXH32('') = '02cc5d05' AS xxh32_empty; +SELECT XXH32('') = '00000000' AS xxh32_empty; xxh32_empty 1 SELECT XXH3('abc') = 'cffd0d343257b2de' AS xxh3_expected; @@ -28,36 +28,15 @@ xxh3_null SELECT XXH3('') = '0000000000000000' AS xxh3_empty; xxh3_empty 1 -SELECT XXH3_128('abc') = '4a992abde7402c1f2fe8608253a6bd5e' AS xxh3_128_expected; -xxh3_128_expected -1 -SELECT XXH3_128(11223344) = 'a8dc85a76425b0ddb5cb2c19157e2435' AS xxh3_128_num; -xxh3_128_num -1 -SELECT XXH3_128('abc') = XXH3_128('abc') AS xxh3_128_eq; -xxh3_128_eq -1 -SELECT XXH3_128(NULL) IS NULL AS xxh3_128_null; -xxh3_128_null -1 -SELECT XXH3_128('') = '99aa06d3014798d86001c324468d497f' AS xxh3_128_empty; -xxh3_128_empty -1 SELECT XXH32(_latin1'abc') = XXH32(_utf8mb4'abc') AS xxh32_charset_eq; xxh32_charset_eq 1 SELECT XXH3(_latin1'abc') = XXH3(_utf8mb4'abc') AS xxh3_charset_eq; xxh3_charset_eq 1 -SELECT XXH3_128(_latin1'abc') = XXH3_128(_utf8mb4'abc') AS xxh3_128_charset_eq; -xxh3_128_charset_eq -1 SELECT XXH32(' ') = XXH32(' ') AS xxh32_space_eq; xxh32_space_eq 1 SELECT XXH3(' ') = XXH3(' ') AS xxh3_space_eq; xxh3_space_eq 1 -SELECT XXH3_128(' ') = XXH3_128(' ') AS xxh3_128_space_eq; -xxh3_128_space_eq -1 diff --git a/mysql-test/main/func_xxh.test b/mysql-test/main/func_xxh.test index 0929cb3704c1e..d40c1d3cfdf0a 100644 --- a/mysql-test/main/func_xxh.test +++ b/mysql-test/main/func_xxh.test @@ -2,7 +2,7 @@ SELECT XXH32('abc') = '5f739703' AS xxh32_expected; SELECT XXH32(11223344) = '75a5123c' AS xxh32_num; SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq; SELECT XXH32(NULL) IS NULL AS xxh32_null; -SELECT XXH32('') = '02cc5d05' AS xxh32_empty; +SELECT XXH32('') = '00000000' AS xxh32_empty; SELECT XXH3('abc') = 'cffd0d343257b2de' AS xxh3_expected; SELECT XXH3(11223344) = 'b51c44cc5c739944' AS xxh3_num; @@ -10,16 +10,8 @@ SELECT XXH3('abc') = XXH32('abc') AS xxh3_eq; SELECT XXH3(NULL) IS NULL AS xxh3_null; SELECT XXH3('') = '0000000000000000' AS xxh3_empty; -SELECT XXH3_128('abc') = '4a992abde7402c1f2fe8608253a6bd5e' AS xxh3_128_expected; -SELECT XXH3_128(11223344) = 'a8dc85a76425b0ddb5cb2c19157e2435' AS xxh3_128_num; -SELECT XXH3_128('abc') = XXH3_128('abc') AS xxh3_128_eq; -SELECT XXH3_128(NULL) IS NULL AS xxh3_128_null; -SELECT XXH3_128('') = '99aa06d3014798d86001c324468d497f' AS xxh3_128_empty; - SELECT XXH32(_latin1'abc') = XXH32(_utf8mb4'abc') AS xxh32_charset_eq; SELECT XXH3(_latin1'abc') = XXH3(_utf8mb4'abc') AS xxh3_charset_eq; -SELECT XXH3_128(_latin1'abc') = XXH3_128(_utf8mb4'abc') AS xxh3_128_charset_eq; SELECT XXH32(' ') = XXH32(' ') AS xxh32_space_eq; SELECT XXH3(' ') = XXH3(' ') AS xxh3_space_eq; -SELECT XXH3_128(' ') = XXH3_128(' ') AS xxh3_128_space_eq; diff --git a/sql/item_create.cc b/sql/item_create.cc index 7c27edfeab966..594ba0202b2b1 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -655,17 +655,6 @@ class Create_func_xxh3 : public Create_native_func ~Create_func_xxh3() override = default; }; -class Create_func_xxh3_128 : public Create_native_func -{ -public: - Item *create_native(THD *thd, const LEX_CSTRING *name, - List *item_list) override; - static Create_func_xxh3_128 s_singleton; -protected: - Create_func_xxh3_128() = default; - ~Create_func_xxh3_128() override = default; -}; - class Create_func_datediff : public Create_func_arg2 { public: @@ -3748,23 +3737,6 @@ Item *Create_func_xxh3::create_native(THD *thd, const LEX_CSTRING *name, return new (thd->mem_root) Item_func_xxh3(thd, arg1); } -Create_func_xxh3_128 Create_func_xxh3_128::s_singleton; - -Item *Create_func_xxh3_128::create_native(THD *thd, const LEX_CSTRING *name, - List *item_list) -{ - int argc= item_list ? item_list->elements : 0; - if (unlikely(argc != 1)) - { - my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); - return nullptr; - } - - Item *arg1= item_list->pop(); - DBUG_ASSERT(!arg1->is_explicit_name()); - return new (thd->mem_root) Item_func_xxh3_128(thd, arg1); -} - Create_func_datediff Create_func_datediff::s_singleton; Item* @@ -6422,7 +6394,6 @@ const Native_func_registry func_array[] = { { STRING_WITH_LEN("CRC32C") }, BUILDER(Create_func_crc32c)}, { { STRING_WITH_LEN("XXH32") }, BUILDER(Create_func_xxh32) }, { { STRING_WITH_LEN("XXH3") }, BUILDER(Create_func_xxh3) }, - { { STRING_WITH_LEN("XXH3_128") }, BUILDER(Create_func_xxh3_128) }, { { STRING_WITH_LEN("DATABASE") }, BUILDER(Create_func_database)}, { { STRING_WITH_LEN("DATEDIFF") }, BUILDER(Create_func_datediff)}, { { STRING_WITH_LEN("DATE_FORMAT") }, BUILDER(Create_func_date_format)}, diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index a505b65a197b0..7f0d9c477e4a2 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -4617,35 +4617,6 @@ String *get_xxh_input(Item *arg, String *value, String *converted_value, return converted_value; } - -static void xxh3_128_hash_str(my_hasher_st *hasher, const uchar *key, - size_t len) -{ - XXH3_128bits_update((XXH3_state_t *) hasher->m_specific, key, len); -} - -static void xxh3_128_hash_byte(my_hasher_st *hasher, uchar value) -{ - XXH3_128bits_update((XXH3_state_t *) hasher->m_specific, &value, 1); -} - -static my_hasher_st my_hasher_xxh3_128_local() -{ - my_hasher_st tmp{}; - - tmp.m_nr1= 1; - tmp.m_nr2= 4; - tmp.m_nr= 0; - tmp.m_streaming= FALSE; - tmp.m_hash_str= xxh3_128_hash_str; - tmp.m_hash_byte= xxh3_128_hash_byte; - tmp.m_hash_num= nullptr; - tmp.m_finalize= nullptr; - tmp.m_specific= (void *) XXH3_createState(); - - XXH3_128bits_reset((XXH3_state_t *) tmp.m_specific); - return tmp; -} } // namespace String *Item_func_xxh32::val_str_ascii(String *to) @@ -4661,11 +4632,10 @@ String *Item_func_xxh32::val_str_ascii(String *to) xxh_charset->hash_sort( &hasher, reinterpret_cast(input->ptr()), input->length()); - const XXH32_hash_t hash= XXH32_digest((XXH32_state_t *) hasher.m_specific); - XXH32_freeState((XXH32_state_t *) hasher.m_specific); + const uint64_t hash= hasher.m_finalize(&hasher); XXH32_canonical_t canonical; - XXH32_canonicalFromHash(&canonical, hash); + XXH32_canonicalFromHash(&canonical, (XXH32_hash_t) hash); bytes_to_hex_lower(canonical.digest, sizeof(canonical.digest), to); return to; } @@ -4683,35 +4653,10 @@ String *Item_func_xxh3::val_str_ascii(String *to) xxh_charset->hash_sort( &hasher, reinterpret_cast(input->ptr()), input->length()); - const XXH64_hash_t hash= - XXH3_64bits_digest((XXH3_state_t *) hasher.m_specific); - XXH3_freeState((XXH3_state_t *) hasher.m_specific); + const uint64_t hash= hasher.m_finalize(&hasher); XXH64_canonical_t canonical; - XXH64_canonicalFromHash(&canonical, hash); - bytes_to_hex_lower(canonical.digest, sizeof(canonical.digest), to); - return to; -} - -String *Item_func_xxh3_128::val_str_ascii(String *to) -{ - DBUG_ASSERT(fixed()); - DBUG_ASSERT(arg_count == 1); - - String *input= get_xxh_input(args[0], &value, &converted_value, &null_value); - if (!input) - return nullptr; - - my_hasher_st hasher= my_hasher_xxh3_128_local(); - xxh_charset->hash_sort( - &hasher, reinterpret_cast(input->ptr()), input->length()); - - const XXH128_hash_t hash= - XXH3_128bits_digest((XXH3_state_t *) hasher.m_specific); - XXH3_freeState((XXH3_state_t *) hasher.m_specific); - - XXH128_canonical_t canonical; - XXH128_canonicalFromHash(&canonical, hash); + XXH64_canonicalFromHash(&canonical, (XXH64_hash_t) hash); bytes_to_hex_lower(canonical.digest, sizeof(canonical.digest), to); return to; } diff --git a/strings/hasher-xxh.c b/strings/hasher-xxh.c index a10518fc58ef1..1f27b08307e28 100644 --- a/strings/hasher-xxh.c +++ b/strings/hasher-xxh.c @@ -48,6 +48,7 @@ my_hasher_st my_hasher_xxh32(void) { {.m_nr = 0}, FALSE, my_hasher_xxh32_hash_str, my_hasher_xxh32_hash_byte, my_hasher_hash_num, my_hasher_xxh32_finalize, (void *) XXH32_createState() }; + XXH32_reset((XXH32_state_t *) tmp.m_specific, 0); return tmp; } @@ -91,5 +92,6 @@ my_hasher_st my_hasher_xxh3(void) { {.m_nr = 0}, FALSE, my_hasher_xxh3_hash_str, my_hasher_xxh3_hash_byte, my_hasher_hash_num, my_hasher_xxh3_finalize, (void *) XXH3_createState() }; + XXH3_64bits_reset((XXH3_state_t *) tmp.m_specific); return tmp; } From b1f63c5c9cf62f41c441f234ea75f0c1f198f311 Mon Sep 17 00:00:00 2001 From: monthdev Date: Fri, 13 Mar 2026 04:49:02 -0700 Subject: [PATCH 5/5] Apply feedback --- mysql-test/main/func_xxh.result | 22 ++++++------ mysql-test/main/func_xxh.test | 16 +++++---- sql/item_strfunc.cc | 60 +++++++++++---------------------- sql/item_strfunc.h | 46 ++++++------------------- 4 files changed, 51 insertions(+), 93 deletions(-) diff --git a/mysql-test/main/func_xxh.result b/mysql-test/main/func_xxh.result index f841ce7239721..71168e14da027 100644 --- a/mysql-test/main/func_xxh.result +++ b/mysql-test/main/func_xxh.result @@ -1,7 +1,7 @@ -SELECT XXH32('abc') = '5f739703' AS xxh32_expected; +SELECT XXH32('abc') = '80712ed5' AS xxh32_expected; xxh32_expected 1 -SELECT XXH32(11223344) = '75a5123c' AS xxh32_num; +SELECT XXH32(11223344) = '1852349e' AS xxh32_num; xxh32_num 1 SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq; @@ -13,10 +13,10 @@ xxh32_null SELECT XXH32('') = '00000000' AS xxh32_empty; xxh32_empty 1 -SELECT XXH3('abc') = 'cffd0d343257b2de' AS xxh3_expected; +SELECT XXH3('abc') = '244da40f405c870e' AS xxh3_expected; xxh3_expected 1 -SELECT XXH3(11223344) = 'b51c44cc5c739944' AS xxh3_num; +SELECT XXH3(11223344) = '8178f6e2d84ca479' AS xxh3_num; xxh3_num 1 SELECT XXH3('abc') = XXH32('abc') AS xxh3_eq; @@ -28,15 +28,17 @@ xxh3_null SELECT XXH3('') = '0000000000000000' AS xxh3_empty; xxh3_empty 1 -SELECT XXH32(_latin1'abc') = XXH32(_utf8mb4'abc') AS xxh32_charset_eq; -xxh32_charset_eq -1 -SELECT XXH3(_latin1'abc') = XXH3(_utf8mb4'abc') AS xxh3_charset_eq; -xxh3_charset_eq -1 SELECT XXH32(' ') = XXH32(' ') AS xxh32_space_eq; xxh32_space_eq 1 SELECT XXH3(' ') = XXH3(' ') AS xxh3_space_eq; xxh3_space_eq 1 +SELECT XXH32(_koi8u 0x20 COLLATE koi8u_general_ci) = +XXH32(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh32_koi8u_eq; +xxh32_koi8u_eq +1 +SELECT XXH3(_koi8u 0x20 COLLATE koi8u_general_ci) = +XXH3(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh3_koi8u_eq; +xxh3_koi8u_eq +1 diff --git a/mysql-test/main/func_xxh.test b/mysql-test/main/func_xxh.test index d40c1d3cfdf0a..89972ff3e7efd 100644 --- a/mysql-test/main/func_xxh.test +++ b/mysql-test/main/func_xxh.test @@ -1,17 +1,19 @@ -SELECT XXH32('abc') = '5f739703' AS xxh32_expected; -SELECT XXH32(11223344) = '75a5123c' AS xxh32_num; +SELECT XXH32('abc') = '80712ed5' AS xxh32_expected; +SELECT XXH32(11223344) = '1852349e' AS xxh32_num; SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq; SELECT XXH32(NULL) IS NULL AS xxh32_null; SELECT XXH32('') = '00000000' AS xxh32_empty; -SELECT XXH3('abc') = 'cffd0d343257b2de' AS xxh3_expected; -SELECT XXH3(11223344) = 'b51c44cc5c739944' AS xxh3_num; +SELECT XXH3('abc') = '244da40f405c870e' AS xxh3_expected; +SELECT XXH3(11223344) = '8178f6e2d84ca479' AS xxh3_num; SELECT XXH3('abc') = XXH32('abc') AS xxh3_eq; SELECT XXH3(NULL) IS NULL AS xxh3_null; SELECT XXH3('') = '0000000000000000' AS xxh3_empty; -SELECT XXH32(_latin1'abc') = XXH32(_utf8mb4'abc') AS xxh32_charset_eq; -SELECT XXH3(_latin1'abc') = XXH3(_utf8mb4'abc') AS xxh3_charset_eq; - SELECT XXH32(' ') = XXH32(' ') AS xxh32_space_eq; SELECT XXH3(' ') = XXH3(' ') AS xxh3_space_eq; + +SELECT XXH32(_koi8u 0x20 COLLATE koi8u_general_ci) = +XXH32(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh32_koi8u_eq; +SELECT XXH3(_koi8u 0x20 COLLATE koi8u_general_ci) = +XXH3(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh3_koi8u_eq; diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 7f0d9c477e4a2..d983d8002bbd0 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -4573,8 +4573,6 @@ longlong Item_func_crc32::val_int() namespace { -constexpr CHARSET_INFO *xxh_charset= &my_charset_utf8mb4_general_ci; - void bytes_to_hex_lower(const unsigned char *digest, size_t length, String *to) { static const char hex[]= "0123456789abcdef"; @@ -4590,33 +4588,6 @@ void bytes_to_hex_lower(const unsigned char *digest, size_t length, String *to) to->copy(buffer, length * 2, &my_charset_latin1); } - -String *get_xxh_input(Item *arg, String *value, String *converted_value, - bool *null_value) -{ - String *input= arg->val_str(value); - if (!input) - { - *null_value= true; - return nullptr; - } - - *null_value= false; - - if (input->charset() == xxh_charset) - return input; - - uint errors= 0; - converted_value->length(0); - if (converted_value->copy(input->ptr(), input->length(), input->charset(), - xxh_charset, &errors)) - { - *null_value= true; - return nullptr; - } - - return converted_value; -} } // namespace String *Item_func_xxh32::val_str_ascii(String *to) @@ -4624,14 +4595,18 @@ String *Item_func_xxh32::val_str_ascii(String *to) DBUG_ASSERT(fixed()); DBUG_ASSERT(arg_count == 1); - String *input= get_xxh_input(args[0], &value, &converted_value, &null_value); + String *input= args[0]->val_str(&value); if (!input) - return nullptr; - + { + null_value= true; + return nullptr; + } + null_value= false; my_hasher_st hasher= my_hasher_xxh32(); - xxh_charset->hash_sort( - &hasher, reinterpret_cast(input->ptr()), input->length()); - + input->charset()->hash_sort( + &hasher, + reinterpret_cast(input->ptr()), + input->length()); const uint64_t hash= hasher.m_finalize(&hasher); XXH32_canonical_t canonical; @@ -4645,13 +4620,18 @@ String *Item_func_xxh3::val_str_ascii(String *to) DBUG_ASSERT(fixed()); DBUG_ASSERT(arg_count == 1); - String *input= get_xxh_input(args[0], &value, &converted_value, &null_value); + String *input= args[0]->val_str(&value); if (!input) - return nullptr; - + { + null_value= true; + return nullptr; + } + null_value= false; my_hasher_st hasher= my_hasher_xxh3(); - xxh_charset->hash_sort( - &hasher, reinterpret_cast(input->ptr()), input->length()); + input->charset()->hash_sort( + &hasher, + reinterpret_cast(input->ptr()), + input->length()); const uint64_t hash= hasher.m_finalize(&hasher); diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 4027ef2f154b7..c5c0b678b48ac 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -2320,10 +2320,11 @@ class Item_func_crc32 :public Item_long_func class Item_func_xxh32 : public Item_str_ascii_checksum_func { String value; - String converted_value; + public: - Item_func_xxh32(THD *thd, Item *arg) - : Item_str_ascii_checksum_func(thd, arg) {} + Item_func_xxh32(THD *thd, Item *arg) : Item_str_ascii_checksum_func(thd, arg) + { + } String *val_str_ascii(String *to) override; @@ -2335,7 +2336,7 @@ class Item_func_xxh32 : public Item_str_ascii_checksum_func LEX_CSTRING func_name_cstring() const override { - static LEX_CSTRING name= { STRING_WITH_LEN("XXH32") }; + static LEX_CSTRING name= {STRING_WITH_LEN("XXH32")}; return name; } @@ -2348,56 +2349,29 @@ class Item_func_xxh32 : public Item_str_ascii_checksum_func class Item_func_xxh3 : public Item_str_ascii_checksum_func { String value; - String converted_value; -public: - Item_func_xxh3(THD *thd, Item *arg) - : Item_str_ascii_checksum_func(thd, arg) {} - - String *val_str_ascii(String *to) override; - - bool fix_length_and_dec(THD *) override - { - fix_length_and_charset(16, default_charset()); - return false; - } - - LEX_CSTRING func_name_cstring() const override - { - static LEX_CSTRING name= { STRING_WITH_LEN("XXH3") }; - return name; - } - Item *shallow_copy(THD *thd) const override +public: + Item_func_xxh3(THD *thd, Item *arg) : Item_str_ascii_checksum_func(thd, arg) { - return get_item_copy(thd, this); } -}; - -class Item_func_xxh3_128 : public Item_str_ascii_checksum_func -{ - String value; - String converted_value; -public: - Item_func_xxh3_128(THD *thd, Item *arg) - : Item_str_ascii_checksum_func(thd, arg) {} String *val_str_ascii(String *to) override; bool fix_length_and_dec(THD *) override { - fix_length_and_charset(32, default_charset()); + fix_length_and_charset(16, default_charset()); return false; } LEX_CSTRING func_name_cstring() const override { - static LEX_CSTRING name= { STRING_WITH_LEN("XXH3_128") }; + static LEX_CSTRING name= {STRING_WITH_LEN("XXH3")}; return name; } Item *shallow_copy(THD *thd) const override { - return get_item_copy(thd, this); + return get_item_copy(thd, this); } };