diff --git a/mysql-test/main/func_xxh.result b/mysql-test/main/func_xxh.result
new file mode 100644
index 0000000000000..71168e14da027
--- /dev/null
+++ b/mysql-test/main/func_xxh.result
@@ -0,0 +1,44 @@
+SELECT XXH32('abc') = '80712ed5' AS xxh32_expected;
+xxh32_expected
+1
+SELECT XXH32(11223344) = '1852349e' AS xxh32_num;
+xxh32_num
+1
+SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq;
+xxh32_eq
+1
+SELECT XXH32(NULL) IS NULL AS xxh32_null;
+xxh32_null
+1
+SELECT XXH32('') = '00000000' AS xxh32_empty;
+xxh32_empty
+1
+SELECT XXH3('abc') = '244da40f405c870e' AS xxh3_expected;
+xxh3_expected
+1
+SELECT XXH3(11223344) = '8178f6e2d84ca479' AS xxh3_num;
+xxh3_num
+1
+SELECT XXH3('abc') = XXH3('abc') AS xxh3_eq;
+xxh3_eq
+1
+SELECT XXH3(NULL) IS NULL AS xxh3_null;
+xxh3_null
+1
+SELECT XXH3('') = '0000000000000000' AS xxh3_empty;
+xxh3_empty
+1
+SELECT XXH32(' ') = XXH32(' ') AS xxh32_space_eq;
+xxh32_space_eq
+1
+SELECT XXH3(' ') = XXH3(' ') AS xxh3_space_eq;
+xxh3_space_eq
+1
+SELECT XXH32(_koi8u 0x20 COLLATE koi8u_general_ci) =
+XXH32(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh32_koi8u_eq;
+xxh32_koi8u_eq
+1
+SELECT XXH3(_koi8u 0x20 COLLATE koi8u_general_ci) =
+XXH3(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh3_koi8u_eq;
+xxh3_koi8u_eq
+1
diff --git a/mysql-test/main/func_xxh.test b/mysql-test/main/func_xxh.test
new file mode 100644
index 0000000000000..89972ff3e7efd
--- /dev/null
+++ b/mysql-test/main/func_xxh.test
@@ -0,0 +1,26 @@
+# Tests for the XXH32() and XXH3() SQL functions.
+
+# XXH32: fixed expected values, repeatability, NULL and empty input.
+SELECT XXH32('abc') = '80712ed5' AS xxh32_expected;
+SELECT XXH32(11223344) = '1852349e' AS xxh32_num;
+SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq;
+SELECT XXH32(NULL) IS NULL AS xxh32_null;
+SELECT XXH32('') = '00000000' AS xxh32_empty;
+
+# XXH3: fixed expected values, repeatability, NULL and empty input.
+SELECT XXH3('abc') = '244da40f405c870e' AS xxh3_expected;
+SELECT XXH3(11223344) = '8178f6e2d84ca479' AS xxh3_num;
+SELECT XXH3('abc') = XXH3('abc') AS xxh3_eq;
+SELECT XXH3(NULL) IS NULL AS xxh3_null;
+SELECT XXH3('') = '0000000000000000' AS xxh3_empty;
+
+# Hashing the same literal twice must give the same result.
+SELECT XXH32(' ') = XXH32(' ') AS xxh32_space_eq;
+SELECT XXH3(' ') = XXH3(' ') AS xxh3_space_eq;
+
+# The input is hashed through its collation's hash_sort(); 0x20 and 0x60
+# are expected to produce the same hash under koi8u_general_ci.
+SELECT XXH32(_koi8u 0x20 COLLATE koi8u_general_ci) =
+XXH32(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh32_koi8u_eq;
+SELECT XXH3(_koi8u 0x20 COLLATE koi8u_general_ci) =
+XXH3(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh3_koi8u_eq;
diff --git a/sql/item_create.cc b/sql/item_create.cc
index f707607e1e84a..594ba0202b2b1 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -633,6 +633,29 @@ class Create_func_crc32c : public Create_native_func
   virtual ~Create_func_crc32c() = default;
 };
 
+/* Builder for the one-argument XXH32() native function. */
+class Create_func_xxh32 : public Create_native_func
+{
+public:
+  Item *create_native(THD *thd, const LEX_CSTRING *name,
+                      List<Item> *item_list) override;
+  static Create_func_xxh32 s_singleton;
+protected:
+  Create_func_xxh32() = default;
+  ~Create_func_xxh32() override = default;
+};
+
+/* Builder for the one-argument XXH3() native function. */
+class Create_func_xxh3 : public Create_native_func
+{
+public:
+  Item *create_native(THD *thd, const LEX_CSTRING *name,
+                      List<Item> *item_list) override;
+  static Create_func_xxh3 s_singleton;
+protected:
+  Create_func_xxh3() = default;
+  ~Create_func_xxh3() override = default;
+};
 
 class Create_func_datediff : public Create_func_arg2
 {
@@ -3682,6 +3705,47 @@ Create_func_crc32c::create_native(THD *thd, const LEX_CSTRING *name,
     : new (thd->mem_root) Item_func_crc32(thd, true, arg1);
 }
 
+Create_func_xxh32 Create_func_xxh32::s_singleton;
+
+/*
+  Build Item_func_xxh32 from exactly one argument; any other argument count
+  raises ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT and returns nullptr.
+*/
+Item *Create_func_xxh32::create_native(THD *thd, const LEX_CSTRING *name,
+                                       List<Item> *item_list)
+{
+  const int argc= item_list ? item_list->elements : 0;
+  if (unlikely(argc != 1))
+  {
+    my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str);
+    return nullptr;
+  }
+
+  Item *arg1= item_list->pop();
+  DBUG_ASSERT(!arg1->is_explicit_name());
+  return new (thd->mem_root) Item_func_xxh32(thd, arg1);
+}
+
+Create_func_xxh3 Create_func_xxh3::s_singleton;
+
+/*
+  Build Item_func_xxh3 from exactly one argument; any other argument count
+  raises ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT and returns nullptr.
+*/
+Item *Create_func_xxh3::create_native(THD *thd, const LEX_CSTRING *name,
+                                      List<Item> *item_list)
+{
+  const int argc= item_list ? item_list->elements : 0;
+  if (unlikely(argc != 1))
+  {
+    my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str);
+    return nullptr;
+  }
+
+  Item *arg1= item_list->pop();
+  DBUG_ASSERT(!arg1->is_explicit_name());
+  return new (thd->mem_root) Item_func_xxh3(thd, arg1);
+}
 
 Create_func_datediff Create_func_datediff::s_singleton;
 
@@ -6338,6 +6402,8 @@ const Native_func_registry func_array[] =
   { { STRING_WITH_LEN("COT") }, BUILDER(Create_func_cot)},
   { { STRING_WITH_LEN("CRC32") }, BUILDER(Create_func_crc32)},
   { { STRING_WITH_LEN("CRC32C") }, BUILDER(Create_func_crc32c)},
+  { { STRING_WITH_LEN("XXH32") }, BUILDER(Create_func_xxh32)},
+  { { STRING_WITH_LEN("XXH3") }, BUILDER(Create_func_xxh3)},
   { { STRING_WITH_LEN("DATABASE") }, BUILDER(Create_func_database)},
   { { STRING_WITH_LEN("DATEDIFF") }, BUILDER(Create_func_datediff)},
   { { STRING_WITH_LEN("DATE_FORMAT") }, BUILDER(Create_func_date_format)},
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index d4328284b6bde..d983d8002bbd0 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -43,6 +43,7 @@
                                 // my_make_scrambled_password_323
 #include <m_ctype.h>
 #include <my_md5.h>
+#include "../mysys/xxhash.h"
 C_MODE_START
 #include "../mysys/my_static.h"			// For soundex_map
 C_MODE_END
@@ -4570,6 +4571,101 @@ longlong Item_func_crc32::val_int()
     (ulonglong{crc_func(uint32_t(crc), res->ptr(), res->length())});
 }
 
+namespace
+{
+/*
+  Write `length` bytes of `digest` into `to` as lower-case hexadecimal
+  (two characters per byte) in latin1.
+
+  @return false on success, true if String::copy() failed
+*/
+bool bytes_to_hex_lower(const unsigned char *digest, size_t length, String *to)
+{
+  static const char hex[]= "0123456789abcdef";
+  char buffer[32 * 2];
+
+  DBUG_ASSERT(length * 2 <= sizeof(buffer));
+
+  for (size_t i= 0; i < length; ++i)
+  {
+    buffer[i * 2]= hex[digest[i] >> 4];
+    buffer[i * 2 + 1]= hex[digest[i] & 0x0f];
+  }
+
+  return to->copy(buffer, length * 2, &my_charset_latin1);
+}
+} // namespace
+
+/*
+  XXH32(expr): hash the argument through its collation's hash_sort() and
+  return the result as lower-case hex digits; NULL for NULL input.
+*/
+String *Item_func_xxh32::val_str_ascii(String *to)
+{
+  DBUG_ASSERT(fixed());
+  DBUG_ASSERT(arg_count == 1);
+
+  String *input= args[0]->val_str(&value);
+  if (!input)
+  {
+    null_value= true;
+    return nullptr;
+  }
+
+  /*
+    NOTE(review): my_hasher_xxh32() allocates its state with
+    XXH32_createState(); presumably m_finalize() releases it - confirm.
+  */
+  my_hasher_st hasher= my_hasher_xxh32();
+  input->charset()->hash_sort(&hasher,
+                              reinterpret_cast<const uchar *>(input->ptr()),
+                              input->length());
+  const uint64_t hash= hasher.m_finalize(&hasher);
+
+  XXH32_canonical_t canonical;
+  XXH32_canonicalFromHash(&canonical, static_cast<XXH32_hash_t>(hash));
+
+  /* Report NULL if the result buffer could not be filled. */
+  null_value= bytes_to_hex_lower(canonical.digest, sizeof(canonical.digest),
+                                 to);
+  return null_value ? nullptr : to;
+}
+
+/*
+  XXH3(expr): hash the argument through its collation's hash_sort() and
+  return the result as lower-case hex digits; NULL for NULL input.
+*/
+String *Item_func_xxh3::val_str_ascii(String *to)
+{
+  DBUG_ASSERT(fixed());
+  DBUG_ASSERT(arg_count == 1);
+
+  String *input= args[0]->val_str(&value);
+  if (!input)
+  {
+    null_value= true;
+    return nullptr;
+  }
+
+  /*
+    NOTE(review): my_hasher_xxh3() allocates its state with
+    XXH3_createState(); presumably m_finalize() releases it - confirm.
+  */
+  my_hasher_st hasher= my_hasher_xxh3();
+  input->charset()->hash_sort(&hasher,
+                              reinterpret_cast<const uchar *>(input->ptr()),
+                              input->length());
+  const uint64_t hash= hasher.m_finalize(&hasher);
+
+  XXH64_canonical_t canonical;
+  XXH64_canonicalFromHash(&canonical, static_cast<XXH64_hash_t>(hash));
+
+  /* Report NULL if the result buffer could not be filled. */
+  null_value= bytes_to_hex_lower(canonical.digest, sizeof(canonical.digest),
+                                 to);
+  return null_value ? nullptr : to;
+}
+
 #ifdef HAVE_COMPRESS
 #include "zlib.h"
 
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index 4383f86178971..c5c0b678b48ac 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -2317,6 +2317,72 @@ class Item_func_crc32 :public Item_long_func
   { return get_item_copy<Item_func_crc32>(thd, this); }
 };
 
+/*
+  XXH32(expr): ASCII string of at most 8 characters holding the hex form of
+  the hash computed by Item_func_xxh32::val_str_ascii().
+*/
+class Item_func_xxh32 : public Item_str_ascii_checksum_func
+{
+  String value; /* buffer handed to args[0]->val_str() */
+
+public:
+  Item_func_xxh32(THD *thd, Item *arg) : Item_str_ascii_checksum_func(thd, arg)
+  {
+  }
+
+  String *val_str_ascii(String *to) override;
+
+  bool fix_length_and_dec(THD *) override
+  {
+    fix_length_and_charset(8, default_charset());
+    return false;
+  }
+
+  LEX_CSTRING func_name_cstring() const override
+  {
+    static LEX_CSTRING name= {STRING_WITH_LEN("XXH32")};
+    return name;
+  }
+
+  Item *shallow_copy(THD *thd) const override
+  {
+    return get_item_copy<Item_func_xxh32>(thd, this);
+  }
+};
+
+/*
+  XXH3(expr): ASCII string of at most 16 characters holding the hex form of
+  the hash computed by Item_func_xxh3::val_str_ascii().
+*/
+class Item_func_xxh3 : public Item_str_ascii_checksum_func
+{
+  String value; /* buffer handed to args[0]->val_str() */
+
+public:
+  Item_func_xxh3(THD *thd, Item *arg) : Item_str_ascii_checksum_func(thd, arg)
+  {
+  }
+
+  String *val_str_ascii(String *to) override;
+
+  bool fix_length_and_dec(THD *) override
+  {
+    fix_length_and_charset(16, default_charset());
+    return false;
+  }
+
+  LEX_CSTRING func_name_cstring() const override
+  {
+    static LEX_CSTRING name= {STRING_WITH_LEN("XXH3")};
+    return name;
+  }
+
+  Item *shallow_copy(THD *thd) const override
+  {
+    return get_item_copy<Item_func_xxh3>(thd, this);
+  }
+};
+
 class Item_func_uncompressed_length : public Item_long_func_length
 {
   String value;
diff --git a/strings/hasher-xxh.c b/strings/hasher-xxh.c
index a10518fc58ef1..1f27b08307e28 100644
--- a/strings/hasher-xxh.c
+++ b/strings/hasher-xxh.c
@@ -48,6 +48,7 @@ my_hasher_st my_hasher_xxh32(void)
   {
     {.m_nr = 0}, FALSE, my_hasher_xxh32_hash_str, my_hasher_xxh32_hash_byte, my_hasher_hash_num, my_hasher_xxh32_finalize, (void *) XXH32_createState()
   };
+  XXH32_reset((XXH32_state_t *) tmp.m_specific, 0);
   return tmp;
 }
 
@@ -91,5 +92,6 @@ my_hasher_st my_hasher_xxh3(void)
   {
     {.m_nr = 0}, FALSE, my_hasher_xxh3_hash_str, my_hasher_xxh3_hash_byte, my_hasher_hash_num, my_hasher_xxh3_finalize, (void *) XXH3_createState()
   };
+  XXH3_64bits_reset((XXH3_state_t *) tmp.m_specific);
   return tmp;
 }