中文版 | English
[TOC]
hash function, which ideally should be simple to compute and should ensure that any two distinct keys get different cells.
Example:
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|
| 49 | 18 | 29 | 38 | 79 |
graph LR
a(insert into 0)-->b{is conflict?}
b-.yes.->c(insert into 1)-->b
b-.yes.->e(insert into 2)-->b
b-.yes.->f(insert into 3)-->b
b-.no.->d(end)
TODO
separate chaining: is to keep a list of all elements that hash to the same value.
Example:
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|
| linked list | linked list | linked list | linked list | linked list | |||||
| 49 | 18 | 29 | 38 | 79 | |||||
insert 32 into 1, conflict with 18:
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|
| linked list | linked list | linked list | linked list | linked list | |||||
| 49 | 18 | 29 | 38 | 79 | |||||
| 32 |
TODO
The MurmurHash algorithm was invented by Austin Appleby in 2008; as of 2021, there are three versions: MurmurHash1, MurmurHash2, and MurmurHash3. Compared to other hash functions, MurmurHash has better random distribution characteristics for keys with strong patterns. It is used in Redis, Memcached, Cassandra, HBase, and Lucene.
The following source code is referenced from smasher
x86 Platform 32-bit:
void MurmurHash3_x86_32 ( const void * key, int len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 4;
uint32_t h1 = seed;
const uint32_t c1 = 0xcc9e2d51;
const uint32_t c2 = 0x1b873593;
//----------
// body
const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
for(int i = -nblocks; i; i++)
{
uint32_t k1 = getblock32(blocks,i);
k1 *= c1;
k1 = ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = ROTL32(h1,13);
h1 = h1*5+0xe6546b64;
}
//----------
// tail
const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
uint32_t k1 = 0;
switch(len & 3)
{
case 3: k1 ^= tail[2] << 16;
case 2: k1 ^= tail[1] << 8;
case 1: k1 ^= tail[0];
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
};
//----------
// finalization
h1 ^= len;
h1 = fmix32(h1);
*(uint32_t*)out = h1;
}x64 Platform 128-bit:
void MurmurHash3_x64_128 ( const void * key, const int len,
const uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 16;
uint64_t h1 = seed;
uint64_t h2 = seed;
const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
//----------
// body
const uint64_t * blocks = (const uint64_t *)(data);
for(int i = 0; i < nblocks; i++)
{
uint64_t k1 = getblock64(blocks,i*2+0);
uint64_t k2 = getblock64(blocks,i*2+1);
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
}
//----------
// tail
const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
uint64_t k1 = 0;
uint64_t k2 = 0;
switch(len & 15)
{
case 15: k2 ^= ((uint64_t)tail[14]) << 48;
case 14: k2 ^= ((uint64_t)tail[13]) << 40;
case 13: k2 ^= ((uint64_t)tail[12]) << 32;
case 12: k2 ^= ((uint64_t)tail[11]) << 24;
case 11: k2 ^= ((uint64_t)tail[10]) << 16;
case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
case 9: k2 ^= ((uint64_t)tail[ 8]) << 0;
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
case 8: k1 ^= ((uint64_t)tail[ 7]) << 56;
case 7: k1 ^= ((uint64_t)tail[ 6]) << 48;
case 6: k1 ^= ((uint64_t)tail[ 5]) << 40;
case 5: k1 ^= ((uint64_t)tail[ 4]) << 32;
case 4: k1 ^= ((uint64_t)tail[ 3]) << 24;
case 3: k1 ^= ((uint64_t)tail[ 2]) << 16;
case 2: k1 ^= ((uint64_t)tail[ 1]) << 8;
case 1: k1 ^= ((uint64_t)tail[ 0]) << 0;
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
};
//----------
// finalization
h1 ^= len; h2 ^= len;
h1 += h2;
h2 += h1;
h1 = fmix64(h1);
h2 = fmix64(h2);
h1 += h2;
h2 += h1;
((uint64_t*)out)[0] = h1;
((uint64_t*)out)[1] = h2;
}[1] Thomas H.Cormen, Charles E.Leiserson, Ronald L. Rivest, Clifford Stein . Introduction to Algorithms . 3ED