Skip to content

Commit 14e2ed7

Browse files
committed
Derive other indexes directly for binary fuse
We manipulate the math and use bit tricks to derive the other two indexes more efficiently during peeling, instead of recomputing all three with getHashFromHash.

Apple M1:
```
name old MKeys/s new MKeys/s delta
BinaryFusePopulate/8/n=10000-10 43.8 ± 2% 50.3 ± 3% +14.88% (p=0.000 n=8+9)
BinaryFusePopulate/8/n=100000-10 38.6 ± 3% 41.3 ± 1% +7.09% (p=0.000 n=9+8)
BinaryFusePopulate/8/n=1000000-10 35.0 ± 4% 36.5 ± 7% +4.12% (p=0.013 n=9+10)
BinaryFusePopulate/16/n=10000-10 48.6 ± 4% 48.5 ± 6% ~ (p=1.000 n=10+10)
BinaryFusePopulate/16/n=100000-10 38.0 ± 3% 41.1 ± 1% +8.35% (p=0.000 n=10+10)
BinaryFusePopulate/16/n=1000000-10 33.8 ± 5% 36.6 ± 2% +8.14% (p=0.000 n=10+10)
```

GCE N4D (AMD Turin):
```
name old MKeys/s new MKeys/s delta
BinaryFusePopulate/8/n=10000-8 53.2 ± 3% 57.1 ± 1% +7.46% (p=0.000 n=10+10)
BinaryFusePopulate/8/n=100000-8 33.0 ± 0% 37.5 ± 1% +13.38% (p=0.000 n=10+10)
BinaryFusePopulate/8/n=1000000-8 28.5 ± 2% 31.8 ± 2% +11.59% (p=0.000 n=10+10)
BinaryFusePopulate/16/n=10000-8 53.1 ± 1% 56.2 ± 1% +5.93% (p=0.000 n=10+10)
BinaryFusePopulate/16/n=100000-8 31.8 ± 1% 37.3 ± 1% +17.35% (p=0.000 n=10+10)
BinaryFusePopulate/16/n=1000000-8 27.5 ± 1% 30.9 ± 1% +12.34% (p=0.000 n=10+10)
```

GCE C4 (Intel Emerald Rapids, turbo boost capped at "all core" max):
```
name old MKeys/s new MKeys/s delta
BinaryFusePopulate/8/n=10000-8 29.2 ± 1% 32.2 ± 1% +10.00% (p=0.000 n=10+10)
BinaryFusePopulate/8/n=100000-8 27.0 ± 3% 29.8 ± 5% +10.22% (p=0.000 n=10+10)
BinaryFusePopulate/8/n=1000000-8 25.6 ± 3% 28.2 ± 5% +10.27% (p=0.000 n=10+10)
BinaryFusePopulate/16/n=10000-8 28.9 ± 1% 32.0 ± 1% +10.84% (p=0.000 n=10+10)
BinaryFusePopulate/16/n=100000-8 26.2 ± 1% 28.8 ± 3% +10.05% (p=0.000 n=10+10)
BinaryFusePopulate/16/n=1000000-8 24.8 ± 2% 26.9 ± 2% +8.37% (p=0.000 n=10+10)
```

GCE C4A (Google's Axion ARM64):
```
name old MKeys/s new MKeys/s delta
BinaryFusePopulate/8/n=10000-8 45.1 ± 1% 45.1 ± 1% ~ (p=0.511 n=9+10)
BinaryFusePopulate/8/n=100000-8 39.8 ± 1% 39.4 ± 1% -0.79% (p=0.018 n=9+10)
BinaryFusePopulate/8/n=1000000-8 33.9 ± 3% 34.2 ± 3% ~ (p=0.363 n=10+10)
BinaryFusePopulate/16/n=10000-8 44.0 ± 1% 44.7 ± 1% +1.54% (p=0.000 n=9+10)
BinaryFusePopulate/16/n=100000-8 37.4 ± 1% 38.4 ± 1% +2.75% (p=0.000 n=10+10)
BinaryFusePopulate/16/n=1000000-8 30.9 ± 5% 32.4 ± 1% +4.84% (p=0.000 n=10+10)
```
1 parent e8256d3 commit 14e2ed7

1 file changed

Lines changed: 57 additions & 22 deletions

File tree

binaryfusefilter.go

Lines changed: 57 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,15 @@ func BuildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (BinaryFus
6060
return f, err
6161
}
6262

63+
var ( // Lookup tables indexed by found (0, 1 or 2). NOTE(review): none of the visible hunks reads these tables — the peeling loop derives the same values with bit masks; confirm they are still needed.
64+
off1Tab = [3]int8{1, 1, -2} // segment multiplier for other_index1
65+
off2Tab = [3]int8{2, -1, -1} // segment multiplier for other_index2
66+
xor1SelTab = [3]uint8{0, 2, 1} // presumably selects the XOR term applied to other_index1 (0: h01, 1: h02, 2: h01^h02) — TODO confirm
67+
xor2SelTab = [3]uint8{1, 0, 2} // presumably the same selector encoding, for other_index2 — TODO confirm
68+
f1Tab = [3]uint8{1, 2, 0} // entry i equals (i + 1) % 3
69+
f2Tab = [3]uint8{2, 0, 1} // entry i equals (i + 2) % 3
70+
)
71+
6372
func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryFuse[T], iterations int, _ error) {
6473
size := uint32(len(keys))
6574
var filter BinaryFuse[T]
@@ -79,10 +88,6 @@ func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryF
7988
reverseOrder := reuseBuffer[uint64](&b.reverseOrder, int(size+1))
8089
reverseOrder[size] = 1
8190

82-
// the array h0, h1, h2, h0, h1, h2
83-
var h012 [6]uint32
84-
// this could be used to compute the mod3
85-
// tabmod3 := [5]uint8{0,1,2,0,1}
8691
for {
8792
iterations += 1
8893
if iterations > MaxIterations {
@@ -194,6 +199,9 @@ func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryF
194199
}
195200
}
196201
stacksize := uint32(0)
202+
segLen := filter.SegmentLength
203+
// segLenToMinusSegLenX2 is used to change segLen to -2*segLen via XOR.
204+
segLenToMinusSegLenX2 := segLen ^ (-(2 * segLen))
197205
for Qsize > 0 {
198206
Qsize--
199207
index := alone[Qsize]
@@ -204,29 +212,63 @@ func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryF
204212
reverseOrder[stacksize] = hash
205213
stacksize++
206214

207-
index1, index2, index3 := filter.getHashFromHash(hash)
208-
209-
h012[1] = index2
210-
h012[2] = index3
211-
h012[3] = index1
212-
h012[4] = h012[1]
215+
// Here, we could use filter.getHashFromHash(hash) to obtain the other
216+
// two indexes. But we can manipulate the formulas to derive them more
217+
// efficiently. We use bit tricks to avoid branching.
218+
219+
h01 := uint32(hash>>18) & filter.SegmentLengthMask
220+
h02 := uint32(hash) & filter.SegmentLengthMask
221+
222+
// These variables are either 0 or all 1s.
223+
is0 := -uint32((found - 1) >> 7) // all 1s if found==0 (relies on uint8 wrap)
224+
is1 := -uint32(found & 1) // all 1s if found==1
225+
is2 := -uint32(found >> 1) // all 1s if found==2
226+
227+
// First, adjust the segment index. other_index1 is:
228+
// if found<2: index + segLen
229+
// if found=2: index - segLen*2
230+
other_index1 := index + (segLen ^ (segLenToMinusSegLenX2 & is2))
231+
// other_index2 is:
232+
// if found>0: index - segLen
233+
// if found=0: index + 2*segLen
234+
other_index2 := index - (segLen ^ (segLenToMinusSegLenX2 & is0))
235+
236+
// Now adjust the offset inside the segment.
237+
// Three cases:
238+
// 0: other_index1 ^= h01 other_index2 ^= h02
239+
// 1: other_index1 ^= h01^h02 other_index2 ^= h01
240+
// 2: other_index1 ^= h02 other_index2 ^= h01^h02
241+
other_index1 ^= (h01 &^ is2) ^ (h02 &^ is0)
242+
other_index2 ^= (h01 &^ is0) ^ (h02 &^ is1)
243+
244+
f1 := uint8(is0&1 | is1&2) // f1 = (found + 1) % 3
245+
f2 := uint8(is0&2 | is2&1) // f2 = (found + 2) % 3
246+
247+
// Verification. Turn on for debugging.
248+
if false {
249+
index1, index2, index3 := filter.getHashFromHash(hash)
250+
if other_index1 != []uint32{index1, index2, index3}[(found+1)%3] {
251+
panic("incorrect other_index1")
252+
}
253+
if other_index2 != []uint32{index1, index2, index3}[(found+2)%3] {
254+
panic("incorrect other_index2")
255+
}
256+
}
213257

214-
other_index1 := h012[found+1]
215258
alone[Qsize] = other_index1
216259
if (t2count[other_index1] >> 2) == 2 {
217260
Qsize++
218261
}
219262
t2count[other_index1] -= 4
220-
t2count[other_index1] ^= filter.mod3(found + 1) // could use this instead: tabmod3[found+1]
263+
t2count[other_index1] ^= f1
221264
t2hash[other_index1] ^= hash
222265

223-
other_index2 := h012[found+2]
224266
alone[Qsize] = other_index2
225267
if (t2count[other_index2] >> 2) == 2 {
226268
Qsize++
227269
}
228270
t2count[other_index2] -= 4
229-
t2count[other_index2] ^= filter.mod3(found + 2) // could use this instead: tabmod3[found+2]
271+
t2count[other_index2] ^= f2
230272
t2hash[other_index2] ^= hash
231273
}
232274
}
@@ -255,6 +297,7 @@ func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryF
255297
return filter, iterations, nil
256298
}
257299

300+
var h012 [5]uint32
258301
for i := int(size - 1); i >= 0; i-- {
259302
// the hash of the key we insert next
260303
hash := reverseOrder[i]
@@ -297,14 +340,6 @@ func (filter *BinaryFuse[T]) initializeParameters(b *BinaryFuseBuilder, size uin
297340
filter.Fingerprints = reuseBuffer[T](&b.fingerprints, int(arrayLength))
298341
}
299342

300-
func (filter *BinaryFuse[T]) mod3(x uint8) uint8 {
301-
if x > 2 {
302-
x -= 3
303-
}
304-
305-
return x
306-
}
307-
308343
func (filter *BinaryFuse[T]) getHashFromHash(hash uint64) (uint32, uint32, uint32) {
309344
hi, _ := bits.Mul64(hash, uint64(filter.SegmentCountLength))
310345
h0 := uint32(hi)

0 commit comments

Comments
 (0)