Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
6b42af1
fast lid representation for optimized comparison
cheb0 Feb 16, 2026
c4d671e
add tests
cheb0 Feb 17, 2026
ad98152
remove reverse flag from node_range.go, delete unneeded null checks
cheb0 Feb 17, 2026
6d5bbab
Merge branch 'refs/heads/main' into 0-fast-cmp-xor
cheb0 Feb 18, 2026
e9401ba
rebase onto fast cmp
cheb0 Feb 1, 2026
5288d49
Merge branch 'refs/heads/main' into 0-fast-cmp-xor
cheb0 Feb 18, 2026
c31cb85
Merge branch 'refs/heads/0-fast-cmp-xor' into 332-next-geq
cheb0 Feb 18, 2026
4a8237e
fixes
cheb0 Feb 18, 2026
e2e89f8
implement String
cheb0 Feb 18, 2026
7de3263
linter fixes
cheb0 Feb 18, 2026
1e942d0
fix
cheb0 Feb 18, 2026
368d6c4
const masks
cheb0 Feb 20, 2026
a207c89
Merge branch 'refs/heads/0-fast-cmp-xor' into 332-next-geq
cheb0 Feb 20, 2026
08efdb7
rename node.CmpLID => node.LID
cheb0 Mar 2, 2026
684371e
Merge branch 'refs/heads/0-fast-cmp-xor' into 332-next-geq
cheb0 Mar 2, 2026
f62e2ab
remove TODO
cheb0 Mar 2, 2026
6035a9c
rename node.CmpLID => node.LID
cheb0 Mar 2, 2026
75663bb
Merge branch 'refs/heads/0-fast-cmp-xor' into 332-next-geq
cheb0 Mar 2, 2026
f6ac1eb
rename node.CmpLID => node.LID
cheb0 Mar 2, 2026
8c12ab3
rename node.CmpLID => node.LID (filename)
cheb0 Mar 2, 2026
c42a519
Merge branch 'refs/heads/0-fast-cmp-xor' into 332-next-geq
cheb0 Mar 2, 2026
35219c8
rename node.CmpLID => node.LID (filename)
cheb0 Mar 2, 2026
68f1aa3
review fixes
cheb0 Mar 5, 2026
e3fee6c
Merge branch 'refs/heads/main' into 0-fast-cmp-xor
cheb0 Mar 18, 2026
3c722bb
Merge branch 'refs/heads/main' into 0-fast-cmp-xor
cheb0 Mar 18, 2026
eadd789
review fixes
cheb0 Mar 18, 2026
bcbf323
review fixes
cheb0 Mar 18, 2026
3678a3a
review fixes: make masks private
cheb0 Mar 18, 2026
5925841
Merge branch 'main' into 0-fast-cmp-xor
cheb0 Mar 18, 2026
1e602f6
rename
cheb0 Mar 18, 2026
45611fb
Merge remote-tracking branch 'origin/0-fast-cmp-xor' into 0-fast-cmp-xor
cheb0 Mar 18, 2026
48d45b5
Merge remote-tracking branch 'origin/0-fast-cmp-xor' into 332-next-geq
cheb0 Mar 18, 2026
42a2183
Merge branch 'main' into 332-next-geq
cheb0 Mar 19, 2026
8be3a21
fix build
cheb0 Mar 19, 2026
e31ba10
review fixes: retire gallop search, honest NextGEQ
cheb0 Mar 19, 2026
53e51d8
review fixes
cheb0 Mar 19, 2026
2bd2a94
review fixes: call Next()
cheb0 Mar 19, 2026
db23bd2
review fixes: add TODO
cheb0 Mar 20, 2026
045ae6a
review fix: delete gallop search
cheb0 Mar 20, 2026
7c7eb95
linter fixes
cheb0 Mar 20, 2026
3e12a0a
Merge branch 'main' into 332-next-geq
cheb0 Mar 23, 2026
0b0a5ba
PR review: add TODO comment
cheb0 Mar 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion frac/processor/aggregator.go
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ func NewSourcedNodeIterator(sourced node.Sourced, ti tokenIndex, tids []uint32,

func (s *SourcedNodeIterator) ConsumeTokenSource(lid node.LID) (uint32, bool, error) {
for s.lastID.Less(lid) {
s.lastID, s.lastSource = s.sourcedNode.NextSourced()
s.lastID, s.lastSource = s.sourcedNode.NextSourcedGeq(lid)
}

exists := !s.lastID.IsNull() && s.lastID == lid
Expand Down
12 changes: 12 additions & 0 deletions frac/processor/aggregator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,18 @@ func (m *MockNode) NextSourced() (node.LID, uint32) {
return first.LID, first.Source
}

// NextSourcedGeq drops every leading pair whose LID is Less than minLID, then
// pops and returns the first remaining pair. When no pairs remain it returns
// node.NullLID() and a zero source.
func (m *MockNode) NextSourcedGeq(minLID node.LID) (node.LID, uint32) {
	// Count how many leading pairs are strictly below minLID and cut them off at once.
	skip := 0
	for skip < len(m.Pairs) && m.Pairs[skip].LID.Less(minLID) {
		skip++
	}
	m.Pairs = m.Pairs[skip:]

	if len(m.Pairs) == 0 {
		return node.NullLID(), 0
	}

	head := m.Pairs[0]
	m.Pairs = m.Pairs[1:]
	return head.LID, head.Source
}

func TestTwoSourceAggregator(t *testing.T) {
r := require.New(t)

Expand Down
31 changes: 31 additions & 0 deletions frac/sealed/lids/iterator_asc.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,34 @@ func (it *IteratorAsc) Next() node.LID {
it.lids = it.lids[:i]
return node.NewAscLID(lid)
}

// NextGeq returns the next (in reverse iteration order) LID that is <= maxLID.
func (it *IteratorAsc) NextGeq(nextID node.LID) node.LID {
for {
for len(it.lids) == 0 {
if !it.tryNextBlock {
return node.NullLID()
}

it.loadNextLIDsBlock()
it.lids, it.tryNextBlock = it.narrowLIDsRange(it.lids, it.tryNextBlock)
it.counter.AddLIDsCount(len(it.lids))
}

// fast path: smallest remaining > nextID => skip entire block
Comment thread
dkharms marked this conversation as resolved.
// TODO(cheb0): We could also pass LID into narrowLIDsRange to perform block skipping once we add something like MinLID to LID block header
if it.lids[0] > nextID.Unpack() {
it.lids = it.lids[:0]
continue
}

idx := sort.Search(len(it.lids), func(i int) bool { return it.lids[i] > nextID.Unpack() }) - 1
if idx >= 0 {
lid := it.lids[idx]
it.lids = it.lids[:idx]
return node.NewAscLID(lid)
}

it.lids = it.lids[:0]
}
}
32 changes: 32 additions & 0 deletions frac/sealed/lids/iterator_desc.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,35 @@ func (it *IteratorDesc) Next() node.LID {
it.lids = it.lids[1:]
return node.NewDescLID(lid)
}

// NextGeq advances the iterator and returns the first remaining LID whose
// unpacked value is >= nextID.Unpack(). Everything before it, and the returned
// element itself, is dropped from the front of it.lids. It returns
// node.NullLID() when it.lids is empty and tryNextBlock is false.
func (it *IteratorDesc) NextGeq(nextID node.LID) node.LID {
	target := nextID.Unpack()

	for {
		for len(it.lids) == 0 {
			if !it.tryNextBlock {
				return node.NullLID()
			}

			// last chunk in block but not last for tid; need load next block
			it.loadNextLIDsBlock()
			it.lids, it.tryNextBlock = it.narrowLIDsRange(it.lids, it.tryNextBlock)
			// inc loaded LIDs count
			it.counter.AddLIDsCount(len(it.lids))
		}

		// fast path: last LID < target => skip the entire block
		// TODO(cheb0): We could also pass LID into narrowLIDsRange to perform block skipping once we add something like MinLID to LID block header
		remaining := len(it.lids)
		if target > it.lids[remaining-1] {
			it.lids = it.lids[:0]
			continue
		}

		// Position of the first element that is >= target.
		pos := sort.Search(remaining, func(i int) bool { return it.lids[i] >= target })
		if pos >= remaining {
			it.lids = it.lids[:0]
			continue
		}

		found := it.lids[pos]
		it.lids = it.lids[pos+1:]
		return node.NewDescLID(found)
	}
}
51 changes: 51 additions & 0 deletions node/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ func newNodeStaticSize(r *rand.Rand, size int) *staticAsc {
return &staticAsc{staticCursor: staticCursor{data: data}}
}

// newNodeStaticSizeFixedDelta builds a staticAsc node over the values produced
// by GenerateFixedDelta(size, start, delta).
func newNodeStaticSizeFixedDelta(size, start, delta int) *staticAsc {
	values, _ := GenerateFixedDelta(size, start, delta)
	cursor := staticCursor{data: values}
	return &staticAsc{staticCursor: cursor}
}

func Generate(r *rand.Rand, n int) ([]uint32, uint32) {
v := make([]uint32, n)
last := uint32(1)
Expand All @@ -25,6 +30,16 @@ func Generate(r *rand.Rand, n int) ([]uint32, uint32) {
return v, last
}

// GenerateFixedDelta returns n values start, start+step, start+2*step, ...
// (uint32 arithmetic) together with the value that would follow the last
// generated element.
func GenerateFixedDelta(n, start, step int) ([]uint32, uint32) {
	out := make([]uint32, n)
	next := uint32(start)
	delta := uint32(step)
	for i := range out {
		out[i] = next
		next += delta
	}
	return out, next
}

func BenchmarkNot(b *testing.B) {
sizes := []int{1000, 10_000, 1_000_000}

Expand Down Expand Up @@ -165,6 +180,42 @@ func BenchmarkOrTree(b *testing.B) {
}
}

// BenchmarkOrTreeNextGeq checks the performance of NextGeq vs Next when no skipping occur and all node
// yield distinct values (no intersection between nodes)
func BenchmarkOrTreeNextGeq(b *testing.B) {
Comment thread
eguguchkin marked this conversation as resolved.
sizes := []int{1000, 10_000, 1_000_000}
// step is equal to total number of nodes, so that every node produces distinct values
step := 8

for _, s := range sizes {
b.Run(fmt.Sprintf("size=%d", s), func(b *testing.B) {
n1 := NewOr(
newNodeStaticSizeFixedDelta(s, 1, step),
newNodeStaticSizeFixedDelta(s, 5, step))
n2 := NewOr(
newNodeStaticSizeFixedDelta(s, 2, step),
newNodeStaticSizeFixedDelta(s, 6, step))
n3 := NewOr(
newNodeStaticSizeFixedDelta(s, 3, step),
newNodeStaticSizeFixedDelta(s, 8, step))
n4 := NewOr(
newNodeStaticSizeFixedDelta(s, 4, step),
newNodeStaticSizeFixedDelta(s, 7, step))
n12 := NewOr(n1, n2)
n34 := NewOr(n3, n4)
n := NewOr(n12, n34)
res := make([]uint32, 0, s*8)

for b.Loop() {
res = readAllIntoGeq(n, res)
}

assert.Equal(b, cap(res), s*8)

})
}
}

func BenchmarkComplex(b *testing.B) {
sizes := []int{1000, 10_000, 1_000_000}

Expand Down
24 changes: 24 additions & 0 deletions node/lid.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ func NewAscLID(lid uint32) LID {
}
}

// NewLID builds a LID from a raw lid value, delegating to NewAscLID when asc
// is true and to NewDescLID otherwise.
func NewLID(lid uint32, asc bool) LID {
	if asc {
		return NewAscLID(lid)
	}
	return NewDescLID(lid)
}

// Less compares two values. It also does an implicit null check, since we store math.MaxUint32 for null values.
// Which means if we call x.Less(y), then we know for sure that x is not null. Therefore, this Less call can work
// as both "null check + less" combo.
Expand All @@ -61,6 +69,22 @@ func (c LID) Eq(other LID) bool {
return c.lid == other.lid
}

// Max returns whichever of left and right has the larger stored lid field
// (the packed representation, not the Unpack value). When both are equal,
// right is returned.
func Max(left, right LID) LID {
	if left.lid > right.lid {
		return left
	}
	return right
}

// Min returns whichever of left and right has the smaller stored lid field
// (the packed representation, not the Unpack value). When both are equal,
// right is returned.
func Min(left, right LID) LID {
	if left.lid < right.lid {
		return left
	}
	return right
}

// Unpack returns the stored lid field XORed with the LID's mask.
// NOTE(review): presumably this recovers the raw uint32 that was passed to
// NewAscLID/NewDescLID — confirm against the constructors.
func (c LID) Unpack() uint32 {
return c.lid ^ c.mask
}
Expand Down
3 changes: 3 additions & 0 deletions node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@ import (
// Node is an iterator that yields LIDs one at a time.
type Node interface {
fmt.Stringer // for testing
// Next returns the next LID of the node.
Next() LID
// NextGeq returns the next LID that is greater than or equal to nextID,
// i.e. the next LID that is not Less than nextID.
NextGeq(nextID LID) LID
}

// Sourced is an iterator that yields LIDs together with a uint32 source value.
type Sourced interface {
fmt.Stringer // for testing
// NextSourced returns the next LID and its source; aggregation needs the source.
NextSourced() (id LID, source uint32)
// NextSourcedGeq is the NextSourced counterpart that takes a lower bound nextLID.
// NOTE(review): presumably it returns the next LID that is not Less than nextLID,
// mirroring Node.NextGeq — confirm against implementations.
NextSourcedGeq(nextLID LID) (id LID, source uint32)
}
33 changes: 32 additions & 1 deletion node/node_and.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,18 @@ func (n *nodeAnd) readRight() {
n.rightID = n.right.Next()
}

// readLeftGeq advances the left child with NextGeq(nextID) and stores the
// result in n.leftID.
func (n *nodeAnd) readLeftGeq(nextID LID) {
n.leftID = n.left.NextGeq(nextID)
}

// readRightGeq advances the right child with NextGeq(nextID) and stores the
// result in n.rightID.
func (n *nodeAnd) readRightGeq(nextID LID) {
n.rightID = n.right.NextGeq(nextID)
}

func (n *nodeAnd) Next() LID {
for !n.leftID.IsNull() && !n.rightID.IsNull() && n.leftID != n.rightID {
for !n.rightID.IsNull() && n.leftID.Less(n.rightID) {
n.readLeft()
n.readLeftGeq(n.rightID)
}
for !n.leftID.IsNull() && n.rightID.Less(n.leftID) {
n.readRight()
Expand All @@ -48,3 +56,26 @@ func (n *nodeAnd) Next() LID {
n.readRight()
return cur
}

// NextGeq returns the next LID present in both children for which
// nextID.LessOrEq(lid) holds, or NullLID() once either child is exhausted.
// Matches found before nextID are consumed and discarded.
func (n *nodeAnd) NextGeq(nextID LID) LID {
	for {
		// Align both sides: keep advancing whichever one is behind until the
		// current IDs are equal or one side runs out.
		for !(n.leftID.IsNull() || n.rightID.IsNull() || n.leftID.Eq(n.rightID)) {
			for !n.rightID.IsNull() && n.leftID.Less(n.rightID) {
				// Jump straight to the farther of the other side's position and nextID.
				n.readLeftGeq(Max(n.rightID, nextID))
			}
			for !n.leftID.IsNull() && n.rightID.Less(n.leftID) {
				n.readRightGeq(Max(n.leftID, nextID))
			}
		}

		if n.leftID.IsNull() || n.rightID.IsNull() {
			return NullLID()
		}

		// Both sides agree: consume the match from both children.
		matched := n.leftID
		n.readLeft()
		n.readRight()
		if !nextID.LessOrEq(matched) {
			// The match lies before nextID; keep searching.
			continue
		}
		return matched
	}
}
60 changes: 60 additions & 0 deletions node/node_and_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package node

import (
"math"
"math/rand/v2"
"testing"

"github.com/stretchr/testify/assert"
)

func TestNodeAnd_NextGeqAscending(t *testing.T) {
left := NewStatic([]uint32{1, 2, 7, 10, 20, 25, 26, 30, 50, 80, 90, 100}, false)
right := NewStatic([]uint32{1, 3, 4, 7, 9, 30, 40, 45, 60, 80, 110}, false)

node := NewAnd(left, right)

id := node.NextGeq(NewDescLID(7))
assert.Equal(t, uint32(7), id.Unpack())

id = node.NextGeq(NewDescLID(50))
assert.Equal(t, uint32(80), id.Unpack())

id = node.NextGeq(NewDescLID(50))
assert.True(t, id.IsNull())
}

// TestNodeAnd_NextGeqCompatibility checks that repeatedly calling NextGeq with the
// "zero" LID (raw 0 for desc order, raw math.MaxUint32 for asc order) yields the
// same sequence as calling Next.
func TestNodeAnd_NextGeqCompatibility(t *testing.T) {
	const size = 1000

	for _, asc := range []bool{true, false} {
		// Two random non-decreasing sequences with steps in [0, 10).
		left := make([]uint32, 1, size)
		right := make([]uint32, 1, size)
		left[0] = rand.Uint32N(10)
		right[0] = rand.Uint32N(10)
		for i := 1; i < size; i++ {
			left = append(left, left[i-1]+rand.Uint32N(10))
			right = append(right, right[i-1]+rand.Uint32N(10))
		}

		plain := NewAnd(NewStatic(left, asc), NewStatic(right, asc))
		geq := NewAnd(NewStatic(left, asc), NewStatic(right, asc))

		// The zero value of uint32 already covers the desc case.
		var zero uint32
		if asc {
			zero = math.MaxUint32
		}

		// Compare element by element, including the terminating null LID.
		for done := false; !done; {
			want := plain.Next()
			got := geq.NextGeq(NewLID(zero, asc))

			assert.Equal(t, want, got)

			done = want.IsNull()
		}
	}
}
8 changes: 8 additions & 0 deletions node/node_nand.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,11 @@ func (n *nodeNAnd) Next() LID {
}
return NullLID()
}

func (n *nodeNAnd) NextGeq(nextID LID) LID {
Comment thread
eguguchkin marked this conversation as resolved.
lid := n.Next()
for lid.Less(nextID) {
lid = n.Next()
}
return lid
}
52 changes: 52 additions & 0 deletions node/node_nand_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package node

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestNodeNAnd_NextGeq(t *testing.T) {
neg := NewStatic([]uint32{1, 2, 7, 10, 20, 25, 26, 30, 50, 80, 90, 100}, false)
reg := NewStatic([]uint32{1, 3, 4, 7, 9, 30, 40, 45, 60, 80, 110}, false)

node := NewNAnd(neg, reg)

id := node.NextGeq(NewDescLID(7))
assert.Equal(t, uint32(9), id.Unpack())

id = node.NextGeq(NewDescLID(50))
assert.Equal(t, uint32(60), id.Unpack())

id = node.NextGeq(NewDescLID(100))
assert.Equal(t, uint32(110), id.Unpack())

id = node.NextGeq(NewDescLID(100))
assert.True(t, id.IsNull())
}

func TestNodeNAnd_NextGeq_Reverse(t *testing.T) {
neg := NewStatic([]uint32{1, 2, 7, 10, 20, 25, 26, 30, 50, 80, 90, 100}, true)
reg := NewStatic([]uint32{1, 3, 4, 7, 9, 30, 40, 45, 60, 80, 110}, true)

node := NewNAnd(neg, reg)

id := node.NextGeq(NewAscLID(80))
assert.Equal(t, uint32(60), id.Unpack())

id = node.NextGeq(NewAscLID(49))
assert.Equal(t, uint32(45), id.Unpack())

// call with same nextID, should just return next value
id = node.NextGeq(NewAscLID(49))
assert.Equal(t, uint32(40), id.Unpack())

id = node.NextGeq(NewAscLID(49))
assert.Equal(t, uint32(9), id.Unpack())

id = node.NextGeq(NewAscLID(4))
assert.Equal(t, uint32(4), id.Unpack())

id = node.NextGeq(NewAscLID(1))
assert.True(t, id.IsNull())
}
Loading
Loading