Skip to content

Commit 1bd0a8c

Browse files
committed
epoch/finder: use GetEpochInfo instead of GetSlot for authoritative epoch
Replace the slot-based epoch approximation with GetEpochInfo, which returns the authoritative epoch directly from the RPC. This fixes an issue where a stale GetSlot(finalized) response caused the epoch finder to return the wrong epoch for ~51 minutes after an epoch boundary, leading to "Account Not Found" alerts on all circuits. For recent targets (within the current epoch), the authoritative epoch from GetEpochInfo is returned directly. For targets in prior epochs, slot math is used as before, but starting from the authoritative absolute slot (AbsoluteSlot) returned by GetEpochInfo.
1 parent bb92934 commit 1bd0a8c

3 files changed

Lines changed: 157 additions & 148 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ All notable changes to this project will be documented in this file.
88

99
### Changes
1010

11+
- Tools
12+
- Fix epoch finder returning wrong epoch for ~51 minutes after epoch boundaries by using `GetEpochInfo` (authoritative epoch) instead of approximating from `GetSlot`, which was vulnerable to stale finalized-slot responses ([#3227](https://github.com/malbeclabs/doublezero/pull/3227))
1113
- Activator
1214
- Suppress noisy program log output from race conditions caused by dual event processing (websocket + snapshot poll). The SDK's new `execute_transaction_quiet` returns a `SimulationError` with program logs; the activator verifies suspected races by re-fetching user state before deciding whether to print logs ([#3197](https://github.com/malbeclabs/doublezero/pull/3197))
1315
- Telemetry

tools/solana/pkg/epoch/finder.go

Lines changed: 36 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ const (
1919

2020
type SolanaRPCClient interface {
2121
GetSlot(ctx context.Context, commitment solanarpc.CommitmentType) (out uint64, err error)
22+
GetEpochInfo(ctx context.Context, commitment solanarpc.CommitmentType) (out *solanarpc.GetEpochInfoResult, err error)
2223
GetEpochSchedule(ctx context.Context) (out *solanarpc.GetEpochScheduleResult, err error)
2324
GetSignaturesForAddressWithOpts(ctx context.Context, account solana.PublicKey, opts *solanarpc.GetSignaturesForAddressOpts) ([]*solanarpc.TransactionSignature, error)
2425
}
@@ -73,34 +74,42 @@ func (e *epochFinder) ApproximateAtTime(ctx context.Context, target time.Time) (
7374
return val.(uint64), nil
7475
}
7576

76-
sched := e.sched.Load()
77-
if sched == nil {
78-
val, err := e.getEpochScheduleWithRetry(ctx)
79-
if err != nil {
80-
return 0, fmt.Errorf("failed to get epoch schedule: %w", err)
81-
}
82-
e.sched.Store(val)
83-
sched = val
84-
}
85-
86-
currentSlot, err := e.getSlotWithRetry(ctx)
77+
// Use GetEpochInfo for the authoritative current epoch and slot. This avoids
78+
// the failure mode where GetSlot returns a stale finalized slot from the previous
79+
// epoch, causing all records to be assigned the wrong epoch.
80+
epochInfo, err := e.getEpochInfoWithRetry(ctx)
8781
if err != nil {
88-
return 0, fmt.Errorf("failed to get current slot: %w", err)
82+
return 0, fmt.Errorf("failed to get epoch info: %w", err)
8983
}
9084

91-
slotsAgo := now.Sub(target) / ApproximateSlotDuration
92-
93-
if uint64(slotsAgo) > currentSlot {
94-
return 0, fmt.Errorf("target time %v is too far in the past", target)
85+
slotsAgo := uint64(now.Sub(target) / ApproximateSlotDuration)
86+
87+
var ep uint64
88+
if slotsAgo <= epochInfo.SlotIndex {
89+
// Target is within the current epoch — use the authoritative epoch directly.
90+
ep = epochInfo.Epoch
91+
} else {
92+
// Target is before the current epoch — fall back to slot math.
93+
sched := e.sched.Load()
94+
if sched == nil {
95+
val, err := e.getEpochScheduleWithRetry(ctx)
96+
if err != nil {
97+
return 0, fmt.Errorf("failed to get epoch schedule: %w", err)
98+
}
99+
e.sched.Store(val)
100+
sched = val
101+
}
102+
if slotsAgo > epochInfo.AbsoluteSlot {
103+
return 0, fmt.Errorf("target time %v is too far in the past", target)
104+
}
105+
approxSlot := epochInfo.AbsoluteSlot - slotsAgo
106+
ep = e.getEpochForSlot(approxSlot, sched)
95107
}
96-
approxSlot := currentSlot - uint64(slotsAgo)
97-
98-
epoch := e.getEpochForSlot(approxSlot, sched)
99108

100-
e.cache.SetWithTTL(cacheKey, epoch, 0, 30*time.Minute)
109+
e.cache.SetWithTTL(cacheKey, ep, 0, 30*time.Minute)
101110
e.cache.Wait()
102111

103-
return epoch, nil
112+
return ep, nil
104113
}
105114

106115
func (e *epochFinder) getEpochForSlot(slot uint64, sched *solanarpc.GetEpochScheduleResult) uint64 {
@@ -128,23 +137,19 @@ func (e *epochFinder) getEpochForSlot(slot uint64, sched *solanarpc.GetEpochSche
128137
return epoch
129138
}
130139

131-
func (e *epochFinder) getSlotWithRetry(ctx context.Context) (uint64, error) {
140+
func (e *epochFinder) getEpochInfoWithRetry(ctx context.Context) (*solanarpc.GetEpochInfoResult, error) {
132141
attempt := 0
133-
slot, err := backoff.Retry(ctx, func() (uint64, error) {
142+
info, err := backoff.Retry(ctx, func() (*solanarpc.GetEpochInfoResult, error) {
134143
if attempt > 1 {
135-
e.log.Warn("Failed to get current slot, retrying", "attempt", attempt)
144+
e.log.Warn("Failed to get epoch info, retrying", "attempt", attempt)
136145
}
137146
attempt++
138-
slot, err := e.client.GetSlot(ctx, solanarpc.CommitmentFinalized)
139-
if err != nil {
140-
return 0, err
141-
}
142-
return slot, nil
147+
return e.client.GetEpochInfo(ctx, solanarpc.CommitmentFinalized)
143148
}, backoff.WithBackOff(backoff.NewExponentialBackOff()))
144149
if err != nil {
145-
return 0, fmt.Errorf("failed to get current slot: %w", err)
150+
return nil, fmt.Errorf("failed to get epoch info: %w", err)
146151
}
147-
return slot, nil
152+
return info, nil
148153
}
149154

150155
func (e *epochFinder) getEpochScheduleWithRetry(ctx context.Context) (*solanarpc.GetEpochScheduleResult, error) {

0 commit comments

Comments
 (0)