Skip to content

Commit 422aa8f

Browse files
committed
persist: review feedback — disable fsync, harden ReadAll, fix Close
- Disable WAL fsync; the prune anchor (A/B files with fsync) already provides the crash-recovery watermark. - Use Count() == 0 instead of firstIdx == 0 for emptiness checks in Write and ReadAll for robustness. - Add post-replay count check in ReadAll to detect silent data loss. - Use errors.Join in BlockPersister.Close to close all lane WALs. - Rename laneDir → lanePath to avoid shadowing the laneDir() function. - Add TestIndexedWAL_ReadAllDetectsStaleNextIdx. Made-with: Cursor
1 parent c907f70 commit 422aa8f

3 files changed

Lines changed: 43 additions & 12 deletions

File tree

sei-tendermint/internal/autobahn/consensus/persist/blocks.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package persist
22

33
import (
44
"encoding/hex"
5+
"errors"
56
"fmt"
67
"os"
78
"path/filepath"
@@ -97,17 +98,17 @@ func NewBlockPersister(stateDir utils.Option[string]) (*BlockPersister, map[type
9798
logger.Warn("skipping lane dir with invalid key", "name", e.Name(), "err", err)
9899
continue
99100
}
100-
laneDir := filepath.Join(dir, e.Name())
101-
lw, err := newLaneWAL(laneDir)
101+
lanePath := filepath.Join(dir, e.Name())
102+
lw, err := newLaneWAL(lanePath)
102103
if err != nil {
103104
_ = bp.Close()
104-
return nil, nil, fmt.Errorf("open lane WAL in %s: %w", laneDir, err)
105+
return nil, nil, fmt.Errorf("open lane WAL in %s: %w", lanePath, err)
105106
}
106107
loaded, err := lw.loadAll()
107108
if err != nil {
108109
_ = lw.Close()
109110
_ = bp.Close()
110-
return nil, nil, fmt.Errorf("load lane WAL in %s: %w", laneDir, err)
111+
return nil, nil, fmt.Errorf("load lane WAL in %s: %w", lanePath, err)
111112
}
112113
bp.lanes[lane] = lw
113114
if len(loaded) > 0 {
@@ -201,12 +202,13 @@ func (bp *BlockPersister) Close() error {
201202
if _, ok := bp.dir.Get(); !ok {
202203
return nil
203204
}
205+
var errs []error
204206
for _, lw := range bp.lanes {
205207
if err := lw.Close(); err != nil {
206-
return err
208+
errs = append(errs, err)
207209
}
208210
}
209-
return nil
211+
return errors.Join(errs...)
210212
}
211213

212214
// loadAll reads all entries from the lane WAL and returns the loaded blocks.

sei-tendermint/internal/autobahn/consensus/persist/wal.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,12 @@ type indexedWAL[T any] struct {
2626
nextIdx uint64 // WAL index that the next Write will be assigned
2727
}
2828

29-
// openIndexedWAL creates (or opens) a WAL in dir with synchronous, unbatched,
30-
// fsync-enabled writes. Initializes index tracking from the WAL's stored
31-
// offsets so the caller can immediately Write, ReadAll, or TruncateBefore.
29+
// openIndexedWAL creates (or opens) a WAL in dir with synchronous, unbatched
30+
// writes. Fsync is disabled because the prune anchor (persisted via A/B files
31+
// with fsync) is the crash-recovery watermark — on power loss we restart from
32+
// the anchor and re-sync any lost WAL entries from peers.
33+
// Initializes index tracking from the WAL's stored offsets so the caller can
34+
// immediately Write, ReadAll, or TruncateBefore.
3235
func openIndexedWAL[T any](dir string, codec codec[T]) (*indexedWAL[T], error) {
3336
if err := os.MkdirAll(dir, 0700); err != nil {
3437
return nil, fmt.Errorf("create dir %s: %w", dir, err)
@@ -42,7 +45,6 @@ func openIndexedWAL[T any](dir string, codec codec[T]) (*indexedWAL[T], error) {
4245
dbwal.Config{
4346
WriteBufferSize: 0, // synchronous writes
4447
WriteBatchSize: 1, // no batching
45-
FsyncEnabled: true,
4648
},
4749
)
4850
if err != nil {
@@ -73,7 +75,7 @@ func (w *indexedWAL[T]) Write(entry T) error {
7375
if err := w.wal.Write(entry); err != nil {
7476
return err
7577
}
76-
if w.firstIdx == 0 {
78+
if w.Count() == 0 {
7779
w.firstIdx = w.nextIdx
7880
}
7981
w.nextIdx++
@@ -101,7 +103,7 @@ func (w *indexedWAL[T]) TruncateBefore(walIdx uint64, verify func(T) error) erro
101103

102104
// ReadAll returns all entries in the WAL. Returns nil if empty.
103105
func (w *indexedWAL[T]) ReadAll() ([]T, error) {
104-
if w.firstIdx == 0 {
106+
if w.Count() == 0 {
105107
return nil, nil
106108
}
107109
entries := make([]T, 0, w.Count())
@@ -112,10 +114,15 @@ func (w *indexedWAL[T]) ReadAll() ([]T, error) {
112114
if err != nil {
113115
return nil, err
114116
}
117+
if uint64(len(entries)) != w.Count() {
118+
return nil, fmt.Errorf("WAL replay returned %d entries, expected %d (possible silent data loss)", len(entries), w.Count())
119+
}
115120
return entries, nil
116121
}
117122

118123
// Count returns the number of entries in the WAL.
124+
// firstIdx == 0 is the empty sentinel because tidwall/wal returns 0 for both
125+
// FirstIndex and LastIndex on an empty log, and real indices start at 1.
119126
func (w *indexedWAL[T]) Count() uint64 {
120127
if w.firstIdx == 0 {
121128
return 0

sei-tendermint/internal/autobahn/consensus/persist/wal_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,28 @@ func TestIndexedWAL_Reset(t *testing.T) {
206206
require.NoError(t, iw2.Close())
207207
}
208208

209+
func TestIndexedWAL_ReadAllDetectsStaleNextIdx(t *testing.T) {
210+
dir := t.TempDir()
211+
iw, err := openIndexedWAL(dir, stringCodec{})
212+
require.NoError(t, err)
213+
214+
require.NoError(t, iw.Write("a"))
215+
require.NoError(t, iw.Write("b"))
216+
require.Equal(t, uint64(2), iw.Count())
217+
218+
// Simulate stale internal state: advance nextIdx so Count() reports more
219+
// entries than the WAL actually contains. ReadAll must return an error
220+
// (either from Replay failing to read the missing entry, or from the
221+
// post-replay count check).
222+
iw.nextIdx++
223+
224+
_, err = iw.ReadAll()
225+
require.Error(t, err)
226+
227+
iw.nextIdx--
228+
require.NoError(t, iw.Close())
229+
}
230+
209231
func TestIndexedWAL_WriteAfterTruncate(t *testing.T) {
210232
dir := t.TempDir()
211233

0 commit comments

Comments
 (0)