From f124bd21e383565ef6644a4d4080a0a3b00a16e8 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 20:39:25 -0400 Subject: [PATCH 01/27] parquet v2 refactor coordinator thread - step 1 --- .../ledger_db/receipt/parquet_v2/api_stub.go | 94 ++++++++++++ .../receipt/parquet_v2/coordinator.go | 56 +++++++ .../ledger_db/receipt/parquet_v2/handlers.go | 100 ++++++++++++ sei-db/ledger_db/receipt/parquet_v2/prune.go | 7 + sei-db/ledger_db/receipt/parquet_v2/reader.go | 60 ++++++++ .../ledger_db/receipt/parquet_v2/requests.go | 144 ++++++++++++++++++ sei-db/ledger_db/receipt/parquet_v2/store.go | 21 +++ sei-db/ledger_db/receipt/parquet_v2/types.go | 36 +++++ sei-db/ledger_db/receipt/parquet_v2/wal.go | 13 ++ 9 files changed, 531 insertions(+) create mode 100644 sei-db/ledger_db/receipt/parquet_v2/api_stub.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/handlers.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/prune.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/reader.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/requests.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/store.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/types.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/wal.go diff --git a/sei-db/ledger_db/receipt/parquet_v2/api_stub.go b/sei-db/ledger_db/receipt/parquet_v2/api_stub.go new file mode 100644 index 0000000000..67a9c543c9 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/api_stub.go @@ -0,0 +1,94 @@ +package parquet_v2 + +import ( + "context" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" +) + +func (s *Store) WriteReceipts(inputs []parquet.ReceiptInput) error { + _ = inputs + return ErrNotImplemented +} + +func (s *Store) GetReceiptByTxHash(ctx context.Context, txHash common.Hash) (*parquet.ReceiptResult, error) { + _ = ctx + _ = txHash + 
return nil, ErrNotImplemented +} + +func (s *Store) GetReceiptByTxHashInBlock(ctx context.Context, txHash common.Hash, blockNumber uint64) (*parquet.ReceiptResult, error) { + _ = ctx + _ = txHash + _ = blockNumber + return nil, ErrNotImplemented +} + +func (s *Store) GetLogs(ctx context.Context, filter parquet.LogFilter) ([]parquet.LogResult, error) { + _ = ctx + _ = filter + return nil, ErrNotImplemented +} + +func (s *Store) ObserveEmptyBlock(height uint64) error { + _ = height + return ErrNotImplemented +} + +func (s *Store) IsRotationBoundary(blockNumber uint64) bool { + _ = blockNumber + return false +} + +func (s *Store) FileStartBlock() uint64 { + return 0 +} + +func (s *Store) LatestVersion() int64 { + return 0 +} + +func (s *Store) SetLatestVersion(version int64) { + _ = version +} + +func (s *Store) SetEarliestVersion(version int64) { + _ = version +} + +func (s *Store) UpdateLatestVersion(version int64) { + _ = version +} + +func (s *Store) CacheRotateInterval() uint64 { + return 0 +} + +func (s *Store) Flush() error { + return ErrNotImplemented +} + +func (s *Store) Close() error { + return ErrNotImplemented +} + +func (s *Store) SimulateCrash() { +} + +func (s *Store) SetBlockFlushInterval(interval uint64) { + _ = interval +} + +func (s *Store) SetMaxBlocksPerFile(n uint64) { + _ = n +} + +func (s *Store) SetFaultHooks(hooks *parquet.FaultHooks) { + _ = hooks +} + +func (s *Store) ReplayWAL(converter WALReceiptConverter) (ReplayResult, error) { + _ = converter + return ReplayResult{}, ErrNotImplemented +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator.go new file mode 100644 index 0000000000..e98c34811e --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator.go @@ -0,0 +1,56 @@ +package parquet_v2 + +import ( + "os" + "time" + + "github.com/ethereum/go-ethereum/common" + parquetgo "github.com/parquet-go/parquet-go" + 
"github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + dbwal "github.com/sei-protocol/sei-chain/sei-db/wal" +) + +type closedFile struct { + startBlock uint64 + receiptPath string + logPath string +} + +type coordinator struct { + requests chan coordRequest + pruneTick <-chan time.Time + pruneTicker *time.Ticker + done chan struct{} + + config parquet.StoreConfig + + basePath string + fileStartBlock uint64 + receiptWriter *parquetgo.GenericWriter[parquet.ReceiptRecord] + logWriter *parquetgo.GenericWriter[parquet.LogRecord] + receiptFile *os.File + logFile *os.File + closedFiles []closedFile + + receiptsBuffer []parquet.ReceiptRecord + logsBuffer []parquet.LogRecord + lastSeenBlock uint64 + blocksSinceFlush uint64 + + tempWriteCache map[common.Hash]tempReceipt + + latestVersion int64 + earliestVersion int64 + + replayedWarmup []parquet.ReceiptRecord + replayedBlocks []ReplayedBlock + + faultHooks *parquet.FaultHooks + + wal dbwal.GenericWAL[parquet.WALEntry] + reader *Reader +} + +func (c *coordinator) run() { + _ = c +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/handlers.go new file mode 100644 index 0000000000..ec2e4fae77 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/handlers.go @@ -0,0 +1,100 @@ +package parquet_v2 + +func (c *coordinator) handleWrite(req writeReq) { + _ = c + req.resp <- writeResp{err: ErrNotImplemented} +} + +func (c *coordinator) handleReadByTxHash(req readByTxHashReq) { + _ = c + req.resp <- readReceiptResp{err: ErrNotImplemented} +} + +func (c *coordinator) handleReadByTxHashInBlock(req readByTxHashInBlockReq) { + _ = c + req.resp <- readReceiptResp{err: ErrNotImplemented} +} + +func (c *coordinator) handleGetLogs(req getLogsReq) { + _ = c + req.resp <- getLogsResp{err: ErrNotImplemented} +} + +func (c *coordinator) handleObserveEmptyBlock(req observeEmptyBlockReq) { + _ = c + req.resp <- ErrNotImplemented +} + +func (c *coordinator) handleFlush(req flushReq) { + _ = 
c + req.resp <- ErrNotImplemented +} + +func (c *coordinator) handleLatestVersion(req latestVersionReq) { + _ = c + req.resp <- 0 +} + +func (c *coordinator) handleSetLatestVersion(req setLatestVersionReq) { + _ = c + req.resp <- ErrNotImplemented +} + +func (c *coordinator) handleSetEarliestVersion(req setEarliestVersionReq) { + _ = c + req.resp <- ErrNotImplemented +} + +func (c *coordinator) handleUpdateLatestVersion(req updateLatestVersionReq) { + _ = c + req.resp <- ErrNotImplemented +} + +func (c *coordinator) handleCacheRotateInterval(req cacheRotateIntervalReq) { + _ = c + req.resp <- 0 +} + +func (c *coordinator) handleFileStartBlock(req fileStartBlockReq) { + _ = c + req.resp <- 0 +} + +func (c *coordinator) handleIsRotationBoundary(req isRotationBoundaryReq) { + _ = c + req.resp <- false +} + +func (c *coordinator) handleSetBlockFlushInterval(req setBlockFlushIntervalReq) { + _ = c + req.resp <- ErrNotImplemented +} + +func (c *coordinator) handleSetMaxBlocksPerFile(req setMaxBlocksPerFileReq) { + _ = c + req.resp <- ErrNotImplemented +} + +func (c *coordinator) handleSetFaultHooks(req setFaultHooksReq) { + _ = c + req.resp <- ErrNotImplemented +} + +func (c *coordinator) handleReplayWAL(req replayWALReq) { + _ = c + req.resp <- replayWALResp{err: ErrNotImplemented} +} + +func (c *coordinator) handlePruneTick() { + _ = c +} + +func (c *coordinator) handleClose(req closeReq) { + _ = c + req.resp <- ErrNotImplemented +} + +func (c *coordinator) handleSimulateCrash(req simulateCrashReq) { + _ = c + req.resp <- struct{}{} +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/prune.go b/sei-db/ledger_db/receipt/parquet_v2/prune.go new file mode 100644 index 0000000000..66440c0b2e --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/prune.go @@ -0,0 +1,7 @@ +package parquet_v2 + +func (c *coordinator) pruneOldFiles(pruneBeforeBlock uint64) int { + _ = c + _ = pruneBeforeBlock + return 0 +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/reader.go 
b/sei-db/ledger_db/receipt/parquet_v2/reader.go new file mode 100644 index 0000000000..3746a4e9ea --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/reader.go @@ -0,0 +1,60 @@ +package parquet_v2 + +import ( + "context" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" +) + +// Reader is the V2 DuckDB query helper. It intentionally owns no file-list +// state; callers pass explicit file snapshots to each query. +type Reader struct{} + +func NewReader(basePath string) (*Reader, error) { + _ = basePath + return &Reader{}, nil +} + +func NewReaderWithMaxBlocksPerFile(basePath string, maxBlocksPerFile uint64) (*Reader, error) { + _ = basePath + _ = maxBlocksPerFile + return &Reader{}, nil +} + +func (r *Reader) Close() error { + _ = r + return ErrNotImplemented +} + +func (r *Reader) QueryReceiptByTxHash(ctx context.Context, files []string, txHash common.Hash) (*parquet.ReceiptResult, error) { + _ = r + _ = ctx + _ = files + _ = txHash + return nil, ErrNotImplemented +} + +func (r *Reader) QueryReceiptByTxHashInBlock(ctx context.Context, files []string, txHash common.Hash, blockNumber uint64) (*parquet.ReceiptResult, error) { + _ = r + _ = ctx + _ = files + _ = txHash + _ = blockNumber + return nil, ErrNotImplemented +} + +func (r *Reader) QueryLogs(ctx context.Context, files []string, filter parquet.LogFilter) ([]parquet.LogResult, error) { + _ = r + _ = ctx + _ = files + _ = filter + return nil, ErrNotImplemented +} + +func (r *Reader) MaxReceiptBlockNumber(ctx context.Context, files []string) (uint64, bool, error) { + _ = r + _ = ctx + _ = files + return 0, false, ErrNotImplemented +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/requests.go b/sei-db/ledger_db/receipt/parquet_v2/requests.go new file mode 100644 index 0000000000..60df707e77 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/requests.go @@ -0,0 +1,144 @@ +package parquet_v2 + +import ( + "context" + + 
"github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" +) + +type coordRequest interface { + isCoordRequest() +} + +type writeReq struct { + inputs []parquet.ReceiptInput + resp chan writeResp +} + +type writeResp struct { + err error +} + +type readByTxHashReq struct { + ctx context.Context + txHash common.Hash + resp chan readReceiptResp +} + +type readByTxHashInBlockReq struct { + ctx context.Context + txHash common.Hash + blockNumber uint64 + resp chan readReceiptResp +} + +type readReceiptResp struct { + result *parquet.ReceiptResult + err error +} + +type getLogsReq struct { + ctx context.Context + filter parquet.LogFilter + resp chan getLogsResp +} + +type getLogsResp struct { + results []parquet.LogResult + err error +} + +type observeEmptyBlockReq struct { + height uint64 + resp chan error +} + +type flushReq struct { + resp chan error +} + +type latestVersionReq struct { + resp chan int64 +} + +type setLatestVersionReq struct { + version int64 + resp chan error +} + +type setEarliestVersionReq struct { + version int64 + resp chan error +} + +type updateLatestVersionReq struct { + version int64 + resp chan error +} + +type cacheRotateIntervalReq struct { + resp chan uint64 +} + +type fileStartBlockReq struct { + resp chan uint64 +} + +type isRotationBoundaryReq struct { + blockNumber uint64 + resp chan bool +} + +type setBlockFlushIntervalReq struct { + interval uint64 + resp chan error +} + +type setMaxBlocksPerFileReq struct { + interval uint64 + resp chan error +} + +type setFaultHooksReq struct { + hooks *parquet.FaultHooks + resp chan error +} + +type replayWALReq struct { + converter WALReceiptConverter + resp chan replayWALResp +} + +type replayWALResp struct { + result ReplayResult + err error +} + +type simulateCrashReq struct { + resp chan struct{} +} + +type closeReq struct { + resp chan error +} + +func (writeReq) isCoordRequest() {} +func (readByTxHashReq) isCoordRequest() {} +func 
(readByTxHashInBlockReq) isCoordRequest() {} +func (getLogsReq) isCoordRequest() {} +func (observeEmptyBlockReq) isCoordRequest() {} +func (flushReq) isCoordRequest() {} +func (latestVersionReq) isCoordRequest() {} +func (setLatestVersionReq) isCoordRequest() {} +func (setEarliestVersionReq) isCoordRequest() {} +func (updateLatestVersionReq) isCoordRequest() {} +func (cacheRotateIntervalReq) isCoordRequest() {} +func (fileStartBlockReq) isCoordRequest() {} +func (isRotationBoundaryReq) isCoordRequest() {} +func (setBlockFlushIntervalReq) isCoordRequest() {} +func (setMaxBlocksPerFileReq) isCoordRequest() {} +func (setFaultHooksReq) isCoordRequest() {} +func (replayWALReq) isCoordRequest() {} +func (simulateCrashReq) isCoordRequest() {} +func (closeReq) isCoordRequest() {} diff --git a/sei-db/ledger_db/receipt/parquet_v2/store.go b/sei-db/ledger_db/receipt/parquet_v2/store.go new file mode 100644 index 0000000000..d2b1d3e09d --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/store.go @@ -0,0 +1,21 @@ +package parquet_v2 + +import ( + "sync" + + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" +) + +// Store is the V2 parquet receipt store facade. In the finished implementation +// it will hold only channels into the coordinator goroutine. +type Store struct { + requests chan coordRequest + done chan struct{} + closeOnce sync.Once +} + +// NewStore creates a non-functional Step 1 V2 store scaffold. 
+func NewStore(cfg parquet.StoreConfig) (*Store, error) { + _ = cfg + return &Store{}, nil +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/types.go b/sei-db/ledger_db/receipt/parquet_v2/types.go new file mode 100644 index 0000000000..b35e71877a --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/types.go @@ -0,0 +1,36 @@ +package parquet_v2 + +import ( + "errors" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" +) + +// ErrNotImplemented marks Step 1 scaffold methods that are intentionally +// non-functional until the coordinator handlers are implemented. +var ErrNotImplemented = errors.New("not implemented") + +type tempReceipt struct { + blockNumber uint64 + receiptBytes []byte +} + +type ReplayedBlock struct { + BlockNumber uint64 + TxHashes []common.Hash +} + +type WALReceiptConverter func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) + +type ReplayReceipt struct { + Input parquet.ReceiptInput + TxHash common.Hash + Warmup parquet.ReceiptRecord + LogCount uint +} + +type ReplayResult struct { + WarmupRecords []parquet.ReceiptRecord + Blocks []ReplayedBlock +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/wal.go b/sei-db/ledger_db/receipt/parquet_v2/wal.go new file mode 100644 index 0000000000..2097c0bf96 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/wal.go @@ -0,0 +1,13 @@ +package parquet_v2 + +func (c *coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, error) { + _ = c + _ = converter + return ReplayResult{}, ErrNotImplemented +} + +func truncateReplayWAL(w interface{ TruncateBefore(offset uint64) error }, dropOffset uint64) error { + _ = w + _ = dropOffset + return ErrNotImplemented +} From 8927929a339b08b719de97674ee72939e093f469 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 21:24:13 -0400 Subject: [PATCH 02/27] Coordinator goroutine + dispatch shape - step 2 --- 
.../ledger_db/receipt/parquet_v2/api_stub.go | 116 +++++++++++++----- .../receipt/parquet_v2/coordinator.go | 65 +++++++++- .../ledger_db/receipt/parquet_v2/handlers.go | 65 ++++++---- sei-db/ledger_db/receipt/parquet_v2/reader.go | 22 +++- .../ledger_db/receipt/parquet_v2/requests.go | 4 +- sei-db/ledger_db/receipt/parquet_v2/store.go | 78 +++++++++++- sei-db/ledger_db/receipt/parquet_v2/types.go | 9 +- 7 files changed, 293 insertions(+), 66 deletions(-) diff --git a/sei-db/ledger_db/receipt/parquet_v2/api_stub.go b/sei-db/ledger_db/receipt/parquet_v2/api_stub.go index 67a9c543c9..9b7b0b1d0a 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/api_stub.go +++ b/sei-db/ledger_db/receipt/parquet_v2/api_stub.go @@ -8,87 +8,145 @@ import ( ) func (s *Store) WriteReceipts(inputs []parquet.ReceiptInput) error { - _ = inputs - return ErrNotImplemented + resp := make(chan writeResp, 1) + r, err := awaitResponse(s, writeReq{inputs: inputs, resp: resp}, resp) + if err != nil { + return err + } + return r.err } func (s *Store) GetReceiptByTxHash(ctx context.Context, txHash common.Hash) (*parquet.ReceiptResult, error) { - _ = ctx - _ = txHash - return nil, ErrNotImplemented + resp := make(chan readReceiptResp, 1) + r, err := awaitResponse(s, readByTxHashReq{ctx: ctx, txHash: txHash, resp: resp}, resp) + if err != nil { + return nil, err + } + return r.result, r.err } func (s *Store) GetReceiptByTxHashInBlock(ctx context.Context, txHash common.Hash, blockNumber uint64) (*parquet.ReceiptResult, error) { - _ = ctx - _ = txHash - _ = blockNumber - return nil, ErrNotImplemented + resp := make(chan readReceiptResp, 1) + r, err := awaitResponse(s, readByTxHashInBlockReq{ + ctx: ctx, + txHash: txHash, + blockNumber: blockNumber, + resp: resp, + }, resp) + if err != nil { + return nil, err + } + return r.result, r.err } func (s *Store) GetLogs(ctx context.Context, filter parquet.LogFilter) ([]parquet.LogResult, error) { - _ = ctx - _ = filter - return nil, ErrNotImplemented + resp := 
make(chan getLogsResp, 1) + r, err := awaitResponse(s, getLogsReq{ctx: ctx, filter: filter, resp: resp}, resp) + if err != nil { + return nil, err + } + return r.results, r.err } func (s *Store) ObserveEmptyBlock(height uint64) error { - _ = height - return ErrNotImplemented + resp := make(chan error, 1) + return awaitError(s, observeEmptyBlockReq{height: height, resp: resp}, resp) } func (s *Store) IsRotationBoundary(blockNumber uint64) bool { - _ = blockNumber - return false + resp := make(chan bool, 1) + r, err := awaitResponse(s, isRotationBoundaryReq{blockNumber: blockNumber, resp: resp}, resp) + if err != nil { + return false + } + return r } func (s *Store) FileStartBlock() uint64 { - return 0 + resp := make(chan uint64, 1) + r, err := awaitResponse(s, fileStartBlockReq{resp: resp}, resp) + if err != nil { + return 0 + } + return r } func (s *Store) LatestVersion() int64 { - return 0 + resp := make(chan int64, 1) + r, err := awaitResponse(s, latestVersionReq{resp: resp}, resp) + if err != nil { + return 0 + } + return r } func (s *Store) SetLatestVersion(version int64) { - _ = version + resp := make(chan error, 1) + _ = awaitError(s, setLatestVersionReq{version: version, resp: resp}, resp) } func (s *Store) SetEarliestVersion(version int64) { - _ = version + resp := make(chan error, 1) + _ = awaitError(s, setEarliestVersionReq{version: version, resp: resp}, resp) } func (s *Store) UpdateLatestVersion(version int64) { - _ = version + resp := make(chan error, 1) + _ = awaitError(s, updateLatestVersionReq{version: version, resp: resp}, resp) } func (s *Store) CacheRotateInterval() uint64 { - return 0 + resp := make(chan uint64, 1) + r, err := awaitResponse(s, cacheRotateIntervalReq{resp: resp}, resp) + if err != nil { + return 0 + } + return r } func (s *Store) Flush() error { - return ErrNotImplemented + resp := make(chan error, 1) + return awaitError(s, flushReq{resp: resp}, resp) } func (s *Store) Close() error { - return ErrNotImplemented + var err error + 
s.closeOnce.Do(func() { + resp := make(chan error, 1) + err = awaitError(s, closeReq{resp: resp}, resp) + close(s.done) + }) + return err } func (s *Store) SimulateCrash() { + s.closeOnce.Do(func() { + resp := make(chan struct{}, 1) + _, _ = awaitResponse(s, simulateCrashReq{resp: resp}, resp) + close(s.done) + }) } func (s *Store) SetBlockFlushInterval(interval uint64) { - _ = interval + resp := make(chan error, 1) + _ = awaitError(s, setBlockFlushIntervalReq{interval: interval, resp: resp}, resp) } func (s *Store) SetMaxBlocksPerFile(n uint64) { - _ = n + resp := make(chan error, 1) + _ = awaitError(s, setMaxBlocksPerFileReq{maxBlocksPerFile: n, resp: resp}, resp) } func (s *Store) SetFaultHooks(hooks *parquet.FaultHooks) { - _ = hooks + resp := make(chan error, 1) + _ = awaitError(s, setFaultHooksReq{hooks: hooks, resp: resp}, resp) } func (s *Store) ReplayWAL(converter WALReceiptConverter) (ReplayResult, error) { - _ = converter - return ReplayResult{}, ErrNotImplemented + resp := make(chan replayWALResp, 1) + r, err := awaitResponse(s, replayWALReq{converter: converter, resp: resp}, resp) + if err != nil { + return ReplayResult{}, err + } + return r.result, r.err } diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator.go index e98c34811e..722e8c7831 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator.go @@ -36,8 +36,9 @@ type coordinator struct { logsBuffer []parquet.LogRecord lastSeenBlock uint64 blocksSinceFlush uint64 + nextWriteOrdinal uint64 - tempWriteCache map[common.Hash]tempReceipt + tempWriteCache map[common.Hash][]tempReceipt latestVersion int64 earliestVersion int64 @@ -52,5 +53,65 @@ type coordinator struct { } func (c *coordinator) run() { - _ = c + for { + select { + case req := <-c.requests: + switch r := req.(type) { + case writeReq: + c.handleWrite(r) + case readByTxHashReq: + c.handleReadByTxHash(r) + case 
readByTxHashInBlockReq: + c.handleReadByTxHashInBlock(r) + case getLogsReq: + c.handleGetLogs(r) + case observeEmptyBlockReq: + c.handleObserveEmptyBlock(r) + case flushReq: + c.handleFlush(r) + case latestVersionReq: + c.handleLatestVersion(r) + case setLatestVersionReq: + c.handleSetLatestVersion(r) + case setEarliestVersionReq: + c.handleSetEarliestVersion(r) + case updateLatestVersionReq: + c.handleUpdateLatestVersion(r) + case cacheRotateIntervalReq: + c.handleCacheRotateInterval(r) + case fileStartBlockReq: + c.handleFileStartBlock(r) + case isRotationBoundaryReq: + c.handleIsRotationBoundary(r) + case setBlockFlushIntervalReq: + c.handleSetBlockFlushInterval(r) + case setMaxBlocksPerFileReq: + c.handleSetMaxBlocksPerFile(r) + case setFaultHooksReq: + c.handleSetFaultHooks(r) + case replayWALReq: + c.handleReplayWAL(r) + case simulateCrashReq: + c.handleSimulateCrash(r) + return + case closeReq: + c.handleClose(r) + return + } + case <-c.pruneTick: + c.handlePruneTick() + case <-c.done: + c.stopPruneTicker() + return + } + } +} + +func (c *coordinator) stopPruneTicker() { + if c.pruneTicker == nil { + return + } + c.pruneTicker.Stop() + c.pruneTicker = nil + c.pruneTick = nil } diff --git a/sei-db/ledger_db/receipt/parquet_v2/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/handlers.go index ec2e4fae77..ea1e86cb56 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/handlers.go @@ -31,53 +31,58 @@ func (c *coordinator) handleFlush(req flushReq) { } func (c *coordinator) handleLatestVersion(req latestVersionReq) { - _ = c - req.resp <- 0 + req.resp <- c.latestVersion } func (c *coordinator) handleSetLatestVersion(req setLatestVersionReq) { - _ = c - req.resp <- ErrNotImplemented + c.latestVersion = req.version + req.resp <- nil } func (c *coordinator) handleSetEarliestVersion(req setEarliestVersionReq) { - _ = c - req.resp <- ErrNotImplemented + c.earliestVersion = req.version + req.resp <- nil } func (c 
*coordinator) handleUpdateLatestVersion(req updateLatestVersionReq) { - _ = c - req.resp <- ErrNotImplemented + if req.version > c.latestVersion { + c.latestVersion = req.version + } + req.resp <- nil } func (c *coordinator) handleCacheRotateInterval(req cacheRotateIntervalReq) { - _ = c - req.resp <- 0 + req.resp <- c.config.MaxBlocksPerFile } func (c *coordinator) handleFileStartBlock(req fileStartBlockReq) { - _ = c - req.resp <- 0 + req.resp <- c.fileStartBlock } func (c *coordinator) handleIsRotationBoundary(req isRotationBoundaryReq) { - _ = c - req.resp <- false + if c.config.MaxBlocksPerFile == 0 { + req.resp <- false + return + } + req.resp <- req.blockNumber%c.config.MaxBlocksPerFile == 0 } func (c *coordinator) handleSetBlockFlushInterval(req setBlockFlushIntervalReq) { - _ = c - req.resp <- ErrNotImplemented + c.config.BlockFlushInterval = req.interval + req.resp <- nil } func (c *coordinator) handleSetMaxBlocksPerFile(req setMaxBlocksPerFileReq) { - _ = c - req.resp <- ErrNotImplemented + c.config.MaxBlocksPerFile = req.maxBlocksPerFile + if c.reader != nil { + c.reader.setMaxBlocksPerFile(req.maxBlocksPerFile) + } + req.resp <- nil } func (c *coordinator) handleSetFaultHooks(req setFaultHooksReq) { - _ = c - req.resp <- ErrNotImplemented + c.faultHooks = req.hooks + req.resp <- nil } func (c *coordinator) handleReplayWAL(req replayWALReq) { @@ -90,11 +95,27 @@ func (c *coordinator) handlePruneTick() { } func (c *coordinator) handleClose(req closeReq) { - _ = c + c.stopPruneTicker() req.resp <- ErrNotImplemented } func (c *coordinator) handleSimulateCrash(req simulateCrashReq) { - _ = c + c.stopPruneTicker() + if c.receiptFile != nil { + _ = c.receiptFile.Close() + c.receiptFile = nil + } + if c.logFile != nil { + _ = c.logFile.Close() + c.logFile = nil + } + c.receiptWriter = nil + c.logWriter = nil + if c.wal != nil { + _ = c.wal.Close() + } + if c.reader != nil { + _ = c.reader.Close() + } req.resp <- struct{}{} } diff --git 
a/sei-db/ledger_db/receipt/parquet_v2/reader.go b/sei-db/ledger_db/receipt/parquet_v2/reader.go index 3746a4e9ea..8a53878f52 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/reader.go +++ b/sei-db/ledger_db/receipt/parquet_v2/reader.go @@ -9,17 +9,27 @@ import ( // Reader is the V2 DuckDB query helper. It intentionally owns no file-list // state; callers pass explicit file snapshots to each query. -type Reader struct{} +type Reader struct { + basePath string + maxBlocksPerFile uint64 +} func NewReader(basePath string) (*Reader, error) { - _ = basePath - return &Reader{}, nil + return NewReaderWithMaxBlocksPerFile(basePath, parquet.DefaultStoreConfig().MaxBlocksPerFile) } func NewReaderWithMaxBlocksPerFile(basePath string, maxBlocksPerFile uint64) (*Reader, error) { - _ = basePath - _ = maxBlocksPerFile - return &Reader{}, nil + if maxBlocksPerFile == 0 { + maxBlocksPerFile = parquet.DefaultStoreConfig().MaxBlocksPerFile + } + return &Reader{ + basePath: basePath, + maxBlocksPerFile: maxBlocksPerFile, + }, nil +} + +func (r *Reader) setMaxBlocksPerFile(maxBlocksPerFile uint64) { + r.maxBlocksPerFile = maxBlocksPerFile } func (r *Reader) Close() error { diff --git a/sei-db/ledger_db/receipt/parquet_v2/requests.go b/sei-db/ledger_db/receipt/parquet_v2/requests.go index 60df707e77..16cd3d30e2 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/requests.go +++ b/sei-db/ledger_db/receipt/parquet_v2/requests.go @@ -96,8 +96,8 @@ type setBlockFlushIntervalReq struct { } type setMaxBlocksPerFileReq struct { - interval uint64 - resp chan error + maxBlocksPerFile uint64 + resp chan error } type setFaultHooksReq struct { diff --git a/sei-db/ledger_db/receipt/parquet_v2/store.go b/sei-db/ledger_db/receipt/parquet_v2/store.go index d2b1d3e09d..4c69db2565 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store.go @@ -3,6 +3,7 @@ package parquet_v2 import ( "sync" + "github.com/ethereum/go-ethereum/common" 
"github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" ) @@ -14,8 +15,79 @@ type Store struct { closeOnce sync.Once } -// NewStore creates a non-functional Step 1 V2 store scaffold. +// NewStore creates a V2 store with a live coordinator goroutine and +// stubbed request handlers. func NewStore(cfg parquet.StoreConfig) (*Store, error) { - _ = cfg - return &Store{}, nil + storeCfg := resolveStoreConfig(cfg) + + requests := make(chan coordRequest) + done := make(chan struct{}) + reader, err := NewReaderWithMaxBlocksPerFile(cfg.DBDirectory, storeCfg.MaxBlocksPerFile) + if err != nil { + return nil, err + } + + c := &coordinator{ + requests: requests, + done: done, + config: storeCfg, + basePath: cfg.DBDirectory, + receiptsBuffer: make([]parquet.ReceiptRecord, 0, 1000), + logsBuffer: make([]parquet.LogRecord, 0, 10000), + tempWriteCache: make(map[common.Hash][]tempReceipt), + reader: reader, + latestVersion: 0, + earliestVersion: 0, + } + + s := &Store{ + requests: requests, + done: done, + } + + go c.run() + + return s, nil +} + +func resolveStoreConfig(cfg parquet.StoreConfig) parquet.StoreConfig { + resolved := parquet.DefaultStoreConfig() + resolved.DBDirectory = cfg.DBDirectory + resolved.KeepRecent = cfg.KeepRecent + resolved.PruneIntervalSeconds = cfg.PruneIntervalSeconds + if cfg.TxIndexBackend != "" { + resolved.TxIndexBackend = cfg.TxIndexBackend + } + if cfg.BlockFlushInterval > 0 { + resolved.BlockFlushInterval = cfg.BlockFlushInterval + } + if cfg.MaxBlocksPerFile > 0 { + resolved.MaxBlocksPerFile = cfg.MaxBlocksPerFile + } + return resolved +} + +func awaitResponse[T any](s *Store, req coordRequest, resp <-chan T) (T, error) { + var zero T + + select { + case s.requests <- req: + case <-s.done: + return zero, ErrStoreClosed + } + + select { + case r := <-resp: + return r, nil + case <-s.done: + return zero, ErrStoreClosed + } +} + +func awaitError(s *Store, req coordRequest, resp <-chan error) error { + err, waitErr := awaitResponse(s, req, resp) + 
if waitErr != nil { + return waitErr + } + return err } diff --git a/sei-db/ledger_db/receipt/parquet_v2/types.go b/sei-db/ledger_db/receipt/parquet_v2/types.go index b35e71877a..f4f772a3da 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/types.go +++ b/sei-db/ledger_db/receipt/parquet_v2/types.go @@ -7,12 +7,17 @@ import ( "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" ) -// ErrNotImplemented marks Step 1 scaffold methods that are intentionally -// non-functional until the coordinator handlers are implemented. +// ErrNotImplemented marks methods that are intentionally non-functional until +// their coordinator handlers are implemented. var ErrNotImplemented = errors.New("not implemented") +// ErrStoreClosed is returned when a request is made after the coordinator has +// stopped accepting work. +var ErrStoreClosed = errors.New("store closed") + type tempReceipt struct { blockNumber uint64 + writeOrdinal uint64 receiptBytes []byte } From d6daa29442f80eb993d3c3b91b0cdbc263490480 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 21:39:22 -0400 Subject: [PATCH 03/27] Initialization, file open, WAL setup -- step 3 --- sei-db/ledger_db/receipt/parquet_v2/files.go | 88 +++++++++ .../ledger_db/receipt/parquet_v2/handlers.go | 113 ++++++++++- sei-db/ledger_db/receipt/parquet_v2/reader.go | 107 +++++++++- sei-db/ledger_db/receipt/parquet_v2/store.go | 58 ++++++ .../receipt/parquet_v2/store_dispatch_test.go | 186 ++++++++++++++++++ .../receipt/parquet_v2/store_init_test.go | 171 ++++++++++++++++ sei-db/ledger_db/receipt/parquet_v2/types.go | 9 + 7 files changed, 723 insertions(+), 9 deletions(-) create mode 100644 sei-db/ledger_db/receipt/parquet_v2/files.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/store_init_test.go diff --git a/sei-db/ledger_db/receipt/parquet_v2/files.go b/sei-db/ledger_db/receipt/parquet_v2/files.go new file mode 100644 index 
0000000000..6095f355c1 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/files.go @@ -0,0 +1,88 @@ +package parquet_v2 + +import ( + "fmt" + "os" + "path/filepath" + "sort" + + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" +) + +func scanClosedFiles(basePath string, reader *Reader) ([]closedFile, error) { + receiptFiles, err := parquetFilesByPrefix(basePath, "receipts") + if err != nil { + return nil, err + } + logFiles, err := parquetFilesByPrefix(basePath, "logs") + if err != nil { + return nil, err + } + + receiptFiles = validateAndCleanFiles(basePath, reader, receiptFiles, "logs") + logFiles = validateAndCleanFiles(basePath, reader, logFiles, "receipts") + + logByStart := make(map[uint64]string, len(logFiles)) + for _, path := range logFiles { + if fileExists(path) { + logByStart[parquet.ExtractBlockNumber(path)] = path + } + } + + closed := make([]closedFile, 0, len(receiptFiles)) + for _, receiptPath := range receiptFiles { + if !fileExists(receiptPath) { + continue + } + startBlock := parquet.ExtractBlockNumber(receiptPath) + logPath, ok := logByStart[startBlock] + if !ok { + continue + } + closed = append(closed, closedFile{ + startBlock: startBlock, + receiptPath: receiptPath, + logPath: logPath, + }) + } + + sort.Slice(closed, func(i, j int) bool { + return closed[i].startBlock < closed[j].startBlock + }) + return closed, nil +} + +func parquetFilesByPrefix(basePath, prefix string) ([]string, error) { + pattern := filepath.Join(basePath, prefix+"_*.parquet") + files, err := filepath.Glob(pattern) + if err != nil { + return nil, fmt.Errorf("failed to glob %s parquet files with pattern %q: %w", prefix, pattern, err) + } + return files, nil +} + +func validateAndCleanFiles(basePath string, reader *Reader, files []string, counterpartPrefix string) []string { + if len(files) == 0 { + return nil + } + + sort.Slice(files, func(i, j int) bool { + return parquet.ExtractBlockNumber(files[i]) < parquet.ExtractBlockNumber(files[j]) + }) + + 
lastFile := files[len(files)-1] + if reader.isFileReadable(lastFile) { + return files + } + + startBlock := parquet.ExtractBlockNumber(lastFile) + _ = os.Remove(lastFile) + counterpart := filepath.Join(basePath, fmt.Sprintf("%s_%d.parquet", counterpartPrefix, startBlock)) + _ = os.Remove(counterpart) + return files[:len(files)-1] +} + +func fileExists(path string) bool { + _, err := os.Stat(path) + return err == nil +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/handlers.go index ea1e86cb56..ced81e5b63 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/handlers.go @@ -1,5 +1,7 @@ package parquet_v2 +import "fmt" + func (c *coordinator) handleWrite(req writeReq) { _ = c req.resp <- writeResp{err: ErrNotImplemented} @@ -26,8 +28,7 @@ func (c *coordinator) handleObserveEmptyBlock(req observeEmptyBlockReq) { } func (c *coordinator) handleFlush(req flushReq) { - _ = c - req.resp <- ErrNotImplemented + req.resp <- c.flushOpenFile() } func (c *coordinator) handleLatestVersion(req latestVersionReq) { @@ -96,7 +97,29 @@ func (c *coordinator) handlePruneTick() { func (c *coordinator) handleClose(req closeReq) { c.stopPruneTicker() - req.resp <- ErrNotImplemented + if err := c.flushOpenFile(); err != nil { + req.resp <- err + return + } + if err := c.closeWriters(); err != nil { + req.resp <- err + return + } + if c.wal != nil { + if err := c.wal.Close(); err != nil { + req.resp <- err + return + } + c.wal = nil + } + if c.reader != nil { + if err := c.reader.Close(); err != nil { + req.resp <- err + return + } + c.reader = nil + } + req.resp <- nil } func (c *coordinator) handleSimulateCrash(req simulateCrashReq) { @@ -119,3 +142,87 @@ func (c *coordinator) handleSimulateCrash(req simulateCrashReq) { } req.resp <- struct{}{} } + +func (c *coordinator) flushOpenFile() error { + if len(c.receiptsBuffer) == 0 { + return nil + } + if c.receiptWriter == nil { + return 
fmt.Errorf("cannot flush receipts: receipt writer is not initialized") + } + + if h := c.faultHooks; h != nil && h.BeforeFlush != nil { + if err := h.BeforeFlush(c.lastSeenBlock); err != nil { + return err + } + } + + if _, err := c.receiptWriter.Write(c.receiptsBuffer); err != nil { + return fmt.Errorf("failed to write receipts to parquet: %w", err) + } + if err := c.receiptWriter.Flush(); err != nil { + return fmt.Errorf("failed to flush receipt parquet writer: %w", err) + } + + if len(c.logsBuffer) > 0 { + if c.logWriter == nil { + return fmt.Errorf("cannot flush logs: log writer is not initialized") + } + if _, err := c.logWriter.Write(c.logsBuffer); err != nil { + return fmt.Errorf("failed to write logs to parquet: %w", err) + } + if err := c.logWriter.Flush(); err != nil { + return fmt.Errorf("failed to flush log parquet writer: %w", err) + } + } + + if h := c.faultHooks; h != nil && h.AfterFlush != nil { + if err := h.AfterFlush(c.lastSeenBlock); err != nil { + return err + } + } + + c.receiptsBuffer = c.receiptsBuffer[:0] + c.logsBuffer = c.logsBuffer[:0] + return nil +} + +func (c *coordinator) closeWriters() error { + var errs []error + + if c.receiptWriter != nil { + if err := c.receiptWriter.Close(); err != nil { + errs = append(errs, fmt.Errorf("receipt writer: %w", err)) + } + c.receiptWriter = nil + } + if c.logWriter != nil { + if err := c.logWriter.Close(); err != nil { + errs = append(errs, fmt.Errorf("log writer: %w", err)) + } + c.logWriter = nil + } + if c.receiptFile != nil { + if err := c.receiptFile.Sync(); err != nil { + errs = append(errs, fmt.Errorf("receipt file sync: %w", err)) + } + if err := c.receiptFile.Close(); err != nil { + errs = append(errs, fmt.Errorf("receipt file: %w", err)) + } + c.receiptFile = nil + } + if c.logFile != nil { + if err := c.logFile.Sync(); err != nil { + errs = append(errs, fmt.Errorf("log file sync: %w", err)) + } + if err := c.logFile.Close(); err != nil { + errs = append(errs, fmt.Errorf("log file: %w", 
err)) + } + c.logFile = nil + } + + if len(errs) > 0 { + return fmt.Errorf("close errors: %v", errs) + } + return nil +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/reader.go b/sei-db/ledger_db/receipt/parquet_v2/reader.go index 8a53878f52..378d1cf110 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/reader.go +++ b/sei-db/ledger_db/receipt/parquet_v2/reader.go @@ -2,7 +2,12 @@ package parquet_v2 import ( "context" + "database/sql" + "fmt" + "runtime" + "strings" + "github.com/duckdb/duckdb-go/v2" "github.com/ethereum/go-ethereum/common" "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" ) @@ -10,6 +15,7 @@ import ( // Reader is the V2 DuckDB query helper. It intentionally owns no file-list // state; callers pass explicit file snapshots to each query. type Reader struct { + db *sql.DB basePath string maxBlocksPerFile uint64 } @@ -22,7 +28,37 @@ func NewReaderWithMaxBlocksPerFile(basePath string, maxBlocksPerFile uint64) (*R if maxBlocksPerFile == 0 { maxBlocksPerFile = parquet.DefaultStoreConfig().MaxBlocksPerFile } + + connector, err := duckdb.NewConnector("", nil) + if err != nil { + return nil, fmt.Errorf("failed to create DuckDB connector: %w", err) + } + + db := sql.OpenDB(connector) + numCPU := runtime.NumCPU() + db.SetMaxOpenConns(numCPU * 2) + db.SetMaxIdleConns(numCPU) + + settings := []string{ + fmt.Sprintf("SET threads TO %d", numCPU), + "SET memory_limit = '1GB'", + "SET enable_object_cache = true", + "SET enable_progress_bar = false", + "SET preserve_insertion_order = false", + } + for _, statement := range settings { + if _, err = db.Exec(statement); err != nil { + _ = db.Close() + return nil, fmt.Errorf("failed to configure duckdb (%s): %w", statement, err) + } + } + if err = configureParquetMetadataCache(db); err != nil { + _ = db.Close() + return nil, err + } + return &Reader{ + db: db, basePath: basePath, maxBlocksPerFile: maxBlocksPerFile, }, nil @@ -33,8 +69,12 @@ func (r *Reader) setMaxBlocksPerFile(maxBlocksPerFile uint64) { } 
func (r *Reader) Close() error { - _ = r - return ErrNotImplemented + if r == nil || r.db == nil { + return nil + } + err := r.db.Close() + r.db = nil + return err } func (r *Reader) QueryReceiptByTxHash(ctx context.Context, files []string, txHash common.Hash) (*parquet.ReceiptResult, error) { @@ -63,8 +103,63 @@ func (r *Reader) QueryLogs(ctx context.Context, files []string, filter parquet.L } func (r *Reader) MaxReceiptBlockNumber(ctx context.Context, files []string) (uint64, bool, error) { - _ = r - _ = ctx - _ = files - return 0, false, ErrNotImplemented + if len(files) == 0 { + return 0, false, nil + } + + var parquetFiles string + if len(files) == 1 { + parquetFiles = quoteSQLString(files[0]) + } else { + parquetFiles = fmt.Sprintf("[%s]", joinQuoted(files)) + } + + // #nosec G201 -- parquetFiles derived from local file paths + query := fmt.Sprintf("SELECT MAX(block_number) FROM read_parquet(%s, union_by_name=true)", parquetFiles) + row := r.db.QueryRowContext(ctx, query) + var max sql.NullInt64 + if err := row.Scan(&max); err != nil { + return 0, false, fmt.Errorf("failed to query max block number: %w", err) + } + if !max.Valid { + return 0, false, nil + } + if max.Int64 < 0 { + return 0, false, fmt.Errorf("invalid negative block number: %d", max.Int64) + } + return uint64(max.Int64), true, nil +} + +func (r *Reader) isFileReadable(path string) bool { + // #nosec G201 -- path comes from local parquet file scans, not user input. 
+ _, err := r.db.Exec(fmt.Sprintf("SELECT 1 FROM read_parquet(%s) LIMIT 1", quoteSQLString(path))) + return err == nil +} + +func configureParquetMetadataCache(db *sql.DB) error { + const sizeSetting = "SET parquet_metadata_cache_size = 500" + if _, err := db.Exec(sizeSetting); err == nil { + return nil + } else if !strings.Contains(err.Error(), "unrecognized configuration parameter") { + return fmt.Errorf("failed to configure duckdb (%s): %w", sizeSetting, err) + } + + const toggleSetting = "SET parquet_metadata_cache = true" + if _, err := db.Exec(toggleSetting); err != nil { + return fmt.Errorf("failed to configure duckdb (%s): %w", toggleSetting, err) + } + + return nil +} + +func joinQuoted(files []string) string { + quoted := make([]string, len(files)) + for i, f := range files { + quoted[i] = quoteSQLString(f) + } + return strings.Join(quoted, ", ") +} + +func quoteSQLString(s string) string { + return "'" + strings.ReplaceAll(s, "'", "''") + "'" } diff --git a/sei-db/ledger_db/receipt/parquet_v2/store.go b/sei-db/ledger_db/receipt/parquet_v2/store.go index 4c69db2565..85fdf9cf4c 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store.go @@ -1,7 +1,12 @@ package parquet_v2 import ( + "context" + "fmt" + "os" + "path/filepath" "sync" + "time" "github.com/ethereum/go-ethereum/common" "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" @@ -20,32 +25,85 @@ type Store struct { func NewStore(cfg parquet.StoreConfig) (*Store, error) { storeCfg := resolveStoreConfig(cfg) + if err := os.MkdirAll(storeCfg.DBDirectory, 0o750); err != nil { + return nil, fmt.Errorf("failed to create parquet base directory: %w", err) + } + requests := make(chan coordRequest) done := make(chan struct{}) reader, err := NewReaderWithMaxBlocksPerFile(cfg.DBDirectory, storeCfg.MaxBlocksPerFile) if err != nil { return nil, err } + cleanupReader := true + defer func() { + if cleanupReader { + _ = reader.Close() + } + }() + + walDir := 
filepath.Join(storeCfg.DBDirectory, "parquet-wal") + receiptWAL, err := parquet.NewWAL(walDir) + if err != nil { + return nil, err + } + cleanupWAL := true + defer func() { + if cleanupWAL { + _ = receiptWAL.Close() + } + }() + + closedFiles, err := scanClosedFiles(storeCfg.DBDirectory, reader) + if err != nil { + return nil, err + } c := &coordinator{ requests: requests, done: done, config: storeCfg, basePath: cfg.DBDirectory, + closedFiles: closedFiles, receiptsBuffer: make([]parquet.ReceiptRecord, 0, 1000), logsBuffer: make([]parquet.LogRecord, 0, 10000), tempWriteCache: make(map[common.Hash][]tempReceipt), reader: reader, + wal: receiptWAL, latestVersion: 0, earliestVersion: 0, } + receiptFiles := make([]string, 0, len(closedFiles)) + for _, f := range closedFiles { + receiptFiles = append(receiptFiles, f.receiptPath) + } + if maxBlock, ok, err := reader.MaxReceiptBlockNumber(context.Background(), receiptFiles); err != nil { + return nil, err + } else if ok { + latest, err := int64FromUint64(maxBlock) + if err != nil { + return nil, err + } + c.latestVersion = latest + if maxBlock < ^uint64(0) { + c.fileStartBlock = maxBlock + 1 + } + } + + if storeCfg.KeepRecent > 0 && storeCfg.PruneIntervalSeconds > 0 { + c.pruneTicker = time.NewTicker(time.Duration(storeCfg.PruneIntervalSeconds) * time.Second) + c.pruneTick = c.pruneTicker.C + } + s := &Store{ requests: requests, done: done, } go c.run() + cleanupReader = false + cleanupWAL = false return s, nil } diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go new file mode 100644 index 0000000000..6ae826669c --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go @@ -0,0 +1,186 @@ +package parquet_v2 + +import ( + "context" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func 
newDispatchStore(t *testing.T) *Store { + t.Helper() + + store, err := NewStore(parquet.StoreConfig{ + DBDirectory: t.TempDir(), + MaxBlocksPerFile: 4, + }) + require.NoError(t, err) + t.Cleanup(func() { _ = store.Close() }) + return store +} + +func TestUnimplementedOperationsDispatchThroughCoordinator(t *testing.T) { + ctx := context.Background() + txHash := common.HexToHash("0x1") + + tests := []struct { + name string + run func(*Store) error + }{ + { + name: "write receipts", + run: func(store *Store) error { + return store.WriteReceipts(nil) + }, + }, + { + name: "get receipt by tx hash", + run: func(store *Store) error { + _, err := store.GetReceiptByTxHash(ctx, txHash) + return err + }, + }, + { + name: "get receipt by tx hash in block", + run: func(store *Store) error { + _, err := store.GetReceiptByTxHashInBlock(ctx, txHash, 1) + return err + }, + }, + { + name: "get logs", + run: func(store *Store) error { + _, err := store.GetLogs(ctx, parquet.LogFilter{}) + return err + }, + }, + { + name: "observe empty block", + run: func(store *Store) error { + return store.ObserveEmptyBlock(1) + }, + }, + { + name: "replay WAL", + run: func(store *Store) error { + _, err := store.ReplayWAL(nil) + return err + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + store := newDispatchStore(t) + require.ErrorIs(t, tc.run(store), ErrNotImplemented) + }) + } +} + +func TestMetadataAndConfigRequestsDispatchThroughCoordinator(t *testing.T) { + store := newDispatchStore(t) + require.Zero(t, cap(store.requests)) + + require.Equal(t, uint64(0), store.FileStartBlock()) + require.Equal(t, int64(0), store.LatestVersion()) + require.Equal(t, uint64(4), store.CacheRotateInterval()) + require.True(t, store.IsRotationBoundary(8)) + require.False(t, store.IsRotationBoundary(9)) + + store.SetLatestVersion(10) + require.Equal(t, int64(10), store.LatestVersion()) + + store.UpdateLatestVersion(8) + require.Equal(t, int64(10), store.LatestVersion()) + + 
store.UpdateLatestVersion(12) + require.Equal(t, int64(12), store.LatestVersion()) + + store.SetEarliestVersion(3) + store.SetBlockFlushInterval(2) + store.SetFaultHooks(&parquet.FaultHooks{}) + + store.SetMaxBlocksPerFile(3) + require.Equal(t, uint64(3), store.CacheRotateInterval()) + require.True(t, store.IsRotationBoundary(6)) + require.False(t, store.IsRotationBoundary(8)) +} + +func TestSetMaxBlocksPerFileUpdatesReaderState(t *testing.T) { + reader, err := NewReaderWithMaxBlocksPerFile(t.TempDir(), 10) + require.NoError(t, err) + t.Cleanup(func() { _ = reader.Close() }) + + resp := make(chan error, 1) + coord := &coordinator{ + config: parquet.StoreConfig{ + MaxBlocksPerFile: 10, + }, + reader: reader, + } + + coord.handleSetMaxBlocksPerFile(setMaxBlocksPerFileReq{ + maxBlocksPerFile: 3, + resp: resp, + }) + + require.NoError(t, <-resp) + require.Equal(t, uint64(3), coord.config.MaxBlocksPerFile) + require.Equal(t, uint64(3), reader.maxBlocksPerFile) +} + +func TestUnbufferedRequestsApplyBackpressure(t *testing.T) { + requests := make(chan coordRequest) + done := make(chan struct{}) + coord := &coordinator{ + requests: requests, + done: done, + } + store := &Store{ + requests: requests, + done: done, + } + go coord.run() + + require.Zero(t, cap(store.requests)) + + firstResp := make(chan writeResp) + store.requests <- writeReq{resp: firstResp} + time.Sleep(10 * time.Millisecond) + + secondDone := make(chan error, 1) + go func() { + secondDone <- store.Flush() + }() + + select { + case err := <-secondDone: + t.Fatalf("second request completed before first unblocked: %v", err) + case <-time.After(25 * time.Millisecond): + } + + require.ErrorIs(t, (<-firstResp).err, ErrNotImplemented) + require.NoError(t, <-secondDone) + require.NoError(t, store.Close()) +} + +func TestCloseStopsFutureRequests(t *testing.T) { + store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}) + require.NoError(t, err) + + require.NoError(t, store.Close()) + require.ErrorIs(t, 
store.WriteReceipts(nil), ErrStoreClosed) + require.NoError(t, store.Close()) +} + +func TestSimulateCrashStopsFutureRequests(t *testing.T) { + store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}) + require.NoError(t, err) + + store.SimulateCrash() + require.ErrorIs(t, store.WriteReceipts(nil), ErrStoreClosed) + require.NoError(t, store.Close()) +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go new file mode 100644 index 0000000000..f4c012ba2a --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go @@ -0,0 +1,171 @@ +package parquet_v2 + +import ( + "fmt" + "math/big" + "os" + "path/filepath" + "testing" + + "github.com/ethereum/go-ethereum/common" + parquetgo "github.com/parquet-go/parquet-go" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func TestNewStoreCreatesDirectoryAndClosesIdempotently(t *testing.T) { + dir := filepath.Join(t.TempDir(), "nested", "parquet") + + store, err := NewStore(parquet.StoreConfig{DBDirectory: dir}) + require.NoError(t, err) + require.DirExists(t, dir) + require.DirExists(t, filepath.Join(dir, "parquet-wal")) + + require.NoError(t, store.Flush()) + require.NoError(t, store.Close()) + require.NoError(t, store.Close()) +} + +func TestNewStoreSeedsLatestVersionFromClosedFiles(t *testing.T) { + dir := t.TempDir() + writeReceiptFile(t, dir, 100, []uint64{101, 123}) + writeLogFile(t, dir, 100) + + store, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 100, + }) + require.NoError(t, err) + require.Equal(t, int64(123), store.LatestVersion()) + require.Equal(t, uint64(124), store.FileStartBlock()) + require.NoError(t, store.Close()) + + reopened, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 100, + }) + require.NoError(t, err) + require.Equal(t, int64(123), reopened.LatestVersion()) + require.Equal(t, 
uint64(124), reopened.FileStartBlock()) + require.NoError(t, reopened.Close()) +} + +func TestNewStoreRemovesCorruptTrailingPair(t *testing.T) { + dir := t.TempDir() + writeReceiptFile(t, dir, 0, []uint64{1}) + writeLogFile(t, dir, 0) + + corruptReceipt := filepath.Join(dir, "receipts_500.parquet") + require.NoError(t, os.WriteFile(corruptReceipt, []byte("not parquet"), 0o644)) + corruptLog := filepath.Join(dir, "logs_500.parquet") + require.NoError(t, os.WriteFile(corruptLog, []byte("not parquet"), 0o644)) + + store, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 500, + }) + require.NoError(t, err) + require.NoError(t, store.Close()) + + _, err = os.Stat(corruptReceipt) + require.True(t, os.IsNotExist(err), "corrupt receipt file should be deleted") + _, err = os.Stat(corruptLog) + require.True(t, os.IsNotExist(err), "corrupt log file should be deleted") +} + +func TestNewStoreRemovesReceiptCounterpartForCorruptTrailingLog(t *testing.T) { + dir := t.TempDir() + writeReceiptFile(t, dir, 0, []uint64{1}) + writeLogFile(t, dir, 0) + writeReceiptFile(t, dir, 500, []uint64{501}) + + corruptLog := filepath.Join(dir, "logs_500.parquet") + require.NoError(t, os.WriteFile(corruptLog, []byte("not parquet"), 0o644)) + receiptCounterpart := filepath.Join(dir, "receipts_500.parquet") + + store, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 500, + }) + require.NoError(t, err) + require.Equal(t, int64(1), store.LatestVersion()) + require.NoError(t, store.Close()) + + _, err = os.Stat(receiptCounterpart) + require.True(t, os.IsNotExist(err), "receipt counterpart should be deleted") + _, err = os.Stat(corruptLog) + require.True(t, os.IsNotExist(err), "corrupt log file should be deleted") +} + +func TestNewStoreIgnoresUnmatchedFiles(t *testing.T) { + dir := t.TempDir() + writeReceiptFile(t, dir, 0, []uint64{1}) + writeLogFile(t, dir, 500) + + store, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + 
MaxBlocksPerFile: 500, + }) + require.NoError(t, err) + require.Equal(t, int64(0), store.LatestVersion()) + require.Equal(t, uint64(0), store.FileStartBlock()) + require.NoError(t, store.Close()) +} + +func TestScanClosedFilesSortsByStartBlock(t *testing.T) { + dir := t.TempDir() + for _, startBlock := range []uint64{1000, 0, 500} { + writeReceiptFile(t, dir, startBlock, []uint64{startBlock + 1}) + writeLogFile(t, dir, startBlock) + } + + reader, err := NewReaderWithMaxBlocksPerFile(dir, 500) + require.NoError(t, err) + t.Cleanup(func() { _ = reader.Close() }) + + closedFiles, err := scanClosedFiles(dir, reader) + require.NoError(t, err) + require.Len(t, closedFiles, 3) + require.Equal(t, uint64(0), closedFiles[0].startBlock) + require.Equal(t, uint64(500), closedFiles[1].startBlock) + require.Equal(t, uint64(1000), closedFiles[2].startBlock) +} + +func writeReceiptFile(t *testing.T, dir string, startBlock uint64, blocks []uint64) { + t.Helper() + + path := filepath.Join(dir, fmt.Sprintf("receipts_%d.parquet", startBlock)) + f, err := os.Create(path) + require.NoError(t, err) + + w := parquetgo.NewGenericWriter[parquet.ReceiptRecord](f) + for _, block := range blocks { + txHash := common.BigToHash(new(big.Int).SetUint64(block)) + _, err := w.Write([]parquet.ReceiptRecord{{ + TxHash: txHash[:], + BlockNumber: block, + ReceiptBytes: []byte{byte(block)}, + }}) + require.NoError(t, err) + } + require.NoError(t, w.Close()) + require.NoError(t, f.Close()) +} + +func writeLogFile(t *testing.T, dir string, startBlock uint64) { + t.Helper() + + path := filepath.Join(dir, fmt.Sprintf("logs_%d.parquet", startBlock)) + f, err := os.Create(path) + require.NoError(t, err) + + w := parquetgo.NewGenericWriter[parquet.LogRecord](f) + txHash := common.BigToHash(new(big.Int).SetUint64(startBlock)) + _, err = w.Write([]parquet.LogRecord{{ + BlockNumber: startBlock, + TxHash: txHash[:], + }}) + require.NoError(t, err) + require.NoError(t, w.Close()) + require.NoError(t, f.Close()) +} 
diff --git a/sei-db/ledger_db/receipt/parquet_v2/types.go b/sei-db/ledger_db/receipt/parquet_v2/types.go index f4f772a3da..48ecb6691c 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/types.go +++ b/sei-db/ledger_db/receipt/parquet_v2/types.go @@ -2,6 +2,8 @@ package parquet_v2 import ( "errors" + "fmt" + "math" "github.com/ethereum/go-ethereum/common" "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" @@ -39,3 +41,10 @@ type ReplayResult struct { WarmupRecords []parquet.ReceiptRecord Blocks []ReplayedBlock } + +func int64FromUint64(value uint64) (int64, error) { + if value > uint64(math.MaxInt64) { + return 0, fmt.Errorf("value %d overflows int64", value) + } + return int64(value), nil +} From f7ddab19c6d497cdd2787123e4a90d10d2ea14c3 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 21:51:58 -0400 Subject: [PATCH 04/27] Add parquet v2 write path --- .../ledger_db/receipt/parquet_v2/handlers.go | 166 ++++++++++++++++- .../receipt/parquet_v2/store_dispatch_test.go | 13 +- .../receipt/parquet_v2/store_write_test.go | 167 ++++++++++++++++++ 3 files changed, 335 insertions(+), 11 deletions(-) create mode 100644 sei-db/ledger_db/receipt/parquet_v2/store_write_test.go diff --git a/sei-db/ledger_db/receipt/parquet_v2/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/handlers.go index ced81e5b63..ab6cd6f985 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/handlers.go @@ -1,10 +1,17 @@ package parquet_v2 -import "fmt" +import ( + "fmt" + "os" + "path/filepath" + + "github.com/ethereum/go-ethereum/common" + parquetgo "github.com/parquet-go/parquet-go" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" +) func (c *coordinator) handleWrite(req writeReq) { - _ = c - req.resp <- writeResp{err: ErrNotImplemented} + req.resp <- writeResp{err: c.writeReceipts(req.inputs)} } func (c *coordinator) handleReadByTxHash(req readByTxHashReq) { @@ -143,6 +150,159 @@ func (c *coordinator) 
handleSimulateCrash(req simulateCrashReq) { req.resp <- struct{}{} } +func (c *coordinator) writeReceipts(inputs []parquet.ReceiptInput) error { + if len(inputs) == 0 { + return nil + } + if c.wal == nil { + return fmt.Errorf("parquet WAL is not initialized") + } + + type blockBatch struct { + blockNumber uint64 + receipts [][]byte + inputs []parquet.ReceiptInput + } + var batches []blockBatch + batchIdx := make(map[uint64]int) + + for i := range inputs { + bn := inputs[i].BlockNumber + if idx, ok := batchIdx[bn]; ok { + batches[idx].receipts = append(batches[idx].receipts, inputs[i].ReceiptBytes) + batches[idx].inputs = append(batches[idx].inputs, inputs[i]) + continue + } + batchIdx[bn] = len(batches) + batches = append(batches, blockBatch{ + blockNumber: bn, + receipts: [][]byte{inputs[i].ReceiptBytes}, + inputs: []parquet.ReceiptInput{inputs[i]}, + }) + } + + maxBlock := inputs[0].BlockNumber + for _, b := range batches { + entry := parquet.WALEntry{ + BlockNumber: b.blockNumber, + Receipts: b.receipts, + } + if err := c.wal.Write(entry); err != nil { + return err + } + + if h := c.faultHooks; h != nil && h.AfterWALWrite != nil { + if err := h.AfterWALWrite(b.blockNumber); err != nil { + return err + } + } + + for i := range b.inputs { + if err := c.applyReceipt(b.inputs[i]); err != nil { + return err + } + if b.inputs[i].BlockNumber > maxBlock { + maxBlock = b.inputs[i].BlockNumber + } + } + } + + latest, err := int64FromUint64(maxBlock) + if err != nil { + return err + } + if latest > c.latestVersion { + c.latestVersion = latest + } + return nil +} + +func (c *coordinator) applyReceipt(input parquet.ReceiptInput) error { + if c.receiptWriter == nil { + aligned := alignedFileStartBlock(input.BlockNumber, c.config.MaxBlocksPerFile) + if aligned >= c.fileStartBlock { + c.fileStartBlock = aligned + } + if err := c.initWriters(); err != nil { + return err + } + } + + blockNumber := input.BlockNumber + if blockNumber != c.lastSeenBlock { + if c.lastSeenBlock != 0 { 
+ c.blocksSinceFlush++ + } + c.lastSeenBlock = blockNumber + } + + c.receiptsBuffer = append(c.receiptsBuffer, input.Receipt) + if len(input.Logs) > 0 { + c.logsBuffer = append(c.logsBuffer, input.Logs...) + } + + txHash := common.BytesToHash(input.Receipt.TxHash) + c.tempWriteCache[txHash] = append(c.tempWriteCache[txHash], tempReceipt{ + blockNumber: input.BlockNumber, + writeOrdinal: c.nextWriteOrdinal, + receiptBytes: input.ReceiptBytes, + }) + c.nextWriteOrdinal++ + + if c.config.BlockFlushInterval > 0 && c.blocksSinceFlush >= c.config.BlockFlushInterval { + if err := c.flushOpenFile(); err != nil { + return err + } + c.blocksSinceFlush = 0 + } + + return nil +} + +func alignedFileStartBlock(blockNumber, maxBlocksPerFile uint64) uint64 { + if maxBlocksPerFile == 0 { + return blockNumber + } + return (blockNumber / maxBlocksPerFile) * maxBlocksPerFile +} + +func (c *coordinator) initWriters() error { + receiptPath := filepath.Join(c.basePath, fmt.Sprintf("receipts_%d.parquet", c.fileStartBlock)) + logPath := filepath.Join(c.basePath, fmt.Sprintf("logs_%d.parquet", c.fileStartBlock)) + + // #nosec G304 -- paths are constructed from configured base directory. + receiptFile, err := os.Create(receiptPath) + if err != nil { + return fmt.Errorf("failed to create receipt parquet file: %w", err) + } + + // #nosec G304 -- paths are constructed from configured base directory. 
+ logFile, err := os.Create(logPath) + if err != nil { + if closeErr := receiptFile.Close(); closeErr != nil { + return fmt.Errorf("failed to create log parquet file: %w; close receipt file error: %v", err, closeErr) + } + return fmt.Errorf("failed to create log parquet file: %w", err) + } + + blockNumberSorting := parquetgo.SortingWriterConfig( + parquetgo.SortingColumns(parquetgo.Ascending("block_number")), + ) + + c.receiptFile = receiptFile + c.logFile = logFile + c.receiptWriter = parquetgo.NewGenericWriter[parquet.ReceiptRecord](receiptFile, + parquetgo.Compression(&parquetgo.Snappy), + blockNumberSorting, + ) + c.logWriter = parquetgo.NewGenericWriter[parquet.LogRecord](logFile, + parquetgo.Compression(&parquetgo.Snappy), + blockNumberSorting, + ) + + return nil +} + func (c *coordinator) flushOpenFile() error { if len(c.receiptsBuffer) == 0 { return nil diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go index 6ae826669c..79d7dbed90 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go @@ -30,12 +30,6 @@ func TestUnimplementedOperationsDispatchThroughCoordinator(t *testing.T) { name string run func(*Store) error }{ - { - name: "write receipts", - run: func(store *Store) error { - return store.WriteReceipts(nil) - }, - }, { name: "get receipt by tx hash", run: func(store *Store) error { @@ -148,7 +142,10 @@ func TestUnbufferedRequestsApplyBackpressure(t *testing.T) { require.Zero(t, cap(store.requests)) firstResp := make(chan writeResp) - store.requests <- writeReq{resp: firstResp} + store.requests <- writeReq{ + inputs: []parquet.ReceiptInput{testReceiptInput(1, common.HexToHash("0x1"))}, + resp: firstResp, + } time.Sleep(10 * time.Millisecond) secondDone := make(chan error, 1) @@ -162,7 +159,7 @@ func TestUnbufferedRequestsApplyBackpressure(t *testing.T) { case <-time.After(25 * 
time.Millisecond): } - require.ErrorIs(t, (<-firstResp).err, ErrNotImplemented) + require.Error(t, (<-firstResp).err) require.NoError(t, <-secondDone) require.NoError(t, store.Close()) } diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go new file mode 100644 index 0000000000..32aea5872e --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go @@ -0,0 +1,167 @@ +package parquet_v2 + +import ( + "math/big" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func TestWriteReceiptsUpdatesLatestAndReopens(t *testing.T) { + dir := t.TempDir() + + store, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 500, + BlockFlushInterval: 100, + PruneIntervalSeconds: 0, + }) + require.NoError(t, err) + + require.NoError(t, store.WriteReceipts([]parquet.ReceiptInput{ + testReceiptInput(1, common.HexToHash("0x1")), + testReceiptInput(2, common.HexToHash("0x2")), + testReceiptInput(3, common.HexToHash("0x3")), + })) + require.Equal(t, int64(3), store.LatestVersion()) + require.NoError(t, store.Close()) + + reopened, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 500, + PruneIntervalSeconds: 0, + }) + require.NoError(t, err) + require.Equal(t, int64(3), reopened.LatestVersion()) + require.Equal(t, uint64(4), reopened.FileStartBlock()) + require.NoError(t, reopened.Close()) +} + +func TestWriteReceiptsGroupsWALByBlockEncounterOrder(t *testing.T) { + wal := &recordingWAL{} + coord := newWriteCoordinator(t, wal) + defer func() { require.NoError(t, coord.closeWriters()) }() + + require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + testReceiptInput(2, common.HexToHash("0x22")), + testReceiptInput(1, common.HexToHash("0x11")), + testReceiptInput(2, common.HexToHash("0x23")), + })) + + require.Len(t, wal.entries, 2) + 
require.Equal(t, uint64(2), wal.entries[0].BlockNumber) + require.Len(t, wal.entries[0].Receipts, 2) + require.Equal(t, uint64(1), wal.entries[1].BlockNumber) + require.Len(t, wal.entries[1].Receipts, 1) +} + +func TestWriteReceiptsKeepsDuplicateHashCacheEntries(t *testing.T) { + wal := &recordingWAL{} + coord := newWriteCoordinator(t, wal) + defer func() { require.NoError(t, coord.closeWriters()) }() + + txHash := common.HexToHash("0xabc") + require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + testReceiptInput(1, txHash), + testReceiptInput(2, txHash), + })) + + require.Len(t, coord.receiptsBuffer, 2) + require.Equal(t, int64(2), coord.latestVersion) + require.Len(t, coord.tempWriteCache[txHash], 2) + require.Equal(t, uint64(1), coord.tempWriteCache[txHash][0].blockNumber) + require.Equal(t, uint64(0), coord.tempWriteCache[txHash][0].writeOrdinal) + require.Equal(t, uint64(2), coord.tempWriteCache[txHash][1].blockNumber) + require.Equal(t, uint64(1), coord.tempWriteCache[txHash][1].writeOrdinal) +} + +func TestWriteReceiptsFlushesAtConfiguredBlockInterval(t *testing.T) { + wal := &recordingWAL{} + coord := newWriteCoordinator(t, wal) + coord.config.BlockFlushInterval = 1 + defer func() { require.NoError(t, coord.closeWriters()) }() + + require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + testReceiptInput(1, common.HexToHash("0x1")), + testReceiptInput(2, common.HexToHash("0x2")), + })) + + require.Empty(t, coord.receiptsBuffer) + require.Empty(t, coord.logsBuffer) + require.Zero(t, coord.blocksSinceFlush) + require.Equal(t, int64(2), coord.latestVersion) +} + +func newWriteCoordinator(t *testing.T, wal *recordingWAL) *coordinator { + t.Helper() + + cfg := parquet.DefaultStoreConfig() + cfg.DBDirectory = t.TempDir() + cfg.MaxBlocksPerFile = 500 + cfg.BlockFlushInterval = 0 + + return &coordinator{ + config: cfg, + basePath: cfg.DBDirectory, + receiptsBuffer: make([]parquet.ReceiptRecord, 0, 1000), + logsBuffer: make([]parquet.LogRecord, 0, 
10000), + tempWriteCache: make(map[common.Hash][]tempReceipt), + wal: wal, + } +} + +func testReceiptInput(blockNumber uint64, txHash common.Hash) parquet.ReceiptInput { + receiptBytes := []byte{byte(blockNumber), txHash[31]} + return parquet.ReceiptInput{ + BlockNumber: blockNumber, + Receipt: parquet.ReceiptRecord{ + TxHash: txHash[:], + BlockNumber: blockNumber, + ReceiptBytes: receiptBytes, + }, + Logs: []parquet.LogRecord{{ + BlockNumber: blockNumber, + TxHash: txHash[:], + Address: common.BigToAddress(new(big.Int).SetUint64(blockNumber)).Bytes(), + }}, + ReceiptBytes: receiptBytes, + } +} + +type recordingWAL struct { + entries []parquet.WALEntry +} + +func (w *recordingWAL) Write(entry parquet.WALEntry) error { + w.entries = append(w.entries, entry) + return nil +} + +func (w *recordingWAL) TruncateBefore(uint64) error { return nil } + +func (w *recordingWAL) TruncateAfter(uint64) error { return nil } + +func (w *recordingWAL) ReadAt(uint64) (parquet.WALEntry, error) { return parquet.WALEntry{}, nil } + +func (w *recordingWAL) FirstOffset() (uint64, error) { return 0, nil } + +func (w *recordingWAL) LastOffset() (uint64, error) { return 0, nil } + +func (w *recordingWAL) Replay(uint64, uint64, func(uint64, parquet.WALEntry) error) error { + return nil +} + +func (w *recordingWAL) Close() error { return nil } + +var _ interface { + Write(parquet.WALEntry) error + TruncateBefore(uint64) error + TruncateAfter(uint64) error + ReadAt(uint64) (parquet.WALEntry, error) + FirstOffset() (uint64, error) + LastOffset() (uint64, error) + Replay(uint64, uint64, func(uint64, parquet.WALEntry) error) error + Close() error +} = (*recordingWAL)(nil) From c0fb4e9596c9dcf1571041af9c095ca2959fcee1 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 21:54:26 -0400 Subject: [PATCH 05/27] Test parquet v2 rotation boundaries --- .../receipt/parquet_v2/store_rotation_test.go | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 
sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go new file mode 100644 index 0000000000..d81763cbbf --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go @@ -0,0 +1,38 @@ +package parquet_v2 + +import ( + "testing" + + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func TestRotationBoundaryPrimitives(t *testing.T) { + coord := &coordinator{ + config: parquet.StoreConfig{MaxBlocksPerFile: 500}, + } + + resp := make(chan bool, 1) + coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 0, resp: resp}) + require.True(t, <-resp) + + resp = make(chan bool, 1) + coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 500, resp: resp}) + require.True(t, <-resp) + + resp = make(chan bool, 1) + coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 501, resp: resp}) + require.False(t, <-resp) + + coord.config.MaxBlocksPerFile = 0 + resp = make(chan bool, 1) + coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 500, resp: resp}) + require.False(t, <-resp) +} + +func TestAlignedFileStartBlock(t *testing.T) { + require.Equal(t, uint64(5000), alignedFileStartBlock(5234, 500)) + require.Equal(t, uint64(5000), alignedFileStartBlock(5000, 500)) + require.Equal(t, uint64(0), alignedFileStartBlock(499, 500)) + require.Equal(t, uint64(5234), alignedFileStartBlock(5234, 0)) +} From b55a9ea3759859826e74b8be2a46ed1c795fa82e Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 21:55:19 -0400 Subject: [PATCH 06/27] Test parquet v2 lazy writer init --- .../receipt/parquet_v2/store_rotation_test.go | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go index 
d81763cbbf..34bd840a40 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go @@ -1,8 +1,11 @@ package parquet_v2 import ( + "os" + "path/filepath" "testing" + "github.com/ethereum/go-ethereum/common" "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" "github.com/stretchr/testify/require" ) @@ -36,3 +39,67 @@ func TestAlignedFileStartBlock(t *testing.T) { require.Equal(t, uint64(0), alignedFileStartBlock(499, 500)) require.Equal(t, uint64(5234), alignedFileStartBlock(5234, 0)) } + +func TestLazyInitUsesAlignedStartForFirstOffBoundaryWrite(t *testing.T) { + dir := t.TempDir() + store, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 500, + }) + require.NoError(t, err) + + require.NoError(t, store.WriteReceipts([]parquet.ReceiptInput{ + testReceiptInput(5234, common.HexToHash("0x5234")), + })) + require.NoError(t, store.Close()) + + require.FileExists(t, filepath.Join(dir, "receipts_5000.parquet")) + require.FileExists(t, filepath.Join(dir, "logs_5000.parquet")) +} + +func TestReopenLazyInitPreservesExistingAlignedFile(t *testing.T) { + dir := t.TempDir() + writeReceiptFile(t, dir, 10, []uint64{10}) + writeLogFile(t, dir, 10) + + alignedFile := filepath.Join(dir, "receipts_10.parquet") + infoBefore, err := os.Stat(alignedFile) + require.NoError(t, err) + + store, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 10, + }) + require.NoError(t, err) + require.Equal(t, uint64(11), store.FileStartBlock()) + + require.NoError(t, store.WriteReceipts([]parquet.ReceiptInput{ + testReceiptInput(11, common.HexToHash("0x11")), + })) + require.NoError(t, store.Close()) + + infoAfter, err := os.Stat(alignedFile) + require.NoError(t, err) + require.Equal(t, infoBefore.Size(), infoAfter.Size(), "existing aligned file must not be truncated") + require.FileExists(t, filepath.Join(dir, "receipts_11.parquet")) +} + +func 
TestReopenLazyInitUsesAlignedStartOnGap(t *testing.T) { + dir := t.TempDir() + writeReceiptFile(t, dir, 10, []uint64{10}) + writeLogFile(t, dir, 10) + + store, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 10, + }) + require.NoError(t, err) + + require.NoError(t, store.WriteReceipts([]parquet.ReceiptInput{ + testReceiptInput(25, common.HexToHash("0x25")), + })) + require.NoError(t, store.Close()) + + require.FileExists(t, filepath.Join(dir, "receipts_20.parquet")) + require.FileExists(t, filepath.Join(dir, "logs_20.parquet")) +} From c1a7d0ea932675e0c1346fd1d3df86b48259ab5f Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 21:56:59 -0400 Subject: [PATCH 07/27] Add parquet v2 boundary rotation --- .../ledger_db/receipt/parquet_v2/handlers.go | 83 +++++++++++++++++-- .../receipt/parquet_v2/store_rotation_test.go | 48 +++++++++++ .../receipt/parquet_v2/store_write_test.go | 18 +++- sei-db/ledger_db/receipt/parquet_v2/wal.go | 29 +++++++ 4 files changed, 169 insertions(+), 9 deletions(-) diff --git a/sei-db/ledger_db/receipt/parquet_v2/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/handlers.go index ab6cd6f985..d52ac9ce15 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/handlers.go @@ -68,11 +68,7 @@ func (c *coordinator) handleFileStartBlock(req fileStartBlockReq) { } func (c *coordinator) handleIsRotationBoundary(req isRotationBoundaryReq) { - if c.config.MaxBlocksPerFile == 0 { - req.resp <- false - return - } - req.resp <- req.blockNumber%c.config.MaxBlocksPerFile == 0 + req.resp <- c.isRotationBoundary(req.blockNumber) } func (c *coordinator) handleSetBlockFlushInterval(req setBlockFlushIntervalReq) { @@ -197,6 +193,12 @@ func (c *coordinator) writeReceipts(inputs []parquet.ReceiptInput) error { } } + if c.receiptWriter != nil && b.blockNumber != c.lastSeenBlock && c.isRotationBoundary(b.blockNumber) { + if err := c.rotateOpenFile(b.blockNumber); err != nil { + 
return err + } + } + for i := range b.inputs { if err := c.applyReceipt(b.inputs[i]); err != nil { return err @@ -259,6 +261,10 @@ func (c *coordinator) applyReceipt(input parquet.ReceiptInput) error { return nil } +func (c *coordinator) isRotationBoundary(blockNumber uint64) bool { + return c.config.MaxBlocksPerFile > 0 && blockNumber%c.config.MaxBlocksPerFile == 0 +} + func alignedFileStartBlock(blockNumber, maxBlocksPerFile uint64) uint64 { if maxBlocksPerFile == 0 { return blockNumber @@ -303,6 +309,73 @@ func (c *coordinator) initWriters() error { return nil } +func (c *coordinator) rotateOpenFile(newBlockNumber uint64) error { + if err := c.rotateOpenFileWithoutWAL(newBlockNumber); err != nil { + return err + } + if err := c.clearWALPreservingLast(); err != nil { + return err + } + if h := c.faultHooks; h != nil && h.AfterWALClear != nil { + if err := h.AfterWALClear(newBlockNumber); err != nil { + return err + } + } + c.dropTempCacheBefore(c.fileStartBlock) + return nil +} + +func (c *coordinator) rotateOpenFileWithoutWAL(newBlockNumber uint64) error { + if c.receiptWriter == nil { + return nil + } + if err := c.flushOpenFile(); err != nil { + return err + } + + oldStartBlock := c.fileStartBlock + oldReceiptPath := filepath.Join(c.basePath, fmt.Sprintf("receipts_%d.parquet", oldStartBlock)) + oldLogPath := filepath.Join(c.basePath, fmt.Sprintf("logs_%d.parquet", oldStartBlock)) + + if err := c.closeWriters(); err != nil { + return err + } + + if h := c.faultHooks; h != nil && h.AfterCloseWriters != nil { + if err := h.AfterCloseWriters(newBlockNumber); err != nil { + return err + } + } + + c.closedFiles = append(c.closedFiles, closedFile{ + startBlock: oldStartBlock, + receiptPath: oldReceiptPath, + logPath: oldLogPath, + }) + c.fileStartBlock = newBlockNumber + if err := c.initWriters(); err != nil { + return err + } + c.blocksSinceFlush = 0 + return nil +} + +func (c *coordinator) dropTempCacheBefore(blockNumber uint64) { + for txHash, entries := range 
c.tempWriteCache { + kept := entries[:0] + for _, entry := range entries { + if entry.blockNumber >= blockNumber { + kept = append(kept, entry) + } + } + if len(kept) == 0 { + delete(c.tempWriteCache, txHash) + continue + } + c.tempWriteCache[txHash] = kept + } +} + func (c *coordinator) flushOpenFile() error { if len(c.receiptsBuffer) == 0 { return nil diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go index 34bd840a40..c66b20dd64 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go @@ -1,6 +1,7 @@ package parquet_v2 import ( + "math/big" "os" "path/filepath" "testing" @@ -103,3 +104,50 @@ func TestReopenLazyInitUsesAlignedStartOnGap(t *testing.T) { require.FileExists(t, filepath.Join(dir, "receipts_20.parquet")) require.FileExists(t, filepath.Join(dir, "logs_20.parquet")) } + +func TestWriteRotatesAtAlignedBoundary(t *testing.T) { + wal := &recordingWAL{} + coord := newWriteCoordinator(t, wal) + coord.config.MaxBlocksPerFile = 4 + defer func() { require.NoError(t, coord.closeWriters()) }() + + for block := uint64(1); block <= 4; block++ { + require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + testReceiptInput(block, common.BigToHash(new(big.Int).SetUint64(block))), + })) + } + + require.Len(t, coord.closedFiles, 1) + require.Equal(t, uint64(0), coord.closedFiles[0].startBlock) + require.Equal(t, uint64(4), coord.fileStartBlock) + require.FileExists(t, filepath.Join(coord.basePath, "receipts_0.parquet")) + require.FileExists(t, filepath.Join(coord.basePath, "logs_0.parquet")) + require.FileExists(t, filepath.Join(coord.basePath, "receipts_4.parquet")) + require.FileExists(t, filepath.Join(coord.basePath, "logs_4.parquet")) + + require.Len(t, wal.truncatedBefore, 1) + require.Equal(t, uint64(4), wal.truncatedBefore[0]) + require.Len(t, coord.tempWriteCache, 1) + require.Contains(t, 
coord.tempWriteCache, common.BigToHash(big.NewInt(4))) +} + +func TestRotateOpenFilePrunesOnlyOldTempCacheEntries(t *testing.T) { + txHash := common.HexToHash("0xabc") + coord := &coordinator{ + tempWriteCache: map[common.Hash][]tempReceipt{ + txHash: { + {blockNumber: 1, writeOrdinal: 0}, + {blockNumber: 4, writeOrdinal: 1}, + }, + common.HexToHash("0xdef"): { + {blockNumber: 2, writeOrdinal: 2}, + }, + }, + } + + coord.dropTempCacheBefore(4) + + require.Len(t, coord.tempWriteCache, 1) + require.Len(t, coord.tempWriteCache[txHash], 1) + require.Equal(t, uint64(4), coord.tempWriteCache[txHash][0].blockNumber) +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go index 32aea5872e..0f34238182 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go @@ -131,23 +131,33 @@ func testReceiptInput(blockNumber uint64, txHash common.Hash) parquet.ReceiptInp } type recordingWAL struct { - entries []parquet.WALEntry + entries []parquet.WALEntry + firstOffset uint64 + lastOffset uint64 + truncatedBefore []uint64 } func (w *recordingWAL) Write(entry parquet.WALEntry) error { + if w.firstOffset == 0 { + w.firstOffset = 1 + } + w.lastOffset++ w.entries = append(w.entries, entry) return nil } -func (w *recordingWAL) TruncateBefore(uint64) error { return nil } +func (w *recordingWAL) TruncateBefore(offset uint64) error { + w.truncatedBefore = append(w.truncatedBefore, offset) + return nil +} func (w *recordingWAL) TruncateAfter(uint64) error { return nil } func (w *recordingWAL) ReadAt(uint64) (parquet.WALEntry, error) { return parquet.WALEntry{}, nil } -func (w *recordingWAL) FirstOffset() (uint64, error) { return 0, nil } +func (w *recordingWAL) FirstOffset() (uint64, error) { return w.firstOffset, nil } -func (w *recordingWAL) LastOffset() (uint64, error) { return 0, nil } +func (w *recordingWAL) LastOffset() (uint64, error) { 
return w.lastOffset, nil } func (w *recordingWAL) Replay(uint64, uint64, func(uint64, parquet.WALEntry) error) error { return nil diff --git a/sei-db/ledger_db/receipt/parquet_v2/wal.go b/sei-db/ledger_db/receipt/parquet_v2/wal.go index 2097c0bf96..0d84e29484 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/wal.go +++ b/sei-db/ledger_db/receipt/parquet_v2/wal.go @@ -1,11 +1,40 @@ package parquet_v2 +import ( + "fmt" + "strings" +) + func (c *coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, error) { _ = c _ = converter return ReplayResult{}, ErrNotImplemented } +func (c *coordinator) clearWALPreservingLast() error { + if c.wal == nil { + return nil + } + firstOffset, errFirst := c.wal.FirstOffset() + if errFirst != nil || firstOffset <= 0 { + return nil + } + lastOffset, errLast := c.wal.LastOffset() + if errLast != nil || lastOffset <= 0 { + return nil + } + if lastOffset <= firstOffset { + return nil + } + if err := c.wal.TruncateBefore(lastOffset); err != nil { + if strings.Contains(err.Error(), "out of range") { + return nil + } + return fmt.Errorf("failed to truncate parquet WAL before offset %d: %w", lastOffset, err) + } + return nil +} + func truncateReplayWAL(w interface{ TruncateBefore(offset uint64) error }, dropOffset uint64) error { _ = w _ = dropOffset From 3573c98c97c7200e160bf6f121d5667d43384547 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 21:58:12 -0400 Subject: [PATCH 08/27] Add parquet v2 empty block rotation --- .../ledger_db/receipt/parquet_v2/handlers.go | 18 ++++++++-- .../receipt/parquet_v2/store_dispatch_test.go | 6 ---- .../receipt/parquet_v2/store_rotation_test.go | 33 +++++++++++++++++++ 3 files changed, 49 insertions(+), 8 deletions(-) diff --git a/sei-db/ledger_db/receipt/parquet_v2/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/handlers.go index d52ac9ce15..798df084fc 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/handlers.go @@ -30,8 
+30,7 @@ func (c *coordinator) handleGetLogs(req getLogsReq) { } func (c *coordinator) handleObserveEmptyBlock(req observeEmptyBlockReq) { - _ = c - req.resp <- ErrNotImplemented + req.resp <- c.observeEmptyBlock(req.height) } func (c *coordinator) handleFlush(req flushReq) { @@ -376,6 +375,21 @@ func (c *coordinator) dropTempCacheBefore(blockNumber uint64) { } } +func (c *coordinator) observeEmptyBlock(height uint64) error { + if height <= c.lastSeenBlock { + return nil + } + if c.receiptWriter == nil || !c.isRotationBoundary(height) { + c.lastSeenBlock = height + return nil + } + if err := c.rotateOpenFile(height); err != nil { + return err + } + c.lastSeenBlock = height + return nil +} + func (c *coordinator) flushOpenFile() error { if len(c.receiptsBuffer) == 0 { return nil diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go index 79d7dbed90..626a5aad67 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go @@ -51,12 +51,6 @@ func TestUnimplementedOperationsDispatchThroughCoordinator(t *testing.T) { return err }, }, - { - name: "observe empty block", - run: func(store *Store) error { - return store.ObserveEmptyBlock(1) - }, - }, { name: "replay WAL", run: func(store *Store) error { diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go index c66b20dd64..41e4c2d4c6 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go @@ -151,3 +151,36 @@ func TestRotateOpenFilePrunesOnlyOldTempCacheEntries(t *testing.T) { require.Len(t, coord.tempWriteCache[txHash], 1) require.Equal(t, uint64(4), coord.tempWriteCache[txHash][0].blockNumber) } + +func TestObserveEmptyBlockHonorsMonotonicLastSeen(t *testing.T) { + coord := newWriteCoordinator(t, &recordingWAL{}) 
+ + require.NoError(t, coord.observeEmptyBlock(5)) + require.Equal(t, uint64(5), coord.lastSeenBlock) + + require.NoError(t, coord.observeEmptyBlock(4)) + require.Equal(t, uint64(5), coord.lastSeenBlock) + require.Empty(t, coord.closedFiles) +} + +func TestObserveEmptyBlockRotatesAtBoundary(t *testing.T) { + wal := &recordingWAL{} + coord := newWriteCoordinator(t, wal) + coord.config.MaxBlocksPerFile = 4 + defer func() { require.NoError(t, coord.closeWriters()) }() + + require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + testReceiptInput(1, common.HexToHash("0x1")), + })) + require.NotNil(t, coord.receiptWriter) + + require.NoError(t, coord.observeEmptyBlock(4)) + + require.Equal(t, uint64(4), coord.lastSeenBlock) + require.Equal(t, uint64(4), coord.fileStartBlock) + require.Len(t, coord.closedFiles, 1) + require.Equal(t, uint64(0), coord.closedFiles[0].startBlock) + require.FileExists(t, filepath.Join(coord.basePath, "receipts_0.parquet")) + require.FileExists(t, filepath.Join(coord.basePath, "receipts_4.parquet")) + require.Empty(t, coord.tempWriteCache) +} From b83c4b54fd034341bcf4a73c9b469c159cc12724 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 22:04:27 -0400 Subject: [PATCH 09/27] Add parquet v2 read path --- .../ledger_db/receipt/parquet_v2/handlers.go | 90 ++++++++- sei-db/ledger_db/receipt/parquet_v2/reader.go | 177 ++++++++++++++++-- .../receipt/parquet_v2/store_dispatch_test.go | 25 --- .../receipt/parquet_v2/store_read_test.go | 176 +++++++++++++++++ 4 files changed, 421 insertions(+), 47 deletions(-) create mode 100644 sei-db/ledger_db/receipt/parquet_v2/store_read_test.go diff --git a/sei-db/ledger_db/receipt/parquet_v2/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/handlers.go index 798df084fc..165107662c 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/handlers.go @@ -15,18 +15,41 @@ func (c *coordinator) handleWrite(req writeReq) { } func (c *coordinator) 
handleReadByTxHash(req readByTxHashReq) { - _ = c - req.resp <- readReceiptResp{err: ErrNotImplemented} + if result := c.cachedReceiptByTxHash(req.txHash); result != nil { + req.resp <- readReceiptResp{result: result} + return + } + if c.reader == nil { + req.resp <- readReceiptResp{err: fmt.Errorf("parquet reader is not initialized")} + return + } + + result, err := c.reader.QueryReceiptByTxHash(req.ctx, c.receiptFilesSnapshot(), req.txHash) + req.resp <- readReceiptResp{result: result, err: err} } func (c *coordinator) handleReadByTxHashInBlock(req readByTxHashInBlockReq) { - _ = c - req.resp <- readReceiptResp{err: ErrNotImplemented} + if result := c.cachedReceiptByTxHashInBlock(req.txHash, req.blockNumber); result != nil { + req.resp <- readReceiptResp{result: result} + return + } + if c.reader == nil { + req.resp <- readReceiptResp{err: fmt.Errorf("parquet reader is not initialized")} + return + } + + result, err := c.reader.QueryReceiptByTxHashInBlock(req.ctx, c.receiptFileSnapshotForBlock(req.blockNumber), req.txHash, req.blockNumber) + req.resp <- readReceiptResp{result: result, err: err} } func (c *coordinator) handleGetLogs(req getLogsReq) { - _ = c - req.resp <- getLogsResp{err: ErrNotImplemented} + if c.reader == nil { + req.resp <- getLogsResp{err: fmt.Errorf("parquet reader is not initialized")} + return + } + + results, err := c.reader.QueryLogs(req.ctx, c.logFilesSnapshot(), req.filter) + req.resp <- getLogsResp{results: results, err: err} } func (c *coordinator) handleObserveEmptyBlock(req observeEmptyBlockReq) { @@ -260,6 +283,61 @@ func (c *coordinator) applyReceipt(input parquet.ReceiptInput) error { return nil } +func (c *coordinator) cachedReceiptByTxHash(txHash common.Hash) *parquet.ReceiptResult { + entries := c.tempWriteCache[txHash] + if len(entries) == 0 { + return nil + } + return receiptResultFromTemp(txHash, entries[0]) +} + +func (c *coordinator) cachedReceiptByTxHashInBlock(txHash common.Hash, blockNumber uint64) 
*parquet.ReceiptResult { + for _, entry := range c.tempWriteCache[txHash] { + if entry.blockNumber == blockNumber { + return receiptResultFromTemp(txHash, entry) + } + } + return nil +} + +func receiptResultFromTemp(txHash common.Hash, entry tempReceipt) *parquet.ReceiptResult { + return &parquet.ReceiptResult{ + TxHash: append([]byte(nil), txHash[:]...), + BlockNumber: entry.blockNumber, + ReceiptBytes: append([]byte(nil), entry.receiptBytes...), + } +} + +func (c *coordinator) receiptFilesSnapshot() []string { + files := make([]string, 0, len(c.closedFiles)) + for _, f := range c.closedFiles { + files = append(files, f.receiptPath) + } + return files +} + +func (c *coordinator) receiptFileSnapshotForBlock(blockNumber uint64) []string { + var best string + for _, f := range c.closedFiles { + if f.startBlock > blockNumber { + break + } + best = f.receiptPath + } + if best == "" { + return nil + } + return []string{best} +} + +func (c *coordinator) logFilesSnapshot() []string { + files := make([]string, 0, len(c.closedFiles)) + for _, f := range c.closedFiles { + files = append(files, f.logPath) + } + return files +} + func (c *coordinator) isRotationBoundary(blockNumber uint64) bool { return c.config.MaxBlocksPerFile > 0 && blockNumber%c.config.MaxBlocksPerFile == 0 } diff --git a/sei-db/ledger_db/receipt/parquet_v2/reader.go b/sei-db/ledger_db/receipt/parquet_v2/reader.go index 378d1cf110..fc181b10c1 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/reader.go +++ b/sei-db/ledger_db/receipt/parquet_v2/reader.go @@ -3,6 +3,7 @@ package parquet_v2 import ( "context" "database/sql" + "errors" "fmt" "runtime" "strings" @@ -78,28 +79,165 @@ func (r *Reader) Close() error { } func (r *Reader) QueryReceiptByTxHash(ctx context.Context, files []string, txHash common.Hash) (*parquet.ReceiptResult, error) { - _ = r - _ = ctx - _ = files - _ = txHash - return nil, ErrNotImplemented + if len(files) == 0 { + return nil, nil + } + parquetFiles := parquetFilesSQL(files) + + // 
#nosec G201 -- parquetFiles derived from coordinator-owned local file paths. + query := fmt.Sprintf(` + SELECT + tx_hash, block_number, receipt_bytes + FROM read_parquet(%s, union_by_name=true) + WHERE tx_hash = $1 + ORDER BY block_number + LIMIT 1 + `, parquetFiles) + + row := r.db.QueryRowContext(ctx, query, txHash[:]) + var rec parquet.ReceiptResult + if err := row.Scan(&rec.TxHash, &rec.BlockNumber, &rec.ReceiptBytes); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, nil + } + return nil, fmt.Errorf("failed to query receipt: %w", err) + } + return &rec, nil } func (r *Reader) QueryReceiptByTxHashInBlock(ctx context.Context, files []string, txHash common.Hash, blockNumber uint64) (*parquet.ReceiptResult, error) { - _ = r - _ = ctx - _ = files - _ = txHash - _ = blockNumber - return nil, ErrNotImplemented + if len(files) == 0 { + return nil, nil + } + parquetFiles := parquetFilesSQL(files) + + // #nosec G201 -- parquetFiles derived from coordinator-owned local file paths. 
+ query := fmt.Sprintf(` + SELECT + tx_hash, block_number, receipt_bytes + FROM read_parquet(%s, union_by_name=true) + WHERE tx_hash = $1 AND block_number = $2 + LIMIT 1 + `, parquetFiles) + + row := r.db.QueryRowContext(ctx, query, txHash[:], blockNumber) + var rec parquet.ReceiptResult + if err := row.Scan(&rec.TxHash, &rec.BlockNumber, &rec.ReceiptBytes); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, nil + } + return nil, fmt.Errorf("failed to query receipt: %w", err) + } + return &rec, nil } func (r *Reader) QueryLogs(ctx context.Context, files []string, filter parquet.LogFilter) ([]parquet.LogResult, error) { - _ = r - _ = ctx - _ = files - _ = filter - return nil, ErrNotImplemented + files = r.filterLogFiles(files, filter) + if len(files) == 0 { + return nil, nil + } + return r.queryLogFiles(ctx, files, filter) +} + +func (r *Reader) filterLogFiles(files []string, filter parquet.LogFilter) []string { + filtered := make([]string, 0, len(files)) + for _, f := range files { + startBlock := parquet.ExtractBlockNumber(f) + if filter.ToBlock != nil && startBlock > *filter.ToBlock { + continue + } + if filter.FromBlock != nil && startBlock+r.maxBlocksPerFile <= *filter.FromBlock { + continue + } + filtered = append(filtered, f) + } + return filtered +} + +func (r *Reader) queryLogFiles(ctx context.Context, files []string, filter parquet.LogFilter) ([]parquet.LogResult, error) { + // #nosec G201 -- parquetFiles derived from coordinator-owned local file paths. 
+ query := fmt.Sprintf(` + SELECT + block_number, tx_hash, tx_index, log_index, address, + topic0, topic1, topic2, topic3, data, block_hash, removed + FROM read_parquet(%s, union_by_name=true) + WHERE 1=1 + `, parquetFilesSQL(files)) + + var args []any + argIdx := 1 + + if filter.FromBlock != nil { + query += fmt.Sprintf(" AND block_number >= $%d", argIdx) + args = append(args, *filter.FromBlock) + argIdx++ + } + + if filter.ToBlock != nil { + query += fmt.Sprintf(" AND block_number <= $%d", argIdx) + args = append(args, *filter.ToBlock) + argIdx++ + } + + if len(filter.Addresses) > 0 { + placeholders := make([]string, len(filter.Addresses)) + for i, addr := range filter.Addresses { + placeholders[i] = fmt.Sprintf("$%d", argIdx) + args = append(args, addr[:]) + argIdx++ + } + query += fmt.Sprintf(" AND address IN (%s)", strings.Join(placeholders, ", ")) + } + + topicCols := []string{"topic0", "topic1", "topic2", "topic3"} + for i, topicList := range filter.Topics { + if i >= 4 { + break + } + if len(topicList) == 0 { + continue + } + if len(topicList) == 1 { + query += fmt.Sprintf(" AND %s = $%d", topicCols[i], argIdx) + args = append(args, topicList[0][:]) + argIdx++ + continue + } + + placeholders := make([]string, len(topicList)) + for j, topic := range topicList { + placeholders[j] = fmt.Sprintf("$%d", argIdx) + args = append(args, topic[:]) + argIdx++ + } + query += fmt.Sprintf(" AND %s IN (%s)", topicCols[i], strings.Join(placeholders, ", ")) + } + + query += " ORDER BY block_number, tx_index, log_index" + if filter.Limit > 0 { + query += fmt.Sprintf(" LIMIT %d", filter.Limit) + } + + rows, err := r.db.QueryContext(ctx, query, args...) 
+ if err != nil { + return nil, fmt.Errorf("failed to query logs: %w", err) + } + defer func() { _ = rows.Close() }() + + var results []parquet.LogResult + for rows.Next() { + var log parquet.LogResult + if err := rows.Scan( + &log.BlockNumber, &log.TxHash, &log.TxIndex, &log.LogIndex, + &log.Address, &log.Topic0, &log.Topic1, &log.Topic2, &log.Topic3, + &log.Data, &log.BlockHash, &log.Removed, + ); err != nil { + return nil, fmt.Errorf("failed to scan log: %w", err) + } + results = append(results, log) + } + + return results, rows.Err() } func (r *Reader) MaxReceiptBlockNumber(ctx context.Context, files []string) (uint64, bool, error) { @@ -163,3 +301,10 @@ func joinQuoted(files []string) string { func quoteSQLString(s string) string { return "'" + strings.ReplaceAll(s, "'", "''") + "'" } + +func parquetFilesSQL(files []string) string { + if len(files) == 1 { + return quoteSQLString(files[0]) + } + return fmt.Sprintf("[%s]", joinQuoted(files)) +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go index 626a5aad67..87c90e4474 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go @@ -1,7 +1,6 @@ package parquet_v2 import ( - "context" "testing" "time" @@ -23,34 +22,10 @@ func newDispatchStore(t *testing.T) *Store { } func TestUnimplementedOperationsDispatchThroughCoordinator(t *testing.T) { - ctx := context.Background() - txHash := common.HexToHash("0x1") - tests := []struct { name string run func(*Store) error }{ - { - name: "get receipt by tx hash", - run: func(store *Store) error { - _, err := store.GetReceiptByTxHash(ctx, txHash) - return err - }, - }, - { - name: "get receipt by tx hash in block", - run: func(store *Store) error { - _, err := store.GetReceiptByTxHashInBlock(ctx, txHash, 1) - return err - }, - }, - { - name: "get logs", - run: func(store *Store) error { - _, err := store.GetLogs(ctx, 
parquet.LogFilter{}) - return err - }, - }, { name: "replay WAL", run: func(store *Store) error { diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go new file mode 100644 index 0000000000..8106c8f995 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go @@ -0,0 +1,176 @@ +package parquet_v2 + +import ( + "context" + "math/big" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func TestReadByTxHashHitsTempCache(t *testing.T) { + txHash := common.HexToHash("0xabc") + coord := &coordinator{ + tempWriteCache: map[common.Hash][]tempReceipt{ + txHash: { + {blockNumber: 10, writeOrdinal: 0, receiptBytes: []byte("first")}, + {blockNumber: 10, writeOrdinal: 1, receiptBytes: []byte("second")}, + {blockNumber: 11, writeOrdinal: 2, receiptBytes: []byte("third")}, + }, + }, + } + + resp := make(chan readReceiptResp, 1) + coord.handleReadByTxHash(readByTxHashReq{ + ctx: context.Background(), + txHash: txHash, + resp: resp, + }) + result := <-resp + require.NoError(t, result.err) + require.Equal(t, uint64(10), result.result.BlockNumber) + require.Equal(t, []byte("first"), result.result.ReceiptBytes) + + resp = make(chan readReceiptResp, 1) + coord.handleReadByTxHashInBlock(readByTxHashInBlockReq{ + ctx: context.Background(), + txHash: txHash, + blockNumber: 11, + resp: resp, + }) + result = <-resp + require.NoError(t, result.err) + require.Equal(t, uint64(11), result.result.BlockNumber) + require.Equal(t, []byte("third"), result.result.ReceiptBytes) + + resp = make(chan readReceiptResp, 1) + coord.handleReadByTxHashInBlock(readByTxHashInBlockReq{ + ctx: context.Background(), + txHash: txHash, + blockNumber: 10, + resp: resp, + }) + result = <-resp + require.NoError(t, result.err) + require.Equal(t, []byte("first"), result.result.ReceiptBytes) +} + +func 
TestReadByTxHashFallsThroughToClosedFiles(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + txHash := common.HexToHash("0xabc") + + store, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 10, + }) + require.NoError(t, err) + require.NoError(t, store.WriteReceipts([]parquet.ReceiptInput{ + testReceiptInput(1, txHash), + testReceiptInput(2, txHash), + })) + require.NoError(t, store.Close()) + + reopened, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 10, + }) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, reopened.Close()) }) + + result, err := reopened.GetReceiptByTxHash(ctx, txHash) + require.NoError(t, err) + require.NotNil(t, result) + require.Equal(t, uint64(1), result.BlockNumber) + + result, err = reopened.GetReceiptByTxHashInBlock(ctx, txHash, 2) + require.NoError(t, err) + require.NotNil(t, result) + require.Equal(t, uint64(2), result.BlockNumber) + require.Equal(t, testReceiptInput(2, txHash).ReceiptBytes, result.ReceiptBytes) +} + +func TestReadByTxHashAfterRotationUsesClosedFilesAndTempCache(t *testing.T) { + ctx := context.Background() + txHash := common.HexToHash("0xabc") + + store, err := NewStore(parquet.StoreConfig{ + DBDirectory: t.TempDir(), + MaxBlocksPerFile: 4, + }) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, store.Close()) }) + + require.NoError(t, store.WriteReceipts([]parquet.ReceiptInput{ + testReceiptInput(1, txHash), + testReceiptInput(2, common.HexToHash("0x2")), + testReceiptInput(3, common.HexToHash("0x3")), + testReceiptInput(4, common.HexToHash("0x4")), + testReceiptInput(5, txHash), + })) + + closedResult, err := store.GetReceiptByTxHashInBlock(ctx, txHash, 1) + require.NoError(t, err) + require.NotNil(t, closedResult) + require.Equal(t, uint64(1), closedResult.BlockNumber) + + openResult, err := store.GetReceiptByTxHashInBlock(ctx, txHash, 5) + require.NoError(t, err) + require.NotNil(t, openResult) + 
require.Equal(t, uint64(5), openResult.BlockNumber) + require.Equal(t, testReceiptInput(5, txHash).ReceiptBytes, openResult.ReceiptBytes) +} + +func TestGetLogsReadsAcrossClosedFiles(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + + store, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 4, + }) + require.NoError(t, err) + + var inputs []parquet.ReceiptInput + for block := uint64(1); block <= 8; block++ { + inputs = append(inputs, testReceiptInput(block, common.BigToHash(new(big.Int).SetUint64(block)))) + } + require.NoError(t, store.WriteReceipts(inputs)) + require.NoError(t, store.Close()) + + reopened, err := NewStore(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 4, + }) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, reopened.Close()) }) + + from, to := uint64(2), uint64(6) + results, err := reopened.GetLogs(ctx, parquet.LogFilter{ + FromBlock: &from, + ToBlock: &to, + }) + require.NoError(t, err) + require.Len(t, results, 5) + require.Equal(t, []uint64{2, 3, 4, 5, 6}, logBlockNumbers(results)) + + address := common.BigToAddress(new(big.Int).SetUint64(5)) + results, err = reopened.GetLogs(ctx, parquet.LogFilter{ + FromBlock: &from, + ToBlock: &to, + Addresses: []common.Address{address}, + }) + require.NoError(t, err) + require.Len(t, results, 1) + require.Equal(t, uint64(5), results[0].BlockNumber) +} + +func logBlockNumbers(results []parquet.LogResult) []uint64 { + blocks := make([]uint64, 0, len(results)) + for _, result := range results { + blocks = append(blocks, result.BlockNumber) + } + return blocks +} From 90e05cb219617854efaa24721236b75c105db57f Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 22:07:19 -0400 Subject: [PATCH 10/27] Add parquet v2 pruning --- .../receipt/parquet_v2/export_test.go | 5 + .../ledger_db/receipt/parquet_v2/handlers.go | 12 +- sei-db/ledger_db/receipt/parquet_v2/prune.go | 55 ++++++++- .../receipt/parquet_v2/store_prune_test.go | 
110 ++++++++++++++++++ 4 files changed, 178 insertions(+), 4 deletions(-) create mode 100644 sei-db/ledger_db/receipt/parquet_v2/export_test.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/store_prune_test.go diff --git a/sei-db/ledger_db/receipt/parquet_v2/export_test.go b/sei-db/ledger_db/receipt/parquet_v2/export_test.go new file mode 100644 index 0000000000..db3a88b823 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/export_test.go @@ -0,0 +1,5 @@ +package parquet_v2 + +func forcePruneTickForTest(c *coordinator) { + c.handlePruneTick() +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/handlers.go index 165107662c..f867765dbd 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/handlers.go @@ -117,7 +117,17 @@ func (c *coordinator) handleReplayWAL(req replayWALReq) { } func (c *coordinator) handlePruneTick() { - _ = c + // TODO(future-async): if read I/O moves to a worker pool, gate prune on + // map[fileID]int reference counts that the coordinator increments on + // dispatch and decrements on completion. 
+ if c.config.KeepRecent <= 0 { + return + } + pruneBeforeBlock := c.latestVersion - c.config.KeepRecent + if pruneBeforeBlock <= 0 { + return + } + c.pruneOldFiles(uint64(pruneBeforeBlock)) } func (c *coordinator) handleClose(req closeReq) { diff --git a/sei-db/ledger_db/receipt/parquet_v2/prune.go b/sei-db/ledger_db/receipt/parquet_v2/prune.go index 66440c0b2e..ff35262d25 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/prune.go +++ b/sei-db/ledger_db/receipt/parquet_v2/prune.go @@ -1,7 +1,56 @@ package parquet_v2 +import ( + "log" + "os" +) + +var removeFile = os.Remove + func (c *coordinator) pruneOldFiles(pruneBeforeBlock uint64) int { - _ = c - _ = pruneBeforeBlock - return 0 + if len(c.closedFiles) == 0 { + return 0 + } + + prunedCount := 0 + kept := c.closedFiles[:0] + for _, f := range c.closedFiles { + if !c.shouldPruneClosedFile(f, pruneBeforeBlock) { + kept = append(kept, f) + continue + } + + receiptRemoved := removePrunedFile(f.receiptPath) + if !receiptRemoved { + kept = append(kept, f) + continue + } + logRemoved := removePrunedFile(f.logPath) + if logRemoved { + prunedCount++ + continue + } + kept = append(kept, f) + } + c.closedFiles = kept + return prunedCount +} + +func (c *coordinator) shouldPruneClosedFile(f closedFile, pruneBeforeBlock uint64) bool { + fileEndBlock := f.startBlock + c.config.MaxBlocksPerFile + if fileEndBlock < f.startBlock { + fileEndBlock = ^uint64(0) + } + return fileEndBlock <= pruneBeforeBlock +} + +func removePrunedFile(path string) bool { + if path == "" { + return true + } + if err := removeFile(path); err != nil && !os.IsNotExist(err) { + log.Printf("failed to prune parquet file %s: %v", path, err) + return false + } + return true } diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_prune_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_prune_test.go new file mode 100644 index 0000000000..650bd7be92 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/store_prune_test.go @@ -0,0 +1,110 @@ +package 
parquet_v2 + +import ( + "context" + "errors" + "math/big" + "os" + "path/filepath" + "strconv" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func TestPruneTickDeletesEligibleClosedFiles(t *testing.T) { + dir := t.TempDir() + closedFiles := writeClosedFileSet(t, dir, 0, 4, 8) + + reader, err := NewReaderWithMaxBlocksPerFile(dir, 4) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, reader.Close()) }) + + coord := &coordinator{ + config: parquet.StoreConfig{ + KeepRecent: 4, + MaxBlocksPerFile: 4, + }, + closedFiles: closedFiles, + latestVersion: 12, + reader: reader, + } + + forcePruneTickForTest(coord) + + require.Len(t, coord.closedFiles, 1) + require.Equal(t, uint64(8), coord.closedFiles[0].startBlock) + require.NoFileExists(t, filepath.Join(dir, "receipts_0.parquet")) + require.NoFileExists(t, filepath.Join(dir, "logs_0.parquet")) + require.NoFileExists(t, filepath.Join(dir, "receipts_4.parquet")) + require.NoFileExists(t, filepath.Join(dir, "logs_4.parquet")) + require.FileExists(t, filepath.Join(dir, "receipts_8.parquet")) + require.FileExists(t, filepath.Join(dir, "logs_8.parquet")) + + prunedResult := readClosedReceiptForTest(t, coord, common.BigToHash(new(big.Int).SetUint64(1)), 1) + require.Nil(t, prunedResult) + + keptResult := readClosedReceiptForTest(t, coord, common.BigToHash(new(big.Int).SetUint64(9)), 9) + require.NotNil(t, keptResult) + require.Equal(t, uint64(9), keptResult.BlockNumber) +} + +func TestPruneKeepsFilePairTrackedWhenDeleteFails(t *testing.T) { + dir := t.TempDir() + closedFiles := writeClosedFileSet(t, dir, 0) + failPath := filepath.Join(dir, "receipts_0.parquet") + + originalRemoveFile := removeFile + t.Cleanup(func() { removeFile = originalRemoveFile }) + removeFile = func(path string) error { + if path == failPath { + return errors.New("delete failed") + } + return os.Remove(path) + } + + coord := 
&coordinator{ + config: parquet.StoreConfig{MaxBlocksPerFile: 4}, + closedFiles: closedFiles, + } + + require.Zero(t, coord.pruneOldFiles(4)) + require.Len(t, coord.closedFiles, 1) + require.Equal(t, uint64(0), coord.closedFiles[0].startBlock) + require.FileExists(t, failPath) + require.FileExists(t, filepath.Join(dir, "logs_0.parquet")) +} + +func writeClosedFileSet(t *testing.T, dir string, starts ...uint64) []closedFile { + t.Helper() + + closed := make([]closedFile, 0, len(starts)) + for _, start := range starts { + block := start + 1 + writeReceiptFile(t, dir, start, []uint64{block}) + writeLogFile(t, dir, start) + closed = append(closed, closedFile{ + startBlock: start, + receiptPath: filepath.Join(dir, "receipts_"+strconv.FormatUint(start, 10)+".parquet"), + logPath: filepath.Join(dir, "logs_"+strconv.FormatUint(start, 10)+".parquet"), + }) + } + return closed +} + +func readClosedReceiptForTest(t *testing.T, coord *coordinator, txHash common.Hash, blockNumber uint64) *parquet.ReceiptResult { + t.Helper() + + resp := make(chan readReceiptResp, 1) + coord.handleReadByTxHashInBlock(readByTxHashInBlockReq{ + ctx: context.Background(), + txHash: txHash, + blockNumber: blockNumber, + resp: resp, + }) + result := <-resp + require.NoError(t, result.err) + return result.result +} From 4d409ef2915c2a7fef0cd74cbf1d43c1f8b2ca84 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 22:10:59 -0400 Subject: [PATCH 11/27] Add parquet v2 WAL replay --- .../ledger_db/receipt/parquet_v2/handlers.go | 4 +- .../receipt/parquet_v2/store_dispatch_test.go | 22 --- .../receipt/parquet_v2/store_wal_test.go | 128 +++++++++++++++ .../receipt/parquet_v2/store_write_test.go | 11 +- sei-db/ledger_db/receipt/parquet_v2/types.go | 4 - sei-db/ledger_db/receipt/parquet_v2/wal.go | 155 +++++++++++++++++- 6 files changed, 289 insertions(+), 35 deletions(-) create mode 100644 sei-db/ledger_db/receipt/parquet_v2/store_wal_test.go diff --git 
a/sei-db/ledger_db/receipt/parquet_v2/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/handlers.go index f867765dbd..56671c72ec 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/handlers.go @@ -112,8 +112,8 @@ func (c *coordinator) handleSetFaultHooks(req setFaultHooksReq) { } func (c *coordinator) handleReplayWAL(req replayWALReq) { - _ = c - req.resp <- replayWALResp{err: ErrNotImplemented} + result, err := c.replayWAL(req.converter) + req.resp <- replayWALResp{result: result, err: err} } func (c *coordinator) handlePruneTick() { diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go index 87c90e4474..1a3668ab54 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go @@ -21,28 +21,6 @@ func newDispatchStore(t *testing.T) *Store { return store } -func TestUnimplementedOperationsDispatchThroughCoordinator(t *testing.T) { - tests := []struct { - name string - run func(*Store) error - }{ - { - name: "replay WAL", - run: func(store *Store) error { - _, err := store.ReplayWAL(nil) - return err - }, - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - store := newDispatchStore(t) - require.ErrorIs(t, tc.run(store), ErrNotImplemented) - }) - } -} - func TestMetadataAndConfigRequestsDispatchThroughCoordinator(t *testing.T) { store := newDispatchStore(t) require.Zero(t, cap(store.requests)) diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_wal_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_wal_test.go new file mode 100644 index 0000000000..02254f142d --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/store_wal_test.go @@ -0,0 +1,128 @@ +package parquet_v2 + +import ( + "context" + "math/big" + "path/filepath" + "testing" + + "github.com/ethereum/go-ethereum/common" + 
"github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func TestReplayWALAppliesReceiptsAndPreservesDuplicateHashes(t *testing.T) { + wal := replayWALWithEntries(t, + parquet.WALEntry{BlockNumber: 1, Receipts: [][]byte{{7, 1}, {7, 2}}}, + parquet.WALEntry{BlockNumber: 2, Receipts: [][]byte{{8, 1}}}, + ) + coord := newReplayCoordinator(t, wal) + defer func() { require.NoError(t, coord.closeWriters()) }() + + result, err := coord.replayWAL(replayConverterForTest) + require.NoError(t, err) + + duplicateHash := common.BigToHash(new(big.Int).SetUint64(7)) + require.Len(t, result.WarmupRecords, 3) + require.Len(t, result.Blocks, 2) + require.Equal(t, uint64(1), result.Blocks[0].BlockNumber) + require.Equal(t, []common.Hash{duplicateHash, duplicateHash}, result.Blocks[0].TxHashes) + require.Len(t, coord.tempWriteCache[duplicateHash], 2) + require.Equal(t, int64(2), coord.latestVersion) + require.Empty(t, wal.truncatedBefore) +} + +func TestReplayWALSkipsEntriesBeforeFileStartAndTruncates(t *testing.T) { + wal := replayWALWithEntries(t, + parquet.WALEntry{BlockNumber: 2, Receipts: [][]byte{{2}}}, + parquet.WALEntry{BlockNumber: 4, Receipts: [][]byte{{4}}}, + ) + coord := newReplayCoordinator(t, wal) + coord.fileStartBlock = 4 + defer func() { require.NoError(t, coord.closeWriters()) }() + + result, err := coord.replayWAL(func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) { + require.NotEqual(t, uint64(2), blockNumber) + return replayConverterForTest(blockNumber, receiptBytes, logStartIndex) + }) + require.NoError(t, err) + + require.Len(t, result.WarmupRecords, 1) + require.Equal(t, uint64(4), result.WarmupRecords[0].BlockNumber) + require.Equal(t, []uint64{2}, wal.truncatedBefore) + require.Equal(t, int64(4), coord.latestVersion) +} + +func TestReplayWALRotatesBoundaryWithoutClearingWAL(t *testing.T) { + wal := replayWALWithEntries(t, + parquet.WALEntry{BlockNumber: 1, Receipts: 
[][]byte{{1}}}, + parquet.WALEntry{BlockNumber: 4, Receipts: [][]byte{{4}}}, + ) + coord := newReplayCoordinator(t, wal) + defer func() { require.NoError(t, coord.closeWriters()) }() + + _, err := coord.replayWAL(replayConverterForTest) + require.NoError(t, err) + + require.Len(t, coord.closedFiles, 1) + require.Equal(t, uint64(0), coord.closedFiles[0].startBlock) + require.Equal(t, uint64(4), coord.fileStartBlock) + require.FileExists(t, filepath.Join(coord.basePath, "receipts_0.parquet")) + require.FileExists(t, filepath.Join(coord.basePath, "receipts_4.parquet")) + require.Equal(t, []uint64{2}, wal.truncatedBefore) + require.Len(t, coord.tempWriteCache, 1) + require.Contains(t, coord.tempWriteCache, common.BigToHash(new(big.Int).SetUint64(4))) +} + +func TestReplayWALRequiresConverter(t *testing.T) { + store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, store.Close()) }) + + _, err = store.ReplayWAL(nil) + require.ErrorContains(t, err, "converter is nil") +} + +func replayWALWithEntries(t *testing.T, entries ...parquet.WALEntry) *recordingWAL { + t.Helper() + + wal := &recordingWAL{} + for _, entry := range entries { + require.NoError(t, wal.Write(entry)) + } + return wal +} + +func newReplayCoordinator(t *testing.T, wal *recordingWAL) *coordinator { + t.Helper() + + coord := newWriteCoordinator(t, wal) + coord.config.MaxBlocksPerFile = 4 + return coord +} + +func replayConverterForTest(blockNumber uint64, receiptBytes []byte, _ uint) (ReplayReceipt, error) { + txHash := common.BigToHash(new(big.Int).SetUint64(uint64(receiptBytes[0]))) + input := testReceiptInput(blockNumber, txHash) + input.ReceiptBytes = append([]byte(nil), receiptBytes...) + input.Receipt.ReceiptBytes = append([]byte(nil), receiptBytes...) 
+ + return ReplayReceipt{ + Input: input, + TxHash: txHash, + Warmup: input.Receipt, + LogCount: uint(len(input.Logs)), + }, nil +} + +func TestReplayWALPublicDispatch(t *testing.T) { + store := newDispatchStore(t) + _, err := store.ReplayWAL(func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) { + return replayConverterForTest(blockNumber, receiptBytes, logStartIndex) + }) + require.NoError(t, err) + + result, err := store.GetReceiptByTxHash(context.Background(), common.HexToHash("0x1")) + require.NoError(t, err) + require.Nil(t, result) +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go index 0f34238182..df4a4945f6 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go @@ -159,7 +159,16 @@ func (w *recordingWAL) FirstOffset() (uint64, error) { return w.firstOffset, nil func (w *recordingWAL) LastOffset() (uint64, error) { return w.lastOffset, nil } -func (w *recordingWAL) Replay(uint64, uint64, func(uint64, parquet.WALEntry) error) error { +func (w *recordingWAL) Replay(firstOffset, lastOffset uint64, fn func(uint64, parquet.WALEntry) error) error { + for i, entry := range w.entries { + offset := uint64(i) + 1 + if offset < firstOffset || offset > lastOffset { + continue + } + if err := fn(offset, entry); err != nil { + return err + } + } return nil } diff --git a/sei-db/ledger_db/receipt/parquet_v2/types.go b/sei-db/ledger_db/receipt/parquet_v2/types.go index 48ecb6691c..667f579cc0 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/types.go +++ b/sei-db/ledger_db/receipt/parquet_v2/types.go @@ -9,10 +9,6 @@ import ( "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" ) -// ErrNotImplemented marks methods that are intentionally non-functional until -// their coordinator handlers are implemented. 
-var ErrNotImplemented = errors.New("not implemented") - // ErrStoreClosed is returned when a request is made after the coordinator has // stopped accepting work. var ErrStoreClosed = errors.New("store closed") diff --git a/sei-db/ledger_db/receipt/parquet_v2/wal.go b/sei-db/ledger_db/receipt/parquet_v2/wal.go index 0d84e29484..16fb632b64 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/wal.go +++ b/sei-db/ledger_db/receipt/parquet_v2/wal.go @@ -3,12 +3,148 @@ package parquet_v2 import ( "fmt" "strings" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" ) func (c *coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, error) { - _ = c - _ = converter - return ReplayResult{}, ErrNotImplemented + if converter == nil { + return ReplayResult{}, fmt.Errorf("WAL receipt converter is nil") + } + if c.wal == nil { + return ReplayResult{}, nil + } + + firstOffset, errFirst := c.wal.FirstOffset() + if errFirst != nil || firstOffset <= 0 { + return ReplayResult{}, nil + } + lastOffset, errLast := c.wal.LastOffset() + if errLast != nil || lastOffset <= 0 { + return ReplayResult{}, nil + } + + var ( + currentBlock uint64 + haveBlock bool + logStartIndex uint + maxBlock uint64 + dropOffset uint64 + ) + + result := ReplayResult{} + replayIdx := make(map[uint64]int) + + err := c.wal.Replay(firstOffset, lastOffset, func(offset uint64, entry parquet.WALEntry) error { + if len(entry.Receipts) == 0 { + return nil + } + + blockNumber := entry.BlockNumber + if blockNumber < c.fileStartBlock { + dropOffset = offset + return nil + } + + if haveBlock && blockNumber != currentBlock && c.isRotationBoundary(blockNumber) && blockNumber > c.fileStartBlock && offset > 0 { + dropOffset = offset - 1 + } + + if !haveBlock || blockNumber != currentBlock { + currentBlock = blockNumber + haveBlock = true + logStartIndex = 0 + } + + for _, receiptBytes := range entry.Receipts { + if len(receiptBytes) == 0 { + continue + } + + 
replayed, err := converter(blockNumber, receiptBytes, logStartIndex) + if err != nil { + return err + } + logStartIndex += replayed.LogCount + + result.WarmupRecords = append(result.WarmupRecords, copyReceiptRecord(replayed.Warmup)) + if idx, ok := replayIdx[blockNumber]; ok { + result.Blocks[idx].TxHashes = append(result.Blocks[idx].TxHashes, replayed.TxHash) + } else { + replayIdx[blockNumber] = len(result.Blocks) + result.Blocks = append(result.Blocks, ReplayedBlock{ + BlockNumber: blockNumber, + TxHashes: []common.Hash{replayed.TxHash}, + }) + } + + input := normalizeReplayInput(blockNumber, receiptBytes, replayed) + if err := c.applyReceiptFromReplay(input); err != nil { + return err + } + + if blockNumber > maxBlock { + maxBlock = blockNumber + } + } + + return nil + }) + if err != nil { + return ReplayResult{}, err + } + + if maxBlock > 0 { + latest, err := int64FromUint64(maxBlock) + if err != nil { + return ReplayResult{}, err + } + if latest > c.latestVersion { + c.latestVersion = latest + } + } + if err := truncateReplayWAL(c.wal, dropOffset); err != nil { + return ReplayResult{}, err + } + + c.replayedWarmup = append(c.replayedWarmup[:0], result.WarmupRecords...) + c.replayedBlocks = append(c.replayedBlocks[:0], result.Blocks...) + return result, nil +} + +func (c *coordinator) applyReceiptFromReplay(input parquet.ReceiptInput) error { + if c.receiptWriter != nil && input.BlockNumber != c.lastSeenBlock && c.isRotationBoundary(input.BlockNumber) { + if err := c.rotateOpenFileWithoutWAL(input.BlockNumber); err != nil { + return err + } + c.dropTempCacheBefore(c.fileStartBlock) + } + return c.applyReceipt(input) +} + +func normalizeReplayInput(blockNumber uint64, receiptBytes []byte, replayed ReplayReceipt) parquet.ReceiptInput { + input := replayed.Input + input.BlockNumber = blockNumber + input.Receipt.BlockNumber = blockNumber + if len(input.Receipt.TxHash) == 0 { + input.Receipt.TxHash = append([]byte(nil), replayed.TxHash[:]...) 
+ } + if len(input.Receipt.ReceiptBytes) == 0 { + input.Receipt.ReceiptBytes = append([]byte(nil), receiptBytes...) + } + if len(input.ReceiptBytes) == 0 { + input.ReceiptBytes = append([]byte(nil), receiptBytes...) + } + return input +} + +func copyReceiptRecord(record parquet.ReceiptRecord) parquet.ReceiptRecord { + return parquet.ReceiptRecord{ + TxHash: append([]byte(nil), record.TxHash...), + BlockNumber: record.BlockNumber, + ReceiptBytes: append([]byte(nil), record.ReceiptBytes...), + } } func (c *coordinator) clearWALPreservingLast() error { @@ -36,7 +172,14 @@ func (c *coordinator) clearWALPreservingLast() error { } func truncateReplayWAL(w interface{ TruncateBefore(offset uint64) error }, dropOffset uint64) error { - _ = w - _ = dropOffset - return ErrNotImplemented + if dropOffset == 0 { + return nil + } + if err := w.TruncateBefore(dropOffset + 1); err != nil { + if strings.Contains(err.Error(), "out of range") { + return nil + } + return fmt.Errorf("failed to truncate replay WAL before offset %d: %w", dropOffset+1, err) + } + return nil } From 064ae484b7e0633674d83feafc909789043956a1 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 22:15:34 -0400 Subject: [PATCH 12/27] Wire parquet v2 receipt backend --- sei-db/config/receipt_config.go | 10 +- sei-db/config/receipt_config_test.go | 18 + sei-db/config/toml.go | 6 +- sei-db/config/toml_test.go | 1 + .../receipt/parquet_v2_receipt_store.go | 392 ++++++++++++++++++ sei-db/ledger_db/receipt/receipt_store.go | 13 +- .../ledger_db/receipt/receipt_store_test.go | 45 ++ 7 files changed, 474 insertions(+), 11 deletions(-) create mode 100644 sei-db/ledger_db/receipt/parquet_v2_receipt_store.go diff --git a/sei-db/config/receipt_config.go b/sei-db/config/receipt_config.go index dbbe347a31..15bcdcfe78 100644 --- a/sei-db/config/receipt_config.go +++ b/sei-db/config/receipt_config.go @@ -42,7 +42,7 @@ type ReceiptStoreConfig struct { DBDirectory string `mapstructure:"db-directory"` // Backend defines 
the backend database used for receipt-store. - // Supported backends: pebbledb (aka pebble), parquet + // Supported backends: pebbledb (aka pebble), parquet, parquet_v2 // defaults to pebbledb Backend string `mapstructure:"rs-backend"` @@ -63,12 +63,12 @@ type ReceiptStoreConfig struct { PruneIntervalSeconds int `mapstructure:"prune-interval-seconds"` // TxIndexBackend selects the tx-hash index implementation used by the - // parquet receipt store. Set to "pebbledb" (the default) to maintain a + // parquet receipt stores. Set to "pebbledb" (the default) to maintain a // Pebble-backed tx_hash -> block_number index alongside parquet files so // receipt-by-hash lookups can target a single file instead of scanning all // files. Set to "" to disable the index; receipt-by-hash lookups that miss // the in-memory cache then fail (no full-parquet scan). Ignored when the - // receipt backend is not parquet. + // receipt backend is not parquet or parquet_v2. TxIndexBackend string `mapstructure:"tx-index-backend"` } @@ -105,10 +105,10 @@ func ReadReceiptConfig(opts AppOptions) (ReceiptStoreConfig, error) { } backend = strings.ToLower(strings.TrimSpace(backend)) switch backend { - case "pebbledb", "pebble", "parquet": + case "pebbledb", "pebble", "parquet", "parquet_v2": cfg.Backend = backend default: - return cfg, fmt.Errorf("unsupported receipt-store backend %q; supported: pebbledb, parquet", backend) + return cfg, fmt.Errorf("unsupported receipt-store backend %q; supported: pebbledb, parquet, parquet_v2", backend) } } if v := opts.Get(flagRSAsyncWriteBuffer); v != nil { diff --git a/sei-db/config/receipt_config_test.go b/sei-db/config/receipt_config_test.go index 3f9bea786f..eca53ec1e3 100644 --- a/sei-db/config/receipt_config_test.go +++ b/sei-db/config/receipt_config_test.go @@ -22,6 +22,24 @@ func TestReadReceiptConfigRejectsMisnamedBackendKey(t *testing.T) { require.ErrorContains(t, err, "receipt-store.rs-backend") } +func TestReadReceiptConfigAcceptsParquetV2Backend(t 
*testing.T) { + cfg, err := ReadReceiptConfig(mapAppOpts{ + "receipt-store.rs-backend": " parquet_v2 ", + }) + + require.NoError(t, err) + require.Equal(t, "parquet_v2", cfg.Backend) +} + +func TestReadReceiptConfigBackendErrorListsParquetV2(t *testing.T) { + _, err := ReadReceiptConfig(mapAppOpts{ + "receipt-store.rs-backend": "rocksdb", + }) + + require.Error(t, err) + require.ErrorContains(t, err, "parquet_v2") +} + func TestReadReceiptConfigTxIndexBackendOverride(t *testing.T) { cfg, err := ReadReceiptConfig(mapAppOpts{ "receipt-store.tx-index-backend": "", diff --git a/sei-db/config/toml.go b/sei-db/config/toml.go index eb387cb1d5..a55f1cfde4 100644 --- a/sei-db/config/toml.go +++ b/sei-db/config/toml.go @@ -149,7 +149,7 @@ const ReceiptStoreConfigTemplate = ` [receipt-store] # Backend defines the receipt store backend. -# Supported backends: pebble (aka pebbledb), parquet +# Supported backends: pebble (aka pebbledb), parquet, parquet_v2 # defaults to pebbledb rs-backend = "{{ .ReceiptStore.Backend }}" @@ -157,7 +157,7 @@ rs-backend = "{{ .ReceiptStore.Backend }}" db-directory = "{{ .ReceiptStore.DBDirectory }}" # AsyncWriteBuffer defines the async queue length for commits to be applied to receipt store. -# Applies only when rs-backend = "pebbledb"; parquet ignores this setting. +# Applies only when rs-backend = "pebbledb"; parquet and parquet_v2 ignore this setting. # Set <= 0 for synchronous writes. # defaults to 100 async-write-buffer = {{ .ReceiptStore.AsyncWriteBuffer }} @@ -169,7 +169,7 @@ prune-interval-seconds = {{ .ReceiptStore.PruneIntervalSeconds }} # TxIndexBackend selects the tx-hash index implementation for parquet receipts. # Set to "pebbledb" to enable the index, or "" to disable it. -# Ignored unless rs-backend = "parquet". +# Ignored unless rs-backend = "parquet" or "parquet_v2". 
tx-index-backend = "{{ .ReceiptStore.TxIndexBackend }}" ` diff --git a/sei-db/config/toml_test.go b/sei-db/config/toml_test.go index fd0a51f932..284ab29ad8 100644 --- a/sei-db/config/toml_test.go +++ b/sei-db/config/toml_test.go @@ -112,6 +112,7 @@ func TestReceiptStoreConfigTemplate(t *testing.T) { require.Contains(t, output, "[receipt-store]", "Missing receipt-store section") require.Contains(t, output, `rs-backend = "pebbledb"`, "Missing or incorrect rs-backend") + require.Contains(t, output, `parquet_v2`, "Missing parquet_v2 supported backend note") require.Contains(t, output, `db-directory = ""`, "Missing or incorrect db-directory") require.Contains(t, output, "async-write-buffer =", "Missing async-write-buffer") require.Contains(t, output, "prune-interval-seconds =", "Missing prune-interval-seconds") diff --git a/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go b/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go new file mode 100644 index 0000000000..72c0ce6003 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go @@ -0,0 +1,392 @@ +package receipt + +import ( + "context" + "fmt" + + "github.com/ethereum/go-ethereum/common" + ethtypes "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/eth/filters" + sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types" + dbconfig "github.com/sei-protocol/sei-chain/sei-db/config" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/receipt/parquet_v2" + "github.com/sei-protocol/sei-chain/x/evm/types" +) + +type parquetReceiptStoreV2 struct { + store *parquet_v2.Store + storeKey sdk.StoreKey + txHashIndex TxHashIndex + indexPruner *txHashIndexPruner + warmupRecords []parquet.ReceiptRecord +} + +func newParquetReceiptStoreV2(cfg dbconfig.ReceiptStoreConfig, storeKey sdk.StoreKey) (ReceiptStore, error) { + txIndexBackend := dbconfig.NormalizeReceiptTxIndexBackend(cfg.TxIndexBackend) + parquetTxIndexBackend := 
txIndexBackend + if parquetTxIndexBackend == dbconfig.ReceiptTxIndexBackendNone { + parquetTxIndexBackend = "none" + } + + store, err := parquet_v2.NewStore(parquet.StoreConfig{ + DBDirectory: cfg.DBDirectory, + KeepRecent: int64(cfg.KeepRecent), + PruneIntervalSeconds: int64(cfg.PruneIntervalSeconds), + TxIndexBackend: parquetTxIndexBackend, + }) + if err != nil { + return nil, err + } + + wrapper := &parquetReceiptStoreV2{ + store: store, + storeKey: storeKey, + } + + switch txIndexBackend { + case dbconfig.ReceiptTxIndexBackendNone: + case dbconfig.ReceiptTxIndexBackendPebble: + idx, err := NewPebbleTxHashIndex(TxHashIndexDir(cfg.DBDirectory)) + if err != nil { + _ = store.Close() + return nil, fmt.Errorf("failed to open tx hash index: %w", err) + } + wrapper.txHashIndex = idx + wrapper.indexPruner = newTxHashIndexPruner( + idx, + int64(cfg.KeepRecent), + int64(cfg.PruneIntervalSeconds), + func() int64 { return store.LatestVersion() }, + ) + default: + _ = store.Close() + return nil, fmt.Errorf("unsupported receipt tx index backend: %s", txIndexBackend) + } + + if err := wrapper.replayWAL(); err != nil { + _ = wrapper.Close() + return nil, err + } + + if wrapper.indexPruner != nil { + wrapper.indexPruner.Start() + } + + return wrapper, nil +} + +func (s *parquetReceiptStoreV2) LatestVersion() int64 { + return s.store.LatestVersion() +} + +func (s *parquetReceiptStoreV2) SetLatestVersion(version int64) error { + s.store.SetLatestVersion(version) + return nil +} + +func (s *parquetReceiptStoreV2) SetEarliestVersion(version int64) error { + s.store.SetEarliestVersion(version) + return nil +} + +func (s *parquetReceiptStoreV2) cacheRotateInterval() uint64 { + return s.store.CacheRotateInterval() +} + +func (s *parquetReceiptStoreV2) warmupReceipts() []ReceiptRecord { + records := make([]ReceiptRecord, 0, len(s.warmupRecords)) + for _, rec := range s.warmupRecords { + receipt := &types.Receipt{} + if err := receipt.Unmarshal(rec.ReceiptBytes); err != nil { + continue 
+ } + records = append(records, ReceiptRecord{ + TxHash: common.BytesToHash(rec.TxHash), + Receipt: receipt, + }) + } + s.warmupRecords = nil + return records +} + +func (s *parquetReceiptStoreV2) GetReceipt(ctx sdk.Context, txHash common.Hash) (*types.Receipt, error) { + result, err := s.indexedReceiptLookup(ctx.Context(), txHash) + if err != nil { + return nil, err + } + if result != nil { + receipt := &types.Receipt{} + if err := receipt.Unmarshal(result.ReceiptBytes); err != nil { + return nil, err + } + return receipt, nil + } + + if s.storeKey == nil { + return nil, ErrNotFound + } + store := ctx.KVStore(s.storeKey) + bz := store.Get(types.ReceiptKey(txHash)) + if bz == nil { + return nil, ErrNotFound + } + var r types.Receipt + if err := r.Unmarshal(bz); err != nil { + return nil, err + } + return &r, nil +} + +func (s *parquetReceiptStoreV2) GetReceiptFromStore(ctx sdk.Context, txHash common.Hash) (*types.Receipt, error) { + result, err := s.indexedReceiptLookup(ctx.Context(), txHash) + if err != nil { + return nil, err + } + if result == nil { + return nil, ErrNotFound + } + + receipt := &types.Receipt{} + if err := receipt.Unmarshal(result.ReceiptBytes); err != nil { + return nil, err + } + return receipt, nil +} + +func (s *parquetReceiptStoreV2) indexedReceiptLookup(ctx context.Context, txHash common.Hash) (*parquet.ReceiptResult, error) { + if s.txHashIndex == nil { + return nil, ErrTxIndexDisabled + } + blockNum, ok, err := s.txHashIndex.GetBlockNumber(ctx, txHash) + if err != nil { + logger.Error("tx hash index lookup failed, falling back to full scan", "err", err) + return s.store.GetReceiptByTxHash(ctx, txHash) + } + if !ok { + return s.store.GetReceiptByTxHash(ctx, txHash) + } + return s.store.GetReceiptByTxHashInBlock(ctx, txHash, blockNum) +} + +func (s *parquetReceiptStoreV2) SetReceipts(ctx sdk.Context, receipts []ReceiptRecord) error { + if len(receipts) == 0 { + if ctx.BlockHeight() > 0 { + if err := 
s.store.ObserveEmptyBlock(uint64(ctx.BlockHeight())); err != nil { //nolint:gosec // block heights fit within uint64 + return err + } + } + if ctx.BlockHeight() > s.store.LatestVersion() { + s.store.SetLatestVersion(ctx.BlockHeight()) + } + return nil + } + + inputs, maxBlock, err := buildParquetReceiptInputs(receipts) + if err != nil { + return err + } + + if err := s.store.WriteReceipts(inputs); err != nil { + return err + } + + if s.txHashIndex != nil { + if err := s.indexReceiptInputs(inputs); err != nil { + return fmt.Errorf("tx hash index write failed: %w", err) + } + } + + if maxBlock > 0 { + s.store.UpdateLatestVersion(int64(maxBlock)) //nolint:gosec // block numbers won't exceed int64 max + } + + return nil +} + +func buildParquetReceiptInputs(receipts []ReceiptRecord) ([]parquet.ReceiptInput, uint64, error) { + blockHash := common.Hash{} + inputs := make([]parquet.ReceiptInput, 0, len(receipts)) + + var ( + currentBlock uint64 + logStartIndex uint + maxBlock uint64 + ) + + for _, record := range receipts { + if record.Receipt == nil { + continue + } + + receipt := record.Receipt + blockNumber := receipt.BlockNumber + if blockNumber > maxBlock { + maxBlock = blockNumber + } + + if currentBlock == 0 { + currentBlock = blockNumber + } + if blockNumber != currentBlock { + currentBlock = blockNumber + logStartIndex = 0 + } + + receiptBytes := record.ReceiptBytes + if len(receiptBytes) == 0 { + var err error + receiptBytes, err = receipt.Marshal() + if err != nil { + return nil, 0, err + } + } + + txLogs := getLogsForTx(receipt, logStartIndex) + logStartIndex += uint(len(txLogs)) + for _, lg := range txLogs { + lg.BlockHash = blockHash + } + + inputs = append(inputs, parquet.ReceiptInput{ + BlockNumber: blockNumber, + Receipt: parquet.ReceiptRecord{ + TxHash: parquet.CopyBytes(record.TxHash[:]), + BlockNumber: blockNumber, + ReceiptBytes: parquet.CopyBytesOrEmpty(receiptBytes), + }, + Logs: BuildParquetLogRecords(txLogs, blockHash), + ReceiptBytes: 
parquet.CopyBytesOrEmpty(receiptBytes), + }) + } + + return inputs, maxBlock, nil +} + +func (s *parquetReceiptStoreV2) indexReceiptInputs(inputs []parquet.ReceiptInput) error { + type blockBatch struct { + blockNumber uint64 + hashes []common.Hash + } + var batches []blockBatch + batchIdx := make(map[uint64]int) + + for i := range inputs { + bn := inputs[i].BlockNumber + txHash := common.BytesToHash(inputs[i].Receipt.TxHash) + if idx, ok := batchIdx[bn]; ok { + batches[idx].hashes = append(batches[idx].hashes, txHash) + continue + } + batchIdx[bn] = len(batches) + batches = append(batches, blockBatch{ + blockNumber: bn, + hashes: []common.Hash{txHash}, + }) + } + + ctx := context.Background() + for _, b := range batches { + if err := s.txHashIndex.IndexBlock(ctx, b.blockNumber, b.hashes); err != nil { + return err + } + } + return nil +} + +func (s *parquetReceiptStoreV2) FilterLogs(ctx sdk.Context, fromBlock, toBlock uint64, crit filters.FilterCriteria) ([]*ethtypes.Log, error) { + if fromBlock > toBlock { + return nil, fmt.Errorf("fromBlock (%d) > toBlock (%d)", fromBlock, toBlock) + } + + results, err := s.store.GetLogs(ctx.Context(), parquet.LogFilter{ + FromBlock: &fromBlock, + ToBlock: &toBlock, + Addresses: crit.Addresses, + Topics: crit.Topics, + }) + if err != nil { + return nil, err + } + + logs := make([]*ethtypes.Log, 0, len(results)) + for i := range results { + lr := results[i] + logEntry := &ethtypes.Log{ + BlockNumber: lr.BlockNumber, + TxHash: common.BytesToHash(lr.TxHash), + TxIndex: uint(lr.TxIndex), + Index: uint(lr.LogIndex), + Data: lr.Data, + Removed: lr.Removed, + BlockHash: common.BytesToHash(lr.BlockHash), + } + copy(logEntry.Address[:], lr.Address) + logEntry.Topics = buildTopicsFromParquetLogResult(lr) + logs = append(logs, logEntry) + } + + return logs, nil +} + +func (s *parquetReceiptStoreV2) Close() error { + if s.indexPruner != nil { + s.indexPruner.Stop() + } + storeErr := s.store.Close() + if s.txHashIndex != nil { + if err := 
s.txHashIndex.Close(); err != nil && storeErr == nil { + storeErr = err + } + } + return storeErr +} + +func (s *parquetReceiptStoreV2) replayWAL() error { + result, err := s.store.ReplayWAL(func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (parquet_v2.ReplayReceipt, error) { + receipt := &types.Receipt{} + if err := receipt.Unmarshal(receiptBytes); err != nil { + return parquet_v2.ReplayReceipt{}, err + } + + txHash := common.HexToHash(receipt.TxHashHex) + blockHash := common.Hash{} + txLogs := getLogsForTx(receipt, logStartIndex) + for _, lg := range txLogs { + lg.BlockHash = blockHash + } + + record := parquet.ReceiptRecord{ + TxHash: parquet.CopyBytes(txHash[:]), + BlockNumber: blockNumber, + ReceiptBytes: parquet.CopyBytesOrEmpty(receiptBytes), + } + return parquet_v2.ReplayReceipt{ + Input: parquet.ReceiptInput{ + BlockNumber: blockNumber, + Receipt: record, + Logs: BuildParquetLogRecords(txLogs, blockHash), + ReceiptBytes: parquet.CopyBytesOrEmpty(receiptBytes), + }, + TxHash: txHash, + Warmup: record, + LogCount: uint(len(txLogs)), + }, nil + }) + if err != nil { + return err + } + + s.warmupRecords = result.WarmupRecords + if s.txHashIndex == nil { + return nil + } + + ctx := context.Background() + for _, rb := range result.Blocks { + if err := s.txHashIndex.IndexBlock(ctx, rb.BlockNumber, rb.TxHashes); err != nil { + return fmt.Errorf("failed to re-index replayed block %d: %w", rb.BlockNumber, err) + } + } + return nil +} diff --git a/sei-db/ledger_db/receipt/receipt_store.go b/sei-db/ledger_db/receipt/receipt_store.go index c2ee7f2742..b5e4fdd986 100644 --- a/sei-db/ledger_db/receipt/receipt_store.go +++ b/sei-db/ledger_db/receipt/receipt_store.go @@ -80,8 +80,9 @@ type receiptStore struct { } const ( - receiptBackendPebble = "pebble" - receiptBackendParquet = "parquet" + receiptBackendPebble = "pebble" + receiptBackendParquet = "parquet" + receiptBackendParquetV2 = "parquet_v2" ) func normalizeReceiptBackend(backend string) string { @@ 
-90,6 +91,8 @@ func normalizeReceiptBackend(backend string) string { return receiptBackendPebble case receiptBackendParquet: return receiptBackendParquet + case receiptBackendParquetV2: + return receiptBackendParquetV2 default: return strings.ToLower(strings.TrimSpace(backend)) } @@ -113,7 +116,7 @@ func NewReceiptStoreWithReadMetrics( return newCachedReceiptStore(backend, metrics), nil } -// BackendTypeName returns the backend implementation name ("parquet" or "pebble") for testing. +// BackendTypeName returns the backend implementation name for testing. // Returns "" if store is nil or the backend type is unknown. func BackendTypeName(store ReceiptStore) string { if store == nil { @@ -123,6 +126,8 @@ func BackendTypeName(store ReceiptStore) string { store = c.backend } switch store.(type) { + case *parquetReceiptStoreV2: + return receiptBackendParquetV2 case *parquetReceiptStore: return receiptBackendParquet case *receiptStore: @@ -139,6 +144,8 @@ func newReceiptBackend(config dbconfig.ReceiptStoreConfig, storeKey sdk.StoreKey backend := normalizeReceiptBackend(config.Backend) switch backend { + case receiptBackendParquetV2: + return newParquetReceiptStoreV2(config, storeKey) case receiptBackendParquet: return newParquetReceiptStore(config, storeKey) case receiptBackendPebble: diff --git a/sei-db/ledger_db/receipt/receipt_store_test.go b/sei-db/ledger_db/receipt/receipt_store_test.go index e7aa1faf41..94a2f5c6a2 100644 --- a/sei-db/ledger_db/receipt/receipt_store_test.go +++ b/sei-db/ledger_db/receipt/receipt_store_test.go @@ -80,6 +80,13 @@ func TestNewReceiptStoreConfigErrors(t *testing.T) { require.NotNil(t, store) require.NoError(t, store.Close()) + cfg.Backend = "parquet_v2" + store, err = receipt.NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + require.NotNil(t, store) + require.Equal(t, "parquet_v2", receipt.BackendTypeName(store)) + require.NoError(t, store.Close()) + cfg.TxIndexBackend = "rocksdb" cfg.Backend = "pebble" store, err = 
receipt.NewReceiptStore(cfg, storeKey) @@ -92,6 +99,44 @@ func TestNewReceiptStoreConfigErrors(t *testing.T) { require.NoError(t, err) require.NotNil(t, store) require.NoError(t, store.Close()) + + cfg.Backend = "parquet_v2" + store, err = receipt.NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + require.NotNil(t, store) + require.NoError(t, store.Close()) +} + +func TestParquetV2ReceiptStoreRoundTripAfterReopen(t *testing.T) { + storeKey := storetypes.NewKVStoreKey("evm") + tkey := storetypes.NewTransientStoreKey("evm_transient") + ctx := testutil.DefaultContext(storeKey, tkey) + cfg := dbconfig.DefaultReceiptStoreConfig() + cfg.Backend = "parquet_v2" + cfg.DBDirectory = t.TempDir() + cfg.KeepRecent = 0 + + store, err := receipt.NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + require.Equal(t, "parquet_v2", receipt.BackendTypeName(store)) + + txHash := common.HexToHash("0x99") + addr := common.HexToAddress("0x1") + topic := common.HexToHash("0x2") + want := makeReceipt(txHash, addr, []common.Hash{topic}, 0) + require.NoError(t, store.SetReceipts(ctx.WithBlockHeight(1), []receipt.ReceiptRecord{ + {TxHash: txHash, Receipt: want}, + })) + require.NoError(t, store.Close()) + + reopened, err := receipt.NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + t.Cleanup(func() { _ = reopened.Close() }) + + got, err := reopened.GetReceiptFromStore(ctx, txHash) + require.NoError(t, err) + require.Equal(t, want.TxHashHex, got.TxHashHex) + require.Equal(t, uint64(1), got.BlockNumber) } func TestSetReceiptsAndGet(t *testing.T) { From 542463232d734c8e963481a37a5345719c40de77 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 27 Apr 2026 22:21:25 -0400 Subject: [PATCH 13/27] Test parquet v2 receipt backend --- .../receipt/cached_receipt_store_test.go | 78 +++++----- sei-db/ledger_db/receipt/export_test.go | 8 ++ .../receipt/parquet_v2_crash_test.go | 131 +++++++++++++++++ .../receipt/parquet_v2_store_test.go | 133 ++++++++++++++++++ 4 files changed, 313 
insertions(+), 37 deletions(-) create mode 100644 sei-db/ledger_db/receipt/parquet_v2_crash_test.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2_store_test.go diff --git a/sei-db/ledger_db/receipt/cached_receipt_store_test.go b/sei-db/ledger_db/receipt/cached_receipt_store_test.go index cacc526c0d..057e79b1e2 100644 --- a/sei-db/ledger_db/receipt/cached_receipt_store_test.go +++ b/sei-db/ledger_db/receipt/cached_receipt_store_test.go @@ -411,43 +411,47 @@ func TestCachedReceiptStoreReportsCacheMiss(t *testing.T) { // Wrapper tests for cachedReceiptStore using parquet backend. func TestCachedReceiptStoreFallsBackToDuckDBOnReceiptCacheMiss(t *testing.T) { - ctx, storeKey := newTestContext() - cfg := dbconfig.DefaultReceiptStoreConfig() - cfg.Backend = "parquet" - cfg.DBDirectory = t.TempDir() - - store, err := NewReceiptStore(cfg, storeKey) - require.NoError(t, err) - - txHash := common.HexToHash("0x12") - addr := common.HexToAddress("0x212") - topic := common.HexToHash("0xcafe") - receipt := makeTestReceipt(txHash, 8, 0, addr, []common.Hash{topic}) - - require.NoError(t, store.SetReceipts(ctx.WithBlockHeight(8), []ReceiptRecord{ - {TxHash: txHash, Receipt: receipt}, - })) - require.NoError(t, store.Close()) - - store, err = NewReceiptStore(cfg, storeKey) - require.NoError(t, err) - t.Cleanup(func() { _ = store.Close() }) - - cached, ok := store.(*cachedReceiptStore) - require.True(t, ok, "expected cached receipt store wrapper") - - // A clean reopen leaves no WAL warmup records, so receipt lookup must - // miss the in-memory cache and fall through to the parquet/DuckDB backend. - _, ok = cached.cache.GetReceipt(txHash) - require.False(t, ok, "receipt cache should be cold after clean reopen") - - // There is no legacy KV receipt entry to rescue the lookup, so success - // here proves GetReceipt() can recover from DuckDB after a cache miss. 
- require.Nil(t, ctx.KVStore(storeKey).Get(types.ReceiptKey(txHash))) - - got, err := store.GetReceipt(ctx.WithBlockHeight(8), txHash) - require.NoError(t, err) - require.Equal(t, receipt.TxHashHex, got.TxHashHex) + for _, backend := range []string{"parquet", "parquet_v2"} { + t.Run(backend, func(t *testing.T) { + ctx, storeKey := newTestContext() + cfg := dbconfig.DefaultReceiptStoreConfig() + cfg.Backend = backend + cfg.DBDirectory = t.TempDir() + + store, err := NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + + txHash := common.HexToHash("0x12") + addr := common.HexToAddress("0x212") + topic := common.HexToHash("0xcafe") + receipt := makeTestReceipt(txHash, 8, 0, addr, []common.Hash{topic}) + + require.NoError(t, store.SetReceipts(ctx.WithBlockHeight(8), []ReceiptRecord{ + {TxHash: txHash, Receipt: receipt}, + })) + require.NoError(t, store.Close()) + + store, err = NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + t.Cleanup(func() { _ = store.Close() }) + + cached, ok := store.(*cachedReceiptStore) + require.True(t, ok, "expected cached receipt store wrapper") + + // A clean reopen leaves no WAL warmup records, so receipt lookup must + // miss the in-memory cache and fall through to the parquet/DuckDB backend. + _, ok = cached.cache.GetReceipt(txHash) + require.False(t, ok, "receipt cache should be cold after clean reopen") + + // There is no legacy KV receipt entry to rescue the lookup, so success + // here proves GetReceipt() can recover from DuckDB after a cache miss. 
+ require.Nil(t, ctx.KVStore(storeKey).Get(types.ReceiptKey(txHash))) + + got, err := store.GetReceipt(ctx.WithBlockHeight(8), txHash) + require.NoError(t, err) + require.Equal(t, receipt.TxHashHex, got.TxHashHex) + }) + } } func TestCachedReceiptStoreMergesDuckDBAndCacheAcrossBoundary(t *testing.T) { diff --git a/sei-db/ledger_db/receipt/export_test.go b/sei-db/ledger_db/receipt/export_test.go index 8950a2e4ae..87a1cb9869 100644 --- a/sei-db/ledger_db/receipt/export_test.go +++ b/sei-db/ledger_db/receipt/export_test.go @@ -30,4 +30,12 @@ func CloseTxHashIndex(store ReceiptStore) { _ = pq.txHashIndex.Close() pq.txHashIndex = nil } + if pq, ok := store.(*parquetReceiptStoreV2); ok && pq.txHashIndex != nil { + if pq.indexPruner != nil { + pq.indexPruner.Stop() + pq.indexPruner = nil + } + _ = pq.txHashIndex.Close() + pq.txHashIndex = nil + } } diff --git a/sei-db/ledger_db/receipt/parquet_v2_crash_test.go b/sei-db/ledger_db/receipt/parquet_v2_crash_test.go new file mode 100644 index 0000000000..aa69fb751c --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2_crash_test.go @@ -0,0 +1,131 @@ +package receipt + +import ( + "testing" + + "github.com/ethereum/go-ethereum/common" + dbconfig "github.com/sei-protocol/sei-chain/sei-db/config" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/receipt/parquet_v2" + "github.com/stretchr/testify/require" +) + +func simulateCrashV2(store ReceiptStore, pqStore *parquet_v2.Store) { + CloseTxHashIndex(store) + pqStore.SimulateCrash() +} + +func TestParquetV2CrashRecoveryAtEachHookPoint(t *testing.T) { + type hookSetup struct { + name string + needsRotation bool + install func(h *parquet.FaultHooks, trigger func() error) + } + + hookPoints := []hookSetup{ + { + name: "AfterWALWrite", + install: func(h *parquet.FaultHooks, trigger func() error) { + h.AfterWALWrite = func(uint64) error { return trigger() } + }, + }, + { + name: "BeforeFlush", + install: func(h 
*parquet.FaultHooks, trigger func() error) { + h.BeforeFlush = func(uint64) error { return trigger() } + }, + }, + { + name: "AfterFlush", + install: func(h *parquet.FaultHooks, trigger func() error) { + h.AfterFlush = func(uint64) error { return trigger() } + }, + }, + { + name: "AfterCloseWriters", + needsRotation: true, + install: func(h *parquet.FaultHooks, trigger func() error) { + h.AfterCloseWriters = func(uint64) error { return trigger() } + }, + }, + { + name: "AfterWALClear", + needsRotation: true, + install: func(h *parquet.FaultHooks, trigger func() error) { + h.AfterWALClear = func(uint64) error { return trigger() } + }, + }, + } + + for _, hp := range hookPoints { + t.Run(hp.name, func(t *testing.T) { + ctx, storeKey := newTestContext() + cfg := dbconfig.DefaultReceiptStoreConfig() + cfg.Backend = "parquet_v2" + cfg.DBDirectory = t.TempDir() + + store, err := NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + + pqStore := extractParquetV2Store(t, store) + pqStore.SetMaxBlocksPerFile(4) + + preBlocks := uint64(2) + if hp.needsRotation { + preBlocks = 3 + } + + addr := common.HexToAddress("0x1") + for block := uint64(1); block <= preBlocks; block++ { + txHash := blockTxHash(block) + receipt := makeTestReceipt(txHash, block, 0, addr, nil) + require.NoError(t, store.SetReceipts(ctx.WithBlockHeight(int64(block)), []ReceiptRecord{ + {TxHash: txHash, Receipt: receipt}, + })) + } + + fired := false + hooks := &parquet.FaultHooks{} + hp.install(hooks, func() error { + if !fired { + fired = true + return errSimulatedCrash + } + return nil + }) + pqStore.SetFaultHooks(hooks) + + crashBlock := preBlocks + 1 + crashTxHash := blockTxHash(crashBlock) + crashReceipt := makeTestReceipt(crashTxHash, crashBlock, 0, addr, nil) + err = store.SetReceipts(ctx.WithBlockHeight(int64(crashBlock)), []ReceiptRecord{ + {TxHash: crashTxHash, Receipt: crashReceipt}, + }) + require.ErrorIs(t, err, errSimulatedCrash) + + simulateCrashV2(store, pqStore) + + store, err = 
NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + t.Cleanup(func() { _ = store.Close() }) + extractParquetV2Store(t, store).SetMaxBlocksPerFile(4) + + for block := uint64(1); block <= crashBlock; block++ { + txHash := blockTxHash(block) + got, err := store.GetReceiptFromStore(ctx, txHash) + require.NoError(t, err, "block %d not recovered", block) + require.Equal(t, txHash.Hex(), got.TxHashHex) + } + + postBlock := crashBlock + 1 + postTxHash := blockTxHash(postBlock) + postReceipt := makeTestReceipt(postTxHash, postBlock, 0, addr, nil) + require.NoError(t, store.SetReceipts(ctx.WithBlockHeight(int64(postBlock)), []ReceiptRecord{ + {TxHash: postTxHash, Receipt: postReceipt}, + })) + got, err := store.GetReceiptFromStore(ctx, postTxHash) + require.NoError(t, err) + require.Equal(t, postTxHash.Hex(), got.TxHashHex) + }) + } +} diff --git a/sei-db/ledger_db/receipt/parquet_v2_store_test.go b/sei-db/ledger_db/receipt/parquet_v2_store_test.go new file mode 100644 index 0000000000..fcf8a6921d --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2_store_test.go @@ -0,0 +1,133 @@ +package receipt + +import ( + "math/big" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/eth/filters" + dbconfig "github.com/sei-protocol/sei-chain/sei-db/config" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/receipt/parquet_v2" + "github.com/stretchr/testify/require" +) + +func extractParquetV2Store(t *testing.T, store ReceiptStore) *parquet_v2.Store { + t.Helper() + cached, ok := store.(*cachedReceiptStore) + require.True(t, ok, "expected *cachedReceiptStore") + pq, ok := cached.backend.(*parquetReceiptStoreV2) + require.True(t, ok, "expected *parquetReceiptStoreV2 backend") + return pq.store +} + +func TestParquetV2ReceiptStoreReopenQueries(t *testing.T) { + ctx, storeKey := newTestContext() + cfg := dbconfig.DefaultReceiptStoreConfig() + cfg.Backend = "parquet_v2" + cfg.DBDirectory = t.TempDir() + + store, err := 
NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + + txHash := common.HexToHash("0x220") + addr := common.HexToAddress("0x300") + topic := common.HexToHash("0x5678") + receipt := makeTestReceipt(txHash, 5, 1, addr, []common.Hash{topic}) + + require.NoError(t, store.SetReceipts(ctx.WithBlockHeight(5), []ReceiptRecord{ + {TxHash: txHash, Receipt: receipt}, + })) + require.NoError(t, store.Close()) + + store, err = NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + t.Cleanup(func() { _ = store.Close() }) + + got, err := store.GetReceiptFromStore(ctx, txHash) + require.NoError(t, err) + require.Equal(t, receipt.TxHashHex, got.TxHashHex) + + logs, err := store.FilterLogs(ctx, 3, 5, filters.FilterCriteria{ + Addresses: []common.Address{addr}, + Topics: [][]common.Hash{{topic}}, + }) + require.NoError(t, err) + require.Len(t, logs, 1) + require.Equal(t, uint64(5), logs[0].BlockNumber) +} + +func TestParquetV2DuplicateHashLogsSurviveReopen(t *testing.T) { + ctx, storeKey := newTestContext() + cfg := dbconfig.DefaultReceiptStoreConfig() + cfg.Backend = "parquet_v2" + cfg.DBDirectory = t.TempDir() + + store, err := NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + + txHash := common.HexToHash("0x333") + addr := common.HexToAddress("0x3330") + topic := common.HexToHash("0x3331") + for _, block := range []uint64{1, 2} { + receipt := makeTestReceipt(txHash, block, 0, addr, []common.Hash{topic}) + require.NoError(t, store.SetReceipts(ctx.WithBlockHeight(int64(block)), []ReceiptRecord{ + {TxHash: txHash, Receipt: receipt}, + })) + } + require.NoError(t, store.Close()) + + store, err = NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + t.Cleanup(func() { _ = store.Close() }) + + logs, err := store.FilterLogs(ctx, 1, 2, filters.FilterCriteria{ + Addresses: []common.Address{addr}, + Topics: [][]common.Hash{{topic}}, + }) + require.NoError(t, err) + require.Len(t, logs, 2) + require.Equal(t, []uint64{1, 2}, []uint64{logs[0].BlockNumber, 
logs[1].BlockNumber}) + require.Equal(t, txHash, logs[0].TxHash) + require.Equal(t, txHash, logs[1].TxHash) +} + +func TestParquetV2EmptyBoundaryRotationFeedsClosedFileReads(t *testing.T) { + ctx, storeKey := newTestContext() + cfg := dbconfig.DefaultReceiptStoreConfig() + cfg.Backend = "parquet_v2" + cfg.DBDirectory = t.TempDir() + cfg.TxIndexBackend = "" + + store, err := NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + + pqStore := extractParquetV2Store(t, store) + pqStore.SetMaxBlocksPerFile(4) + + addr := common.HexToAddress("0x440") + topic := common.HexToHash("0x441") + for _, block := range []uint64{2, 5} { + txHash := common.BigToHash(new(big.Int).SetUint64(block)) + receipt := makeTestReceipt(txHash, block, 0, addr, []common.Hash{topic}) + require.NoError(t, store.SetReceipts(ctx.WithBlockHeight(int64(block)), []ReceiptRecord{ + {TxHash: txHash, Receipt: receipt}, + })) + if block == 2 { + require.NoError(t, store.SetReceipts(ctx.WithBlockHeight(4), nil)) + } + } + require.NoError(t, store.Close()) + + store, err = NewReceiptStore(cfg, storeKey) + require.NoError(t, err) + t.Cleanup(func() { _ = store.Close() }) + extractParquetV2Store(t, store).SetMaxBlocksPerFile(4) + + logs, err := store.FilterLogs(ctx, 5, 5, filters.FilterCriteria{ + Addresses: []common.Address{addr}, + Topics: [][]common.Hash{{topic}}, + }) + require.NoError(t, err) + require.Len(t, logs, 1) + require.Equal(t, uint64(5), logs[0].BlockNumber) +} From 1938ac06a75b5340ce3cb32f890b0ecbe5b4a696 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Wed, 29 Apr 2026 11:04:49 -0400 Subject: [PATCH 14/27] make a coordinator module --- .../ledger_db/receipt/parquet_v2/api_stub.go | 152 ----------- .../receipt/parquet_v2/coordinator.go | 117 -------- .../receipt/parquet_v2/coordinator/api.go | 152 +++++++++++ .../parquet_v2/coordinator/coordinator.go | 250 ++++++++++++++++++ .../parquet_v2/coordinator/dispatch_test.go | 67 +++++ .../parquet_v2/coordinator/export_test.go | 5 + 
.../parquet_v2/{ => coordinator}/files.go | 2 +- .../parquet_v2/coordinator/files_test.go | 26 ++ .../parquet_v2/{ => coordinator}/handlers.go | 72 ++--- .../parquet_v2/coordinator/helpers_test.go | 210 +++++++++++++++ .../parquet_v2/{ => coordinator}/prune.go | 6 +- .../prune_test.go} | 40 +-- .../parquet_v2/coordinator/read_test.go | 56 ++++ .../parquet_v2/{ => coordinator}/reader.go | 2 +- .../parquet_v2/{ => coordinator}/requests.go | 2 +- .../parquet_v2/coordinator/rotation_test.go | 121 +++++++++ .../receipt/parquet_v2/coordinator/types.go | 46 ++++ .../parquet_v2/{ => coordinator}/wal.go | 8 +- .../parquet_v2/coordinator/wal_test.go | 74 ++++++ .../parquet_v2/coordinator/write_test.go | 64 +++++ .../receipt/parquet_v2/export_test.go | 5 - .../receipt/parquet_v2/helpers_test.go | 93 +++++++ sei-db/ledger_db/receipt/parquet_v2/store.go | 189 +++++-------- .../receipt/parquet_v2/store_dispatch_test.go | 64 ----- .../receipt/parquet_v2/store_init_test.go | 62 ----- .../receipt/parquet_v2/store_read_test.go | 55 ---- .../receipt/parquet_v2/store_rotation_test.go | 111 -------- .../receipt/parquet_v2/store_wal_test.go | 97 ------- .../receipt/parquet_v2/store_write_test.go | 147 ---------- sei-db/ledger_db/receipt/parquet_v2/types.go | 48 +--- 30 files changed, 1289 insertions(+), 1054 deletions(-) delete mode 100644 sei-db/ledger_db/receipt/parquet_v2/api_stub.go delete mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator/api.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator/export_test.go rename sei-db/ledger_db/receipt/parquet_v2/{ => coordinator}/files.go (99%) create mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator/files_test.go rename sei-db/ledger_db/receipt/parquet_v2/{ => 
coordinator}/handlers.go (85%) create mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator/helpers_test.go rename sei-db/ledger_db/receipt/parquet_v2/{ => coordinator}/prune.go (87%) rename sei-db/ledger_db/receipt/parquet_v2/{store_prune_test.go => coordinator/prune_test.go} (67%) create mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator/read_test.go rename sei-db/ledger_db/receipt/parquet_v2/{ => coordinator}/reader.go (99%) rename sei-db/ledger_db/receipt/parquet_v2/{ => coordinator}/requests.go (99%) create mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator/rotation_test.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go rename sei-db/ledger_db/receipt/parquet_v2/{ => coordinator}/wal.go (95%) create mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator/wal_test.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator/write_test.go delete mode 100644 sei-db/ledger_db/receipt/parquet_v2/export_test.go create mode 100644 sei-db/ledger_db/receipt/parquet_v2/helpers_test.go diff --git a/sei-db/ledger_db/receipt/parquet_v2/api_stub.go b/sei-db/ledger_db/receipt/parquet_v2/api_stub.go deleted file mode 100644 index 9b7b0b1d0a..0000000000 --- a/sei-db/ledger_db/receipt/parquet_v2/api_stub.go +++ /dev/null @@ -1,152 +0,0 @@ -package parquet_v2 - -import ( - "context" - - "github.com/ethereum/go-ethereum/common" - "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" -) - -func (s *Store) WriteReceipts(inputs []parquet.ReceiptInput) error { - resp := make(chan writeResp, 1) - r, err := awaitResponse(s, writeReq{inputs: inputs, resp: resp}, resp) - if err != nil { - return err - } - return r.err -} - -func (s *Store) GetReceiptByTxHash(ctx context.Context, txHash common.Hash) (*parquet.ReceiptResult, error) { - resp := make(chan readReceiptResp, 1) - r, err := awaitResponse(s, readByTxHashReq{ctx: ctx, txHash: txHash, resp: resp}, resp) - if err != nil { - return nil, err - } - return 
r.result, r.err -} - -func (s *Store) GetReceiptByTxHashInBlock(ctx context.Context, txHash common.Hash, blockNumber uint64) (*parquet.ReceiptResult, error) { - resp := make(chan readReceiptResp, 1) - r, err := awaitResponse(s, readByTxHashInBlockReq{ - ctx: ctx, - txHash: txHash, - blockNumber: blockNumber, - resp: resp, - }, resp) - if err != nil { - return nil, err - } - return r.result, r.err -} - -func (s *Store) GetLogs(ctx context.Context, filter parquet.LogFilter) ([]parquet.LogResult, error) { - resp := make(chan getLogsResp, 1) - r, err := awaitResponse(s, getLogsReq{ctx: ctx, filter: filter, resp: resp}, resp) - if err != nil { - return nil, err - } - return r.results, r.err -} - -func (s *Store) ObserveEmptyBlock(height uint64) error { - resp := make(chan error, 1) - return awaitError(s, observeEmptyBlockReq{height: height, resp: resp}, resp) -} - -func (s *Store) IsRotationBoundary(blockNumber uint64) bool { - resp := make(chan bool, 1) - r, err := awaitResponse(s, isRotationBoundaryReq{blockNumber: blockNumber, resp: resp}, resp) - if err != nil { - return false - } - return r -} - -func (s *Store) FileStartBlock() uint64 { - resp := make(chan uint64, 1) - r, err := awaitResponse(s, fileStartBlockReq{resp: resp}, resp) - if err != nil { - return 0 - } - return r -} - -func (s *Store) LatestVersion() int64 { - resp := make(chan int64, 1) - r, err := awaitResponse(s, latestVersionReq{resp: resp}, resp) - if err != nil { - return 0 - } - return r -} - -func (s *Store) SetLatestVersion(version int64) { - resp := make(chan error, 1) - _ = awaitError(s, setLatestVersionReq{version: version, resp: resp}, resp) -} - -func (s *Store) SetEarliestVersion(version int64) { - resp := make(chan error, 1) - _ = awaitError(s, setEarliestVersionReq{version: version, resp: resp}, resp) -} - -func (s *Store) UpdateLatestVersion(version int64) { - resp := make(chan error, 1) - _ = awaitError(s, updateLatestVersionReq{version: version, resp: resp}, resp) -} - -func (s 
*Store) CacheRotateInterval() uint64 { - resp := make(chan uint64, 1) - r, err := awaitResponse(s, cacheRotateIntervalReq{resp: resp}, resp) - if err != nil { - return 0 - } - return r -} - -func (s *Store) Flush() error { - resp := make(chan error, 1) - return awaitError(s, flushReq{resp: resp}, resp) -} - -func (s *Store) Close() error { - var err error - s.closeOnce.Do(func() { - resp := make(chan error, 1) - err = awaitError(s, closeReq{resp: resp}, resp) - close(s.done) - }) - return err -} - -func (s *Store) SimulateCrash() { - s.closeOnce.Do(func() { - resp := make(chan struct{}, 1) - _, _ = awaitResponse(s, simulateCrashReq{resp: resp}, resp) - close(s.done) - }) -} - -func (s *Store) SetBlockFlushInterval(interval uint64) { - resp := make(chan error, 1) - _ = awaitError(s, setBlockFlushIntervalReq{interval: interval, resp: resp}, resp) -} - -func (s *Store) SetMaxBlocksPerFile(n uint64) { - resp := make(chan error, 1) - _ = awaitError(s, setMaxBlocksPerFileReq{maxBlocksPerFile: n, resp: resp}, resp) -} - -func (s *Store) SetFaultHooks(hooks *parquet.FaultHooks) { - resp := make(chan error, 1) - _ = awaitError(s, setFaultHooksReq{hooks: hooks, resp: resp}, resp) -} - -func (s *Store) ReplayWAL(converter WALReceiptConverter) (ReplayResult, error) { - resp := make(chan replayWALResp, 1) - r, err := awaitResponse(s, replayWALReq{converter: converter, resp: resp}, resp) - if err != nil { - return ReplayResult{}, err - } - return r.result, r.err -} diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator.go deleted file mode 100644 index 722e8c7831..0000000000 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator.go +++ /dev/null @@ -1,117 +0,0 @@ -package parquet_v2 - -import ( - "os" - "time" - - "github.com/ethereum/go-ethereum/common" - parquetgo "github.com/parquet-go/parquet-go" - "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" - dbwal "github.com/sei-protocol/sei-chain/sei-db/wal" -) - 
-type closedFile struct { - startBlock uint64 - receiptPath string - logPath string -} - -type coordinator struct { - requests chan coordRequest - pruneTick <-chan time.Time - pruneTicker *time.Ticker - done chan struct{} - - config parquet.StoreConfig - - basePath string - fileStartBlock uint64 - receiptWriter *parquetgo.GenericWriter[parquet.ReceiptRecord] - logWriter *parquetgo.GenericWriter[parquet.LogRecord] - receiptFile *os.File - logFile *os.File - closedFiles []closedFile - - receiptsBuffer []parquet.ReceiptRecord - logsBuffer []parquet.LogRecord - lastSeenBlock uint64 - blocksSinceFlush uint64 - nextWriteOrdinal uint64 - - tempWriteCache map[common.Hash][]tempReceipt - - latestVersion int64 - earliestVersion int64 - - replayedWarmup []parquet.ReceiptRecord - replayedBlocks []ReplayedBlock - - faultHooks *parquet.FaultHooks - - wal dbwal.GenericWAL[parquet.WALEntry] - reader *Reader -} - -func (c *coordinator) run() { - for { - select { - case req := <-c.requests: - switch r := req.(type) { - case writeReq: - c.handleWrite(r) - case readByTxHashReq: - c.handleReadByTxHash(r) - case readByTxHashInBlockReq: - c.handleReadByTxHashInBlock(r) - case getLogsReq: - c.handleGetLogs(r) - case observeEmptyBlockReq: - c.handleObserveEmptyBlock(r) - case flushReq: - c.handleFlush(r) - case latestVersionReq: - c.handleLatestVersion(r) - case setLatestVersionReq: - c.handleSetLatestVersion(r) - case setEarliestVersionReq: - c.handleSetEarliestVersion(r) - case updateLatestVersionReq: - c.handleUpdateLatestVersion(r) - case cacheRotateIntervalReq: - c.handleCacheRotateInterval(r) - case fileStartBlockReq: - c.handleFileStartBlock(r) - case isRotationBoundaryReq: - c.handleIsRotationBoundary(r) - case setBlockFlushIntervalReq: - c.handleSetBlockFlushInterval(r) - case setMaxBlocksPerFileReq: - c.handleSetMaxBlocksPerFile(r) - case setFaultHooksReq: - c.handleSetFaultHooks(r) - case replayWALReq: - c.handleReplayWAL(r) - case simulateCrashReq: - c.handleSimulateCrash(r) - 
return - case closeReq: - c.handleClose(r) - return - } - case <-c.pruneTick: - c.handlePruneTick() - case <-c.done: - c.stopPruneTicker() - return - } - } -} - -func (c *coordinator) stopPruneTicker() { - if c.pruneTicker == nil { - return - } - c.pruneTicker.Stop() - c.pruneTicker = nil - c.pruneTick = nil -} diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/api.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/api.go new file mode 100644 index 0000000000..47955c9ece --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/api.go @@ -0,0 +1,152 @@ +package coordinator + +import ( + "context" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" +) + +func (c *Coordinator) WriteReceipts(inputs []parquet.ReceiptInput) error { + resp := make(chan writeResp, 1) + r, err := awaitResponse(c, writeReq{inputs: inputs, resp: resp}, resp) + if err != nil { + return err + } + return r.err +} + +func (c *Coordinator) GetReceiptByTxHash(ctx context.Context, txHash common.Hash) (*parquet.ReceiptResult, error) { + resp := make(chan readReceiptResp, 1) + r, err := awaitResponse(c, readByTxHashReq{ctx: ctx, txHash: txHash, resp: resp}, resp) + if err != nil { + return nil, err + } + return r.result, r.err +} + +func (c *Coordinator) GetReceiptByTxHashInBlock(ctx context.Context, txHash common.Hash, blockNumber uint64) (*parquet.ReceiptResult, error) { + resp := make(chan readReceiptResp, 1) + r, err := awaitResponse(c, readByTxHashInBlockReq{ + ctx: ctx, + txHash: txHash, + blockNumber: blockNumber, + resp: resp, + }, resp) + if err != nil { + return nil, err + } + return r.result, r.err +} + +func (c *Coordinator) GetLogs(ctx context.Context, filter parquet.LogFilter) ([]parquet.LogResult, error) { + resp := make(chan getLogsResp, 1) + r, err := awaitResponse(c, getLogsReq{ctx: ctx, filter: filter, resp: resp}, resp) + if err != nil { + return nil, err + } + return r.results, r.err +} + +func (c 
*Coordinator) ObserveEmptyBlock(height uint64) error { + resp := make(chan error, 1) + return awaitError(c, observeEmptyBlockReq{height: height, resp: resp}, resp) +} + +func (c *Coordinator) IsRotationBoundary(blockNumber uint64) bool { + resp := make(chan bool, 1) + r, err := awaitResponse(c, isRotationBoundaryReq{blockNumber: blockNumber, resp: resp}, resp) + if err != nil { + return false + } + return r +} + +func (c *Coordinator) FileStartBlock() uint64 { + resp := make(chan uint64, 1) + r, err := awaitResponse(c, fileStartBlockReq{resp: resp}, resp) + if err != nil { + return 0 + } + return r +} + +func (c *Coordinator) LatestVersion() int64 { + resp := make(chan int64, 1) + r, err := awaitResponse(c, latestVersionReq{resp: resp}, resp) + if err != nil { + return 0 + } + return r +} + +func (c *Coordinator) SetLatestVersion(version int64) { + resp := make(chan error, 1) + _ = awaitError(c, setLatestVersionReq{version: version, resp: resp}, resp) +} + +func (c *Coordinator) SetEarliestVersion(version int64) { + resp := make(chan error, 1) + _ = awaitError(c, setEarliestVersionReq{version: version, resp: resp}, resp) +} + +func (c *Coordinator) UpdateLatestVersion(version int64) { + resp := make(chan error, 1) + _ = awaitError(c, updateLatestVersionReq{version: version, resp: resp}, resp) +} + +func (c *Coordinator) CacheRotateInterval() uint64 { + resp := make(chan uint64, 1) + r, err := awaitResponse(c, cacheRotateIntervalReq{resp: resp}, resp) + if err != nil { + return 0 + } + return r +} + +func (c *Coordinator) Flush() error { + resp := make(chan error, 1) + return awaitError(c, flushReq{resp: resp}, resp) +} + +func (c *Coordinator) Close() error { + var err error + c.closeOnce.Do(func() { + resp := make(chan error, 1) + err = awaitError(c, closeReq{resp: resp}, resp) + close(c.done) + }) + return err +} + +func (c *Coordinator) SimulateCrash() { + c.closeOnce.Do(func() { + resp := make(chan struct{}, 1) + _, _ = awaitResponse(c, simulateCrashReq{resp: 
resp}, resp) + close(c.done) + }) +} + +func (c *Coordinator) SetBlockFlushInterval(interval uint64) { + resp := make(chan error, 1) + _ = awaitError(c, setBlockFlushIntervalReq{interval: interval, resp: resp}, resp) +} + +func (c *Coordinator) SetMaxBlocksPerFile(n uint64) { + resp := make(chan error, 1) + _ = awaitError(c, setMaxBlocksPerFileReq{maxBlocksPerFile: n, resp: resp}, resp) +} + +func (c *Coordinator) SetFaultHooks(hooks *parquet.FaultHooks) { + resp := make(chan error, 1) + _ = awaitError(c, setFaultHooksReq{hooks: hooks, resp: resp}, resp) +} + +func (c *Coordinator) ReplayWAL(converter WALReceiptConverter) (ReplayResult, error) { + resp := make(chan replayWALResp, 1) + r, err := awaitResponse(c, replayWALReq{converter: converter, resp: resp}, resp) + if err != nil { + return ReplayResult{}, err + } + return r.result, r.err +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go new file mode 100644 index 0000000000..e365b20347 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -0,0 +1,250 @@ +package coordinator + +import ( + "context" + "fmt" + "os" + "path/filepath" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + parquetgo "github.com/parquet-go/parquet-go" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + dbwal "github.com/sei-protocol/sei-chain/sei-db/wal" +) + +type closedFile struct { + startBlock uint64 + receiptPath string + logPath string +} + +// Coordinator owns parquet_v2's mutable state and serializes all access via +// its requests channel. Construct with New; interact through the typed +// methods (WriteReceipts, GetLogs, ...). 
+type Coordinator struct { + requests chan coordRequest + pruneTick <-chan time.Time + pruneTicker *time.Ticker + done chan struct{} + closeOnce sync.Once + + config parquet.StoreConfig + + basePath string + fileStartBlock uint64 + receiptWriter *parquetgo.GenericWriter[parquet.ReceiptRecord] + logWriter *parquetgo.GenericWriter[parquet.LogRecord] + receiptFile *os.File + logFile *os.File + closedFiles []closedFile + + receiptsBuffer []parquet.ReceiptRecord + logsBuffer []parquet.LogRecord + lastSeenBlock uint64 + blocksSinceFlush uint64 + nextWriteOrdinal uint64 + + tempWriteCache map[common.Hash][]tempReceipt + + latestVersion int64 + earliestVersion int64 + + replayedWarmup []parquet.ReceiptRecord + replayedBlocks []ReplayedBlock + + faultHooks *parquet.FaultHooks + + wal dbwal.GenericWAL[parquet.WALEntry] + reader *Reader +} + +// New constructs a Coordinator with a live goroutine. The returned +// Coordinator is ready to accept requests via its typed methods. +func New(cfg parquet.StoreConfig) (*Coordinator, error) { + storeCfg := resolveStoreConfig(cfg) + + if err := os.MkdirAll(storeCfg.DBDirectory, 0o750); err != nil { + return nil, fmt.Errorf("failed to create parquet base directory: %w", err) + } + + requests := make(chan coordRequest) + done := make(chan struct{}) + reader, err := NewReaderWithMaxBlocksPerFile(cfg.DBDirectory, storeCfg.MaxBlocksPerFile) + if err != nil { + return nil, err + } + cleanupReader := true + defer func() { + if cleanupReader { + _ = reader.Close() + } + }() + + walDir := filepath.Join(storeCfg.DBDirectory, "parquet-wal") + receiptWAL, err := parquet.NewWAL(walDir) + if err != nil { + return nil, err + } + cleanupWAL := true + defer func() { + if cleanupWAL { + _ = receiptWAL.Close() + } + }() + + closedFiles, err := scanClosedFiles(storeCfg.DBDirectory, reader) + if err != nil { + return nil, err + } + + c := &Coordinator{ + requests: requests, + done: done, + config: storeCfg, + basePath: cfg.DBDirectory, + closedFiles: 
closedFiles, + receiptsBuffer: make([]parquet.ReceiptRecord, 0, 1000), + logsBuffer: make([]parquet.LogRecord, 0, 10000), + tempWriteCache: make(map[common.Hash][]tempReceipt), + reader: reader, + wal: receiptWAL, + latestVersion: 0, + earliestVersion: 0, + } + + receiptFiles := make([]string, 0, len(closedFiles)) + for _, f := range closedFiles { + receiptFiles = append(receiptFiles, f.receiptPath) + } + if maxBlock, ok, err := reader.MaxReceiptBlockNumber(context.Background(), receiptFiles); err != nil { + return nil, err + } else if ok { + latest, err := int64FromUint64(maxBlock) + if err != nil { + return nil, err + } + c.latestVersion = latest + if maxBlock < ^uint64(0) { + c.fileStartBlock = maxBlock + 1 + } + } + + if storeCfg.KeepRecent > 0 && storeCfg.PruneIntervalSeconds > 0 { + c.pruneTicker = time.NewTicker(time.Duration(storeCfg.PruneIntervalSeconds) * time.Second) + c.pruneTick = c.pruneTicker.C + } + + go c.run() + cleanupReader = false + cleanupWAL = false + + return c, nil +} + +func resolveStoreConfig(cfg parquet.StoreConfig) parquet.StoreConfig { + resolved := parquet.DefaultStoreConfig() + resolved.DBDirectory = cfg.DBDirectory + resolved.KeepRecent = cfg.KeepRecent + resolved.PruneIntervalSeconds = cfg.PruneIntervalSeconds + if cfg.TxIndexBackend != "" { + resolved.TxIndexBackend = cfg.TxIndexBackend + } + if cfg.BlockFlushInterval > 0 { + resolved.BlockFlushInterval = cfg.BlockFlushInterval + } + if cfg.MaxBlocksPerFile > 0 { + resolved.MaxBlocksPerFile = cfg.MaxBlocksPerFile + } + return resolved +} + +func (c *Coordinator) run() { + for { + select { + case req := <-c.requests: + switch r := req.(type) { + case writeReq: + c.handleWrite(r) + case readByTxHashReq: + c.handleReadByTxHash(r) + case readByTxHashInBlockReq: + c.handleReadByTxHashInBlock(r) + case getLogsReq: + c.handleGetLogs(r) + case observeEmptyBlockReq: + c.handleObserveEmptyBlock(r) + case flushReq: + c.handleFlush(r) + case latestVersionReq: + c.handleLatestVersion(r) + case 
setLatestVersionReq: + c.handleSetLatestVersion(r) + case setEarliestVersionReq: + c.handleSetEarliestVersion(r) + case updateLatestVersionReq: + c.handleUpdateLatestVersion(r) + case cacheRotateIntervalReq: + c.handleCacheRotateInterval(r) + case fileStartBlockReq: + c.handleFileStartBlock(r) + case isRotationBoundaryReq: + c.handleIsRotationBoundary(r) + case setBlockFlushIntervalReq: + c.handleSetBlockFlushInterval(r) + case setMaxBlocksPerFileReq: + c.handleSetMaxBlocksPerFile(r) + case setFaultHooksReq: + c.handleSetFaultHooks(r) + case replayWALReq: + c.handleReplayWAL(r) + case simulateCrashReq: + c.handleSimulateCrash(r) + return + case closeReq: + c.handleClose(r) + return + } + case <-c.pruneTick: + c.handlePruneTick() + case <-c.done: + c.stopPruneTicker() + return + } + } +} + +func (c *Coordinator) stopPruneTicker() { + if c.pruneTicker == nil { + return + } + c.pruneTicker.Stop() + c.pruneTicker = nil + c.pruneTick = nil +} + +func awaitResponse[T any](c *Coordinator, req coordRequest, resp <-chan T) (T, error) { + var zero T + + select { + case c.requests <- req: + case <-c.done: + return zero, ErrStoreClosed + } + + select { + case r := <-resp: + return r, nil + case <-c.done: + return zero, ErrStoreClosed + } +} + +func awaitError(c *Coordinator, req coordRequest, resp <-chan error) error { + err, waitErr := awaitResponse(c, req, resp) + if waitErr != nil { + return waitErr + } + return err +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go new file mode 100644 index 0000000000..899f98f539 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go @@ -0,0 +1,67 @@ +package coordinator + +import ( + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func TestSetMaxBlocksPerFileUpdatesReaderState(t *testing.T) 
{ + reader, err := NewReaderWithMaxBlocksPerFile(t.TempDir(), 10) + require.NoError(t, err) + t.Cleanup(func() { _ = reader.Close() }) + + resp := make(chan error, 1) + coord := &Coordinator{ + config: parquet.StoreConfig{ + MaxBlocksPerFile: 10, + }, + reader: reader, + } + + coord.handleSetMaxBlocksPerFile(setMaxBlocksPerFileReq{ + maxBlocksPerFile: 3, + resp: resp, + }) + + require.NoError(t, <-resp) + require.Equal(t, uint64(3), coord.config.MaxBlocksPerFile) + require.Equal(t, uint64(3), reader.maxBlocksPerFile) +} + +func TestUnbufferedRequestsApplyBackpressure(t *testing.T) { + requests := make(chan coordRequest) + done := make(chan struct{}) + coord := &Coordinator{ + requests: requests, + done: done, + } + go coord.run() + + require.Zero(t, cap(coord.requests)) + + firstResp := make(chan writeResp) + coord.requests <- writeReq{ + inputs: []parquet.ReceiptInput{testReceiptInput(1, common.HexToHash("0x1"))}, + resp: firstResp, + } + time.Sleep(10 * time.Millisecond) + + secondDone := make(chan error, 1) + go func() { + secondDone <- coord.Flush() + }() + + select { + case err := <-secondDone: + t.Fatalf("second request completed before first unblocked: %v", err) + case <-time.After(25 * time.Millisecond): + } + + require.Error(t, (<-firstResp).err) + require.NoError(t, <-secondDone) + require.NoError(t, coord.Close()) +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/export_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/export_test.go new file mode 100644 index 0000000000..29067570b8 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/export_test.go @@ -0,0 +1,5 @@ +package coordinator + +func forcePruneTickForTest(c *Coordinator) { + c.handlePruneTick() +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/files.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/files.go similarity index 99% rename from sei-db/ledger_db/receipt/parquet_v2/files.go rename to sei-db/ledger_db/receipt/parquet_v2/coordinator/files.go 
index 6095f355c1..49be193b3d 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/files.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/files.go @@ -1,4 +1,4 @@ -package parquet_v2 +package coordinator import ( "fmt" diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/files_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/files_test.go new file mode 100644 index 0000000000..6363c95987 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/files_test.go @@ -0,0 +1,26 @@ +package coordinator + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestScanClosedFilesSortsByStartBlock(t *testing.T) { + dir := t.TempDir() + for _, startBlock := range []uint64{1000, 0, 500} { + writeReceiptFile(t, dir, startBlock, []uint64{startBlock + 1}) + writeLogFile(t, dir, startBlock) + } + + reader, err := NewReaderWithMaxBlocksPerFile(dir, 500) + require.NoError(t, err) + t.Cleanup(func() { _ = reader.Close() }) + + closedFiles, err := scanClosedFiles(dir, reader) + require.NoError(t, err) + require.Len(t, closedFiles, 3) + require.Equal(t, uint64(0), closedFiles[0].startBlock) + require.Equal(t, uint64(500), closedFiles[1].startBlock) + require.Equal(t, uint64(1000), closedFiles[2].startBlock) +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go similarity index 85% rename from sei-db/ledger_db/receipt/parquet_v2/handlers.go rename to sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go index 56671c72ec..a158d1297c 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go @@ -1,4 +1,4 @@ -package parquet_v2 +package coordinator import ( "fmt" @@ -10,11 +10,11 @@ import ( "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" ) -func (c *coordinator) handleWrite(req writeReq) { +func (c *Coordinator) handleWrite(req writeReq) { req.resp <- writeResp{err: 
c.writeReceipts(req.inputs)} } -func (c *coordinator) handleReadByTxHash(req readByTxHashReq) { +func (c *Coordinator) handleReadByTxHash(req readByTxHashReq) { if result := c.cachedReceiptByTxHash(req.txHash); result != nil { req.resp <- readReceiptResp{result: result} return @@ -28,7 +28,7 @@ func (c *coordinator) handleReadByTxHash(req readByTxHashReq) { req.resp <- readReceiptResp{result: result, err: err} } -func (c *coordinator) handleReadByTxHashInBlock(req readByTxHashInBlockReq) { +func (c *Coordinator) handleReadByTxHashInBlock(req readByTxHashInBlockReq) { if result := c.cachedReceiptByTxHashInBlock(req.txHash, req.blockNumber); result != nil { req.resp <- readReceiptResp{result: result} return @@ -42,7 +42,7 @@ func (c *coordinator) handleReadByTxHashInBlock(req readByTxHashInBlockReq) { req.resp <- readReceiptResp{result: result, err: err} } -func (c *coordinator) handleGetLogs(req getLogsReq) { +func (c *Coordinator) handleGetLogs(req getLogsReq) { if c.reader == nil { req.resp <- getLogsResp{err: fmt.Errorf("parquet reader is not initialized")} return @@ -52,53 +52,53 @@ func (c *coordinator) handleGetLogs(req getLogsReq) { req.resp <- getLogsResp{results: results, err: err} } -func (c *coordinator) handleObserveEmptyBlock(req observeEmptyBlockReq) { +func (c *Coordinator) handleObserveEmptyBlock(req observeEmptyBlockReq) { req.resp <- c.observeEmptyBlock(req.height) } -func (c *coordinator) handleFlush(req flushReq) { +func (c *Coordinator) handleFlush(req flushReq) { req.resp <- c.flushOpenFile() } -func (c *coordinator) handleLatestVersion(req latestVersionReq) { +func (c *Coordinator) handleLatestVersion(req latestVersionReq) { req.resp <- c.latestVersion } -func (c *coordinator) handleSetLatestVersion(req setLatestVersionReq) { +func (c *Coordinator) handleSetLatestVersion(req setLatestVersionReq) { c.latestVersion = req.version req.resp <- nil } -func (c *coordinator) handleSetEarliestVersion(req setEarliestVersionReq) { +func (c *Coordinator) 
handleSetEarliestVersion(req setEarliestVersionReq) { c.earliestVersion = req.version req.resp <- nil } -func (c *coordinator) handleUpdateLatestVersion(req updateLatestVersionReq) { +func (c *Coordinator) handleUpdateLatestVersion(req updateLatestVersionReq) { if req.version > c.latestVersion { c.latestVersion = req.version } req.resp <- nil } -func (c *coordinator) handleCacheRotateInterval(req cacheRotateIntervalReq) { +func (c *Coordinator) handleCacheRotateInterval(req cacheRotateIntervalReq) { req.resp <- c.config.MaxBlocksPerFile } -func (c *coordinator) handleFileStartBlock(req fileStartBlockReq) { +func (c *Coordinator) handleFileStartBlock(req fileStartBlockReq) { req.resp <- c.fileStartBlock } -func (c *coordinator) handleIsRotationBoundary(req isRotationBoundaryReq) { +func (c *Coordinator) handleIsRotationBoundary(req isRotationBoundaryReq) { req.resp <- c.isRotationBoundary(req.blockNumber) } -func (c *coordinator) handleSetBlockFlushInterval(req setBlockFlushIntervalReq) { +func (c *Coordinator) handleSetBlockFlushInterval(req setBlockFlushIntervalReq) { c.config.BlockFlushInterval = req.interval req.resp <- nil } -func (c *coordinator) handleSetMaxBlocksPerFile(req setMaxBlocksPerFileReq) { +func (c *Coordinator) handleSetMaxBlocksPerFile(req setMaxBlocksPerFileReq) { c.config.MaxBlocksPerFile = req.maxBlocksPerFile if c.reader != nil { c.reader.setMaxBlocksPerFile(req.maxBlocksPerFile) @@ -106,17 +106,17 @@ func (c *coordinator) handleSetMaxBlocksPerFile(req setMaxBlocksPerFileReq) { req.resp <- nil } -func (c *coordinator) handleSetFaultHooks(req setFaultHooksReq) { +func (c *Coordinator) handleSetFaultHooks(req setFaultHooksReq) { c.faultHooks = req.hooks req.resp <- nil } -func (c *coordinator) handleReplayWAL(req replayWALReq) { +func (c *Coordinator) handleReplayWAL(req replayWALReq) { result, err := c.replayWAL(req.converter) req.resp <- replayWALResp{result: result, err: err} } -func (c *coordinator) handlePruneTick() { +func (c 
*Coordinator) handlePruneTick() { // TODO(future-async): if read I/O moves to a worker pool, gate prune on // map[fileID]int reference counts that the coordinator increments on // dispatch and decrements on completion. @@ -130,7 +130,7 @@ func (c *coordinator) handlePruneTick() { c.pruneOldFiles(uint64(pruneBeforeBlock)) } -func (c *coordinator) handleClose(req closeReq) { +func (c *Coordinator) handleClose(req closeReq) { c.stopPruneTicker() if err := c.flushOpenFile(); err != nil { req.resp <- err @@ -157,7 +157,7 @@ func (c *coordinator) handleClose(req closeReq) { req.resp <- nil } -func (c *coordinator) handleSimulateCrash(req simulateCrashReq) { +func (c *Coordinator) handleSimulateCrash(req simulateCrashReq) { c.stopPruneTicker() if c.receiptFile != nil { _ = c.receiptFile.Close() @@ -178,7 +178,7 @@ func (c *coordinator) handleSimulateCrash(req simulateCrashReq) { req.resp <- struct{}{} } -func (c *coordinator) writeReceipts(inputs []parquet.ReceiptInput) error { +func (c *Coordinator) writeReceipts(inputs []parquet.ReceiptInput) error { if len(inputs) == 0 { return nil } @@ -251,7 +251,7 @@ func (c *coordinator) writeReceipts(inputs []parquet.ReceiptInput) error { return nil } -func (c *coordinator) applyReceipt(input parquet.ReceiptInput) error { +func (c *Coordinator) applyReceipt(input parquet.ReceiptInput) error { if c.receiptWriter == nil { aligned := alignedFileStartBlock(input.BlockNumber, c.config.MaxBlocksPerFile) if aligned >= c.fileStartBlock { @@ -293,7 +293,7 @@ func (c *coordinator) applyReceipt(input parquet.ReceiptInput) error { return nil } -func (c *coordinator) cachedReceiptByTxHash(txHash common.Hash) *parquet.ReceiptResult { +func (c *Coordinator) cachedReceiptByTxHash(txHash common.Hash) *parquet.ReceiptResult { entries := c.tempWriteCache[txHash] if len(entries) == 0 { return nil @@ -301,7 +301,7 @@ func (c *coordinator) cachedReceiptByTxHash(txHash common.Hash) *parquet.Receipt return receiptResultFromTemp(txHash, entries[0]) } 
-func (c *coordinator) cachedReceiptByTxHashInBlock(txHash common.Hash, blockNumber uint64) *parquet.ReceiptResult { +func (c *Coordinator) cachedReceiptByTxHashInBlock(txHash common.Hash, blockNumber uint64) *parquet.ReceiptResult { for _, entry := range c.tempWriteCache[txHash] { if entry.blockNumber == blockNumber { return receiptResultFromTemp(txHash, entry) @@ -318,7 +318,7 @@ func receiptResultFromTemp(txHash common.Hash, entry tempReceipt) *parquet.Recei } } -func (c *coordinator) receiptFilesSnapshot() []string { +func (c *Coordinator) receiptFilesSnapshot() []string { files := make([]string, 0, len(c.closedFiles)) for _, f := range c.closedFiles { files = append(files, f.receiptPath) @@ -326,7 +326,7 @@ func (c *coordinator) receiptFilesSnapshot() []string { return files } -func (c *coordinator) receiptFileSnapshotForBlock(blockNumber uint64) []string { +func (c *Coordinator) receiptFileSnapshotForBlock(blockNumber uint64) []string { var best string for _, f := range c.closedFiles { if f.startBlock > blockNumber { @@ -340,7 +340,7 @@ func (c *coordinator) receiptFileSnapshotForBlock(blockNumber uint64) []string { return []string{best} } -func (c *coordinator) logFilesSnapshot() []string { +func (c *Coordinator) logFilesSnapshot() []string { files := make([]string, 0, len(c.closedFiles)) for _, f := range c.closedFiles { files = append(files, f.logPath) @@ -348,7 +348,7 @@ func (c *coordinator) logFilesSnapshot() []string { return files } -func (c *coordinator) isRotationBoundary(blockNumber uint64) bool { +func (c *Coordinator) isRotationBoundary(blockNumber uint64) bool { return c.config.MaxBlocksPerFile > 0 && blockNumber%c.config.MaxBlocksPerFile == 0 } @@ -359,7 +359,7 @@ func alignedFileStartBlock(blockNumber, maxBlocksPerFile uint64) uint64 { return (blockNumber / maxBlocksPerFile) * maxBlocksPerFile } -func (c *coordinator) initWriters() error { +func (c *Coordinator) initWriters() error { receiptPath := filepath.Join(c.basePath, 
fmt.Sprintf("receipts_%d.parquet", c.fileStartBlock)) logPath := filepath.Join(c.basePath, fmt.Sprintf("logs_%d.parquet", c.fileStartBlock)) @@ -396,7 +396,7 @@ func (c *coordinator) initWriters() error { return nil } -func (c *coordinator) rotateOpenFile(newBlockNumber uint64) error { +func (c *Coordinator) rotateOpenFile(newBlockNumber uint64) error { if err := c.rotateOpenFileWithoutWAL(newBlockNumber); err != nil { return err } @@ -412,7 +412,7 @@ func (c *coordinator) rotateOpenFile(newBlockNumber uint64) error { return nil } -func (c *coordinator) rotateOpenFileWithoutWAL(newBlockNumber uint64) error { +func (c *Coordinator) rotateOpenFileWithoutWAL(newBlockNumber uint64) error { if c.receiptWriter == nil { return nil } @@ -447,7 +447,7 @@ func (c *coordinator) rotateOpenFileWithoutWAL(newBlockNumber uint64) error { return nil } -func (c *coordinator) dropTempCacheBefore(blockNumber uint64) { +func (c *Coordinator) dropTempCacheBefore(blockNumber uint64) { for txHash, entries := range c.tempWriteCache { kept := entries[:0] for _, entry := range entries { @@ -463,7 +463,7 @@ func (c *coordinator) dropTempCacheBefore(blockNumber uint64) { } } -func (c *coordinator) observeEmptyBlock(height uint64) error { +func (c *Coordinator) observeEmptyBlock(height uint64) error { if height <= c.lastSeenBlock { return nil } @@ -478,7 +478,7 @@ func (c *coordinator) observeEmptyBlock(height uint64) error { return nil } -func (c *coordinator) flushOpenFile() error { +func (c *Coordinator) flushOpenFile() error { if len(c.receiptsBuffer) == 0 { return nil } @@ -522,7 +522,7 @@ func (c *coordinator) flushOpenFile() error { return nil } -func (c *coordinator) closeWriters() error { +func (c *Coordinator) closeWriters() error { var errs []error if c.receiptWriter != nil { diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/helpers_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/helpers_test.go new file mode 100644 index 0000000000..aba9d3576c --- /dev/null +++ 
b/sei-db/ledger_db/receipt/parquet_v2/coordinator/helpers_test.go @@ -0,0 +1,210 @@ +package coordinator + +import ( + "context" + "fmt" + "math/big" + "os" + "path/filepath" + "strconv" + "testing" + + "github.com/ethereum/go-ethereum/common" + parquetgo "github.com/parquet-go/parquet-go" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func testReceiptInput(blockNumber uint64, txHash common.Hash) parquet.ReceiptInput { + receiptBytes := []byte{byte(blockNumber), txHash[31]} + return parquet.ReceiptInput{ + BlockNumber: blockNumber, + Receipt: parquet.ReceiptRecord{ + TxHash: txHash[:], + BlockNumber: blockNumber, + ReceiptBytes: receiptBytes, + }, + Logs: []parquet.LogRecord{{ + BlockNumber: blockNumber, + TxHash: txHash[:], + Address: common.BigToAddress(new(big.Int).SetUint64(blockNumber)).Bytes(), + }}, + ReceiptBytes: receiptBytes, + } +} + +func newWriteCoordinator(t *testing.T, wal *recordingWAL) *Coordinator { + t.Helper() + + cfg := parquet.DefaultStoreConfig() + cfg.DBDirectory = t.TempDir() + cfg.MaxBlocksPerFile = 500 + cfg.BlockFlushInterval = 0 + + return &Coordinator{ + config: cfg, + basePath: cfg.DBDirectory, + receiptsBuffer: make([]parquet.ReceiptRecord, 0, 1000), + logsBuffer: make([]parquet.LogRecord, 0, 10000), + tempWriteCache: make(map[common.Hash][]tempReceipt), + wal: wal, + } +} + +func newReplayCoordinator(t *testing.T, wal *recordingWAL) *Coordinator { + t.Helper() + + coord := newWriteCoordinator(t, wal) + coord.config.MaxBlocksPerFile = 4 + return coord +} + +func replayWALWithEntries(t *testing.T, entries ...parquet.WALEntry) *recordingWAL { + t.Helper() + + wal := &recordingWAL{} + for _, entry := range entries { + require.NoError(t, wal.Write(entry)) + } + return wal +} + +func replayConverterForTest(blockNumber uint64, receiptBytes []byte, _ uint) (ReplayReceipt, error) { + txHash := common.BigToHash(new(big.Int).SetUint64(uint64(receiptBytes[0]))) + input := 
testReceiptInput(blockNumber, txHash) + input.ReceiptBytes = append([]byte(nil), receiptBytes...) + input.Receipt.ReceiptBytes = append([]byte(nil), receiptBytes...) + + return ReplayReceipt{ + Input: input, + TxHash: txHash, + Warmup: input.Receipt, + LogCount: uint(len(input.Logs)), + }, nil +} + +func writeReceiptFile(t *testing.T, dir string, startBlock uint64, blocks []uint64) { + t.Helper() + + path := filepath.Join(dir, fmt.Sprintf("receipts_%d.parquet", startBlock)) + f, err := os.Create(path) + require.NoError(t, err) + + w := parquetgo.NewGenericWriter[parquet.ReceiptRecord](f) + for _, block := range blocks { + txHash := common.BigToHash(new(big.Int).SetUint64(block)) + _, err := w.Write([]parquet.ReceiptRecord{{ + TxHash: txHash[:], + BlockNumber: block, + ReceiptBytes: []byte{byte(block)}, + }}) + require.NoError(t, err) + } + require.NoError(t, w.Close()) + require.NoError(t, f.Close()) +} + +func writeLogFile(t *testing.T, dir string, startBlock uint64) { + t.Helper() + + path := filepath.Join(dir, fmt.Sprintf("logs_%d.parquet", startBlock)) + f, err := os.Create(path) + require.NoError(t, err) + + w := parquetgo.NewGenericWriter[parquet.LogRecord](f) + txHash := common.BigToHash(new(big.Int).SetUint64(startBlock)) + _, err = w.Write([]parquet.LogRecord{{ + BlockNumber: startBlock, + TxHash: txHash[:], + }}) + require.NoError(t, err) + require.NoError(t, w.Close()) + require.NoError(t, f.Close()) +} + +func writeClosedFileSet(t *testing.T, dir string, starts ...uint64) []closedFile { + t.Helper() + + closed := make([]closedFile, 0, len(starts)) + for _, start := range starts { + block := start + 1 + writeReceiptFile(t, dir, start, []uint64{block}) + writeLogFile(t, dir, start) + closed = append(closed, closedFile{ + startBlock: start, + receiptPath: filepath.Join(dir, "receipts_"+strconv.FormatUint(start, 10)+".parquet"), + logPath: filepath.Join(dir, "logs_"+strconv.FormatUint(start, 10)+".parquet"), + }) + } + return closed +} + +func 
readClosedReceiptForTest(t *testing.T, coord *Coordinator, txHash common.Hash, blockNumber uint64) *parquet.ReceiptResult { + t.Helper() + + resp := make(chan readReceiptResp, 1) + coord.handleReadByTxHashInBlock(readByTxHashInBlockReq{ + ctx: context.Background(), + txHash: txHash, + blockNumber: blockNumber, + resp: resp, + }) + result := <-resp + require.NoError(t, result.err) + return result.result +} + +type recordingWAL struct { + entries []parquet.WALEntry + firstOffset uint64 + lastOffset uint64 + truncatedBefore []uint64 +} + +func (w *recordingWAL) Write(entry parquet.WALEntry) error { + if w.firstOffset == 0 { + w.firstOffset = 1 + } + w.lastOffset++ + w.entries = append(w.entries, entry) + return nil +} + +func (w *recordingWAL) TruncateBefore(offset uint64) error { + w.truncatedBefore = append(w.truncatedBefore, offset) + return nil +} + +func (w *recordingWAL) TruncateAfter(uint64) error { return nil } + +func (w *recordingWAL) ReadAt(uint64) (parquet.WALEntry, error) { return parquet.WALEntry{}, nil } + +func (w *recordingWAL) FirstOffset() (uint64, error) { return w.firstOffset, nil } + +func (w *recordingWAL) LastOffset() (uint64, error) { return w.lastOffset, nil } + +func (w *recordingWAL) Replay(firstOffset, lastOffset uint64, fn func(uint64, parquet.WALEntry) error) error { + for i, entry := range w.entries { + offset := uint64(i) + 1 + if offset < firstOffset || offset > lastOffset { + continue + } + if err := fn(offset, entry); err != nil { + return err + } + } + return nil +} + +func (w *recordingWAL) Close() error { return nil } + +var _ interface { + Write(parquet.WALEntry) error + TruncateBefore(uint64) error + TruncateAfter(uint64) error + ReadAt(uint64) (parquet.WALEntry, error) + FirstOffset() (uint64, error) + LastOffset() (uint64, error) + Replay(uint64, uint64, func(uint64, parquet.WALEntry) error) error + Close() error +} = (*recordingWAL)(nil) diff --git a/sei-db/ledger_db/receipt/parquet_v2/prune.go 
b/sei-db/ledger_db/receipt/parquet_v2/coordinator/prune.go similarity index 87% rename from sei-db/ledger_db/receipt/parquet_v2/prune.go rename to sei-db/ledger_db/receipt/parquet_v2/coordinator/prune.go index ff35262d25..8d5a5811fa 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/prune.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/prune.go @@ -1,4 +1,4 @@ -package parquet_v2 +package coordinator import ( "log" @@ -7,7 +7,7 @@ import ( var removeFile = os.Remove -func (c *coordinator) pruneOldFiles(pruneBeforeBlock uint64) int { +func (c *Coordinator) pruneOldFiles(pruneBeforeBlock uint64) int { if len(c.closedFiles) == 0 { return 0 } @@ -36,7 +36,7 @@ func (c *coordinator) pruneOldFiles(pruneBeforeBlock uint64) int { return prunedCount } -func (c *coordinator) shouldPruneClosedFile(f closedFile, pruneBeforeBlock uint64) bool { +func (c *Coordinator) shouldPruneClosedFile(f closedFile, pruneBeforeBlock uint64) bool { fileEndBlock := f.startBlock + c.config.MaxBlocksPerFile if fileEndBlock < f.startBlock { fileEndBlock = ^uint64(0) diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_prune_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/prune_test.go similarity index 67% rename from sei-db/ledger_db/receipt/parquet_v2/store_prune_test.go rename to sei-db/ledger_db/receipt/parquet_v2/coordinator/prune_test.go index 650bd7be92..033a2e5940 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_prune_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/prune_test.go @@ -1,12 +1,10 @@ -package parquet_v2 +package coordinator import ( - "context" "errors" "math/big" "os" "path/filepath" - "strconv" "testing" "github.com/ethereum/go-ethereum/common" @@ -22,7 +20,7 @@ func TestPruneTickDeletesEligibleClosedFiles(t *testing.T) { require.NoError(t, err) t.Cleanup(func() { require.NoError(t, reader.Close()) }) - coord := &coordinator{ + coord := &Coordinator{ config: parquet.StoreConfig{ KeepRecent: 4, MaxBlocksPerFile: 4, @@ -65,7 +63,7 @@ func 
TestPruneKeepsFilePairTrackedWhenDeleteFails(t *testing.T) { return os.Remove(path) } - coord := &coordinator{ + coord := &Coordinator{ config: parquet.StoreConfig{MaxBlocksPerFile: 4}, closedFiles: closedFiles, } @@ -76,35 +74,3 @@ func TestPruneKeepsFilePairTrackedWhenDeleteFails(t *testing.T) { require.FileExists(t, failPath) require.FileExists(t, filepath.Join(dir, "logs_0.parquet")) } - -func writeClosedFileSet(t *testing.T, dir string, starts ...uint64) []closedFile { - t.Helper() - - closed := make([]closedFile, 0, len(starts)) - for _, start := range starts { - block := start + 1 - writeReceiptFile(t, dir, start, []uint64{block}) - writeLogFile(t, dir, start) - closed = append(closed, closedFile{ - startBlock: start, - receiptPath: filepath.Join(dir, "receipts_"+strconv.FormatUint(start, 10)+".parquet"), - logPath: filepath.Join(dir, "logs_"+strconv.FormatUint(start, 10)+".parquet"), - }) - } - return closed -} - -func readClosedReceiptForTest(t *testing.T, coord *coordinator, txHash common.Hash, blockNumber uint64) *parquet.ReceiptResult { - t.Helper() - - resp := make(chan readReceiptResp, 1) - coord.handleReadByTxHashInBlock(readByTxHashInBlockReq{ - ctx: context.Background(), - txHash: txHash, - blockNumber: blockNumber, - resp: resp, - }) - result := <-resp - require.NoError(t, result.err) - return result.result -} diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/read_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/read_test.go new file mode 100644 index 0000000000..0fc8dbe36b --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/read_test.go @@ -0,0 +1,56 @@ +package coordinator + +import ( + "context" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/stretchr/testify/require" +) + +func TestReadByTxHashHitsTempCache(t *testing.T) { + txHash := common.HexToHash("0xabc") + coord := &Coordinator{ + tempWriteCache: map[common.Hash][]tempReceipt{ + txHash: { + {blockNumber: 10, writeOrdinal: 0, 
receiptBytes: []byte("first")}, + {blockNumber: 10, writeOrdinal: 1, receiptBytes: []byte("second")}, + {blockNumber: 11, writeOrdinal: 2, receiptBytes: []byte("third")}, + }, + }, + } + + resp := make(chan readReceiptResp, 1) + coord.handleReadByTxHash(readByTxHashReq{ + ctx: context.Background(), + txHash: txHash, + resp: resp, + }) + result := <-resp + require.NoError(t, result.err) + require.Equal(t, uint64(10), result.result.BlockNumber) + require.Equal(t, []byte("first"), result.result.ReceiptBytes) + + resp = make(chan readReceiptResp, 1) + coord.handleReadByTxHashInBlock(readByTxHashInBlockReq{ + ctx: context.Background(), + txHash: txHash, + blockNumber: 11, + resp: resp, + }) + result = <-resp + require.NoError(t, result.err) + require.Equal(t, uint64(11), result.result.BlockNumber) + require.Equal(t, []byte("third"), result.result.ReceiptBytes) + + resp = make(chan readReceiptResp, 1) + coord.handleReadByTxHashInBlock(readByTxHashInBlockReq{ + ctx: context.Background(), + txHash: txHash, + blockNumber: 10, + resp: resp, + }) + result = <-resp + require.NoError(t, result.err) + require.Equal(t, []byte("first"), result.result.ReceiptBytes) +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/reader.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/reader.go similarity index 99% rename from sei-db/ledger_db/receipt/parquet_v2/reader.go rename to sei-db/ledger_db/receipt/parquet_v2/coordinator/reader.go index fc181b10c1..581c5297c8 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/reader.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/reader.go @@ -1,4 +1,4 @@ -package parquet_v2 +package coordinator import ( "context" diff --git a/sei-db/ledger_db/receipt/parquet_v2/requests.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go similarity index 99% rename from sei-db/ledger_db/receipt/parquet_v2/requests.go rename to sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go index 16cd3d30e2..9bb6e79af1 100644 --- 
a/sei-db/ledger_db/receipt/parquet_v2/requests.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go @@ -1,4 +1,4 @@ -package parquet_v2 +package coordinator import ( "context" diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/rotation_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/rotation_test.go new file mode 100644 index 0000000000..c8733ef1f8 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/rotation_test.go @@ -0,0 +1,121 @@ +package coordinator + +import ( + "math/big" + "path/filepath" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func TestRotationBoundaryPrimitives(t *testing.T) { + coord := &Coordinator{ + config: parquet.StoreConfig{MaxBlocksPerFile: 500}, + } + + resp := make(chan bool, 1) + coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 0, resp: resp}) + require.True(t, <-resp) + + resp = make(chan bool, 1) + coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 500, resp: resp}) + require.True(t, <-resp) + + resp = make(chan bool, 1) + coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 501, resp: resp}) + require.False(t, <-resp) + + coord.config.MaxBlocksPerFile = 0 + resp = make(chan bool, 1) + coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 500, resp: resp}) + require.False(t, <-resp) +} + +func TestAlignedFileStartBlock(t *testing.T) { + require.Equal(t, uint64(5000), alignedFileStartBlock(5234, 500)) + require.Equal(t, uint64(5000), alignedFileStartBlock(5000, 500)) + require.Equal(t, uint64(0), alignedFileStartBlock(499, 500)) + require.Equal(t, uint64(5234), alignedFileStartBlock(5234, 0)) +} + +func TestWriteRotatesAtAlignedBoundary(t *testing.T) { + wal := &recordingWAL{} + coord := newWriteCoordinator(t, wal) + coord.config.MaxBlocksPerFile = 4 + defer func() { require.NoError(t, coord.closeWriters()) }() + 
+ for block := uint64(1); block <= 4; block++ { + require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + testReceiptInput(block, common.BigToHash(new(big.Int).SetUint64(block))), + })) + } + + require.Len(t, coord.closedFiles, 1) + require.Equal(t, uint64(0), coord.closedFiles[0].startBlock) + require.Equal(t, uint64(4), coord.fileStartBlock) + require.FileExists(t, filepath.Join(coord.basePath, "receipts_0.parquet")) + require.FileExists(t, filepath.Join(coord.basePath, "logs_0.parquet")) + require.FileExists(t, filepath.Join(coord.basePath, "receipts_4.parquet")) + require.FileExists(t, filepath.Join(coord.basePath, "logs_4.parquet")) + + require.Len(t, wal.truncatedBefore, 1) + require.Equal(t, uint64(4), wal.truncatedBefore[0]) + require.Len(t, coord.tempWriteCache, 1) + require.Contains(t, coord.tempWriteCache, common.BigToHash(big.NewInt(4))) +} + +func TestRotateOpenFilePrunesOnlyOldTempCacheEntries(t *testing.T) { + txHash := common.HexToHash("0xabc") + coord := &Coordinator{ + tempWriteCache: map[common.Hash][]tempReceipt{ + txHash: { + {blockNumber: 1, writeOrdinal: 0}, + {blockNumber: 4, writeOrdinal: 1}, + }, + common.HexToHash("0xdef"): { + {blockNumber: 2, writeOrdinal: 2}, + }, + }, + } + + coord.dropTempCacheBefore(4) + + require.Len(t, coord.tempWriteCache, 1) + require.Len(t, coord.tempWriteCache[txHash], 1) + require.Equal(t, uint64(4), coord.tempWriteCache[txHash][0].blockNumber) +} + +func TestObserveEmptyBlockHonorsMonotonicLastSeen(t *testing.T) { + coord := newWriteCoordinator(t, &recordingWAL{}) + + require.NoError(t, coord.observeEmptyBlock(5)) + require.Equal(t, uint64(5), coord.lastSeenBlock) + + require.NoError(t, coord.observeEmptyBlock(4)) + require.Equal(t, uint64(5), coord.lastSeenBlock) + require.Empty(t, coord.closedFiles) +} + +func TestObserveEmptyBlockRotatesAtBoundary(t *testing.T) { + wal := &recordingWAL{} + coord := newWriteCoordinator(t, wal) + coord.config.MaxBlocksPerFile = 4 + defer func() { require.NoError(t, 
coord.closeWriters()) }() + + require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + testReceiptInput(1, common.HexToHash("0x1")), + })) + require.NotNil(t, coord.receiptWriter) + + require.NoError(t, coord.observeEmptyBlock(4)) + + require.Equal(t, uint64(4), coord.lastSeenBlock) + require.Equal(t, uint64(4), coord.fileStartBlock) + require.Len(t, coord.closedFiles, 1) + require.Equal(t, uint64(0), coord.closedFiles[0].startBlock) + require.FileExists(t, filepath.Join(coord.basePath, "receipts_0.parquet")) + require.FileExists(t, filepath.Join(coord.basePath, "receipts_4.parquet")) + require.Empty(t, coord.tempWriteCache) +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go new file mode 100644 index 0000000000..a5131eb9ad --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go @@ -0,0 +1,46 @@ +package coordinator + +import ( + "errors" + "fmt" + "math" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" +) + +// ErrStoreClosed is returned when a request is made after the coordinator has +// stopped accepting work. 
+var ErrStoreClosed = errors.New("store closed") + +type tempReceipt struct { + blockNumber uint64 + writeOrdinal uint64 + receiptBytes []byte +} + +type ReplayedBlock struct { + BlockNumber uint64 + TxHashes []common.Hash +} + +type WALReceiptConverter func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) + +type ReplayReceipt struct { + Input parquet.ReceiptInput + TxHash common.Hash + Warmup parquet.ReceiptRecord + LogCount uint +} + +type ReplayResult struct { + WarmupRecords []parquet.ReceiptRecord + Blocks []ReplayedBlock +} + +func int64FromUint64(value uint64) (int64, error) { + if value > uint64(math.MaxInt64) { + return 0, fmt.Errorf("value %d overflows int64", value) + } + return int64(value), nil +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/wal.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go similarity index 95% rename from sei-db/ledger_db/receipt/parquet_v2/wal.go rename to sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go index 16fb632b64..5ae8b1d131 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/wal.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go @@ -1,4 +1,4 @@ -package parquet_v2 +package coordinator import ( "fmt" @@ -8,7 +8,7 @@ import ( "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" ) -func (c *coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, error) { +func (c *Coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, error) { if converter == nil { return ReplayResult{}, fmt.Errorf("WAL receipt converter is nil") } @@ -113,7 +113,7 @@ func (c *coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, er return result, nil } -func (c *coordinator) applyReceiptFromReplay(input parquet.ReceiptInput) error { +func (c *Coordinator) applyReceiptFromReplay(input parquet.ReceiptInput) error { if c.receiptWriter != nil && input.BlockNumber != c.lastSeenBlock && c.isRotationBoundary(input.BlockNumber) { if err := 
c.rotateOpenFileWithoutWAL(input.BlockNumber); err != nil { return err @@ -147,7 +147,7 @@ func copyReceiptRecord(record parquet.ReceiptRecord) parquet.ReceiptRecord { } } -func (c *coordinator) clearWALPreservingLast() error { +func (c *Coordinator) clearWALPreservingLast() error { if c.wal == nil { return nil } diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal_test.go new file mode 100644 index 0000000000..520844f029 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal_test.go @@ -0,0 +1,74 @@ +package coordinator + +import ( + "math/big" + "path/filepath" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func TestReplayWALAppliesReceiptsAndPreservesDuplicateHashes(t *testing.T) { + wal := replayWALWithEntries(t, + parquet.WALEntry{BlockNumber: 1, Receipts: [][]byte{{7, 1}, {7, 2}}}, + parquet.WALEntry{BlockNumber: 2, Receipts: [][]byte{{8, 1}}}, + ) + coord := newReplayCoordinator(t, wal) + defer func() { require.NoError(t, coord.closeWriters()) }() + + result, err := coord.replayWAL(replayConverterForTest) + require.NoError(t, err) + + duplicateHash := common.BigToHash(new(big.Int).SetUint64(7)) + require.Len(t, result.WarmupRecords, 3) + require.Len(t, result.Blocks, 2) + require.Equal(t, uint64(1), result.Blocks[0].BlockNumber) + require.Equal(t, []common.Hash{duplicateHash, duplicateHash}, result.Blocks[0].TxHashes) + require.Len(t, coord.tempWriteCache[duplicateHash], 2) + require.Equal(t, int64(2), coord.latestVersion) + require.Empty(t, wal.truncatedBefore) +} + +func TestReplayWALSkipsEntriesBeforeFileStartAndTruncates(t *testing.T) { + wal := replayWALWithEntries(t, + parquet.WALEntry{BlockNumber: 2, Receipts: [][]byte{{2}}}, + parquet.WALEntry{BlockNumber: 4, Receipts: [][]byte{{4}}}, + ) + coord := newReplayCoordinator(t, wal) + 
coord.fileStartBlock = 4 + defer func() { require.NoError(t, coord.closeWriters()) }() + + result, err := coord.replayWAL(func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) { + require.NotEqual(t, uint64(2), blockNumber) + return replayConverterForTest(blockNumber, receiptBytes, logStartIndex) + }) + require.NoError(t, err) + + require.Len(t, result.WarmupRecords, 1) + require.Equal(t, uint64(4), result.WarmupRecords[0].BlockNumber) + require.Equal(t, []uint64{2}, wal.truncatedBefore) + require.Equal(t, int64(4), coord.latestVersion) +} + +func TestReplayWALRotatesBoundaryWithoutClearingWAL(t *testing.T) { + wal := replayWALWithEntries(t, + parquet.WALEntry{BlockNumber: 1, Receipts: [][]byte{{1}}}, + parquet.WALEntry{BlockNumber: 4, Receipts: [][]byte{{4}}}, + ) + coord := newReplayCoordinator(t, wal) + defer func() { require.NoError(t, coord.closeWriters()) }() + + _, err := coord.replayWAL(replayConverterForTest) + require.NoError(t, err) + + require.Len(t, coord.closedFiles, 1) + require.Equal(t, uint64(0), coord.closedFiles[0].startBlock) + require.Equal(t, uint64(4), coord.fileStartBlock) + require.FileExists(t, filepath.Join(coord.basePath, "receipts_0.parquet")) + require.FileExists(t, filepath.Join(coord.basePath, "receipts_4.parquet")) + require.Equal(t, []uint64{2}, wal.truncatedBefore) + require.Len(t, coord.tempWriteCache, 1) + require.Contains(t, coord.tempWriteCache, common.BigToHash(new(big.Int).SetUint64(4))) +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/write_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/write_test.go new file mode 100644 index 0000000000..0d5adfb332 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/write_test.go @@ -0,0 +1,64 @@ +package coordinator + +import ( + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func 
TestWriteReceiptsGroupsWALByBlockEncounterOrder(t *testing.T) { + wal := &recordingWAL{} + coord := newWriteCoordinator(t, wal) + defer func() { require.NoError(t, coord.closeWriters()) }() + + require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + testReceiptInput(2, common.HexToHash("0x22")), + testReceiptInput(1, common.HexToHash("0x11")), + testReceiptInput(2, common.HexToHash("0x23")), + })) + + require.Len(t, wal.entries, 2) + require.Equal(t, uint64(2), wal.entries[0].BlockNumber) + require.Len(t, wal.entries[0].Receipts, 2) + require.Equal(t, uint64(1), wal.entries[1].BlockNumber) + require.Len(t, wal.entries[1].Receipts, 1) +} + +func TestWriteReceiptsKeepsDuplicateHashCacheEntries(t *testing.T) { + wal := &recordingWAL{} + coord := newWriteCoordinator(t, wal) + defer func() { require.NoError(t, coord.closeWriters()) }() + + txHash := common.HexToHash("0xabc") + require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + testReceiptInput(1, txHash), + testReceiptInput(2, txHash), + })) + + require.Len(t, coord.receiptsBuffer, 2) + require.Equal(t, int64(2), coord.latestVersion) + require.Len(t, coord.tempWriteCache[txHash], 2) + require.Equal(t, uint64(1), coord.tempWriteCache[txHash][0].blockNumber) + require.Equal(t, uint64(0), coord.tempWriteCache[txHash][0].writeOrdinal) + require.Equal(t, uint64(2), coord.tempWriteCache[txHash][1].blockNumber) + require.Equal(t, uint64(1), coord.tempWriteCache[txHash][1].writeOrdinal) +} + +func TestWriteReceiptsFlushesAtConfiguredBlockInterval(t *testing.T) { + wal := &recordingWAL{} + coord := newWriteCoordinator(t, wal) + coord.config.BlockFlushInterval = 1 + defer func() { require.NoError(t, coord.closeWriters()) }() + + require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + testReceiptInput(1, common.HexToHash("0x1")), + testReceiptInput(2, common.HexToHash("0x2")), + })) + + require.Empty(t, coord.receiptsBuffer) + require.Empty(t, coord.logsBuffer) + require.Zero(t, 
coord.blocksSinceFlush) + require.Equal(t, int64(2), coord.latestVersion) +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/export_test.go b/sei-db/ledger_db/receipt/parquet_v2/export_test.go deleted file mode 100644 index db3a88b823..0000000000 --- a/sei-db/ledger_db/receipt/parquet_v2/export_test.go +++ /dev/null @@ -1,5 +0,0 @@ -package parquet_v2 - -func forcePruneTickForTest(c *coordinator) { - c.handlePruneTick() -} diff --git a/sei-db/ledger_db/receipt/parquet_v2/helpers_test.go b/sei-db/ledger_db/receipt/parquet_v2/helpers_test.go new file mode 100644 index 0000000000..1eaba96888 --- /dev/null +++ b/sei-db/ledger_db/receipt/parquet_v2/helpers_test.go @@ -0,0 +1,93 @@ +package parquet_v2 + +import ( + "fmt" + "math/big" + "os" + "path/filepath" + "testing" + + "github.com/ethereum/go-ethereum/common" + parquetgo "github.com/parquet-go/parquet-go" + "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + "github.com/stretchr/testify/require" +) + +func testReceiptInput(blockNumber uint64, txHash common.Hash) parquet.ReceiptInput { + receiptBytes := []byte{byte(blockNumber), txHash[31]} + return parquet.ReceiptInput{ + BlockNumber: blockNumber, + Receipt: parquet.ReceiptRecord{ + TxHash: txHash[:], + BlockNumber: blockNumber, + ReceiptBytes: receiptBytes, + }, + Logs: []parquet.LogRecord{{ + BlockNumber: blockNumber, + TxHash: txHash[:], + Address: common.BigToAddress(new(big.Int).SetUint64(blockNumber)).Bytes(), + }}, + ReceiptBytes: receiptBytes, + } +} + +func replayConverterForTest(blockNumber uint64, receiptBytes []byte, _ uint) (ReplayReceipt, error) { + txHash := common.BigToHash(new(big.Int).SetUint64(uint64(receiptBytes[0]))) + input := testReceiptInput(blockNumber, txHash) + input.ReceiptBytes = append([]byte(nil), receiptBytes...) + input.Receipt.ReceiptBytes = append([]byte(nil), receiptBytes...) 
+ + return ReplayReceipt{ + Input: input, + TxHash: txHash, + Warmup: input.Receipt, + LogCount: uint(len(input.Logs)), + }, nil +} + +func writeReceiptFile(t *testing.T, dir string, startBlock uint64, blocks []uint64) { + t.Helper() + + path := filepath.Join(dir, fmt.Sprintf("receipts_%d.parquet", startBlock)) + f, err := os.Create(path) + require.NoError(t, err) + + w := parquetgo.NewGenericWriter[parquet.ReceiptRecord](f) + for _, block := range blocks { + txHash := common.BigToHash(new(big.Int).SetUint64(block)) + _, err := w.Write([]parquet.ReceiptRecord{{ + TxHash: txHash[:], + BlockNumber: block, + ReceiptBytes: []byte{byte(block)}, + }}) + require.NoError(t, err) + } + require.NoError(t, w.Close()) + require.NoError(t, f.Close()) +} + +func writeLogFile(t *testing.T, dir string, startBlock uint64) { + t.Helper() + + path := filepath.Join(dir, fmt.Sprintf("logs_%d.parquet", startBlock)) + f, err := os.Create(path) + require.NoError(t, err) + + w := parquetgo.NewGenericWriter[parquet.LogRecord](f) + txHash := common.BigToHash(new(big.Int).SetUint64(startBlock)) + _, err = w.Write([]parquet.LogRecord{{ + BlockNumber: startBlock, + TxHash: txHash[:], + }}) + require.NoError(t, err) + require.NoError(t, w.Close()) + require.NoError(t, f.Close()) +} + +func logBlockNumbers(results []parquet.LogResult) []uint64 { + blocks := make([]uint64, 0, len(results)) + for _, result := range results { + blocks = append(blocks, result.BlockNumber) + } + return blocks +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/store.go b/sei-db/ledger_db/receipt/parquet_v2/store.go index 85fdf9cf4c..8016708356 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store.go @@ -2,150 +2,99 @@ package parquet_v2 import ( "context" - "fmt" - "os" - "path/filepath" - "sync" - "time" "github.com/ethereum/go-ethereum/common" "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" + 
"github.com/sei-protocol/sei-chain/sei-db/ledger_db/receipt/parquet_v2/coordinator" ) -// Store is the V2 parquet receipt store facade. In the finished implementation -// it will hold only channels into the coordinator goroutine. +// Store is the public facade of the V2 parquet receipt store. It wraps a +// coordinator.Coordinator and forwards all calls to it. type Store struct { - requests chan coordRequest - done chan struct{} - closeOnce sync.Once + coord *coordinator.Coordinator } -// NewStore creates a V2 store with a live coordinator goroutine and -// stubbed request handlers. +// NewStore creates a V2 store backed by a live coordinator goroutine. func NewStore(cfg parquet.StoreConfig) (*Store, error) { - storeCfg := resolveStoreConfig(cfg) - - if err := os.MkdirAll(storeCfg.DBDirectory, 0o750); err != nil { - return nil, fmt.Errorf("failed to create parquet base directory: %w", err) - } - - requests := make(chan coordRequest) - done := make(chan struct{}) - reader, err := NewReaderWithMaxBlocksPerFile(cfg.DBDirectory, storeCfg.MaxBlocksPerFile) - if err != nil { - return nil, err - } - cleanupReader := true - defer func() { - if cleanupReader { - _ = reader.Close() - } - }() - - walDir := filepath.Join(storeCfg.DBDirectory, "parquet-wal") - receiptWAL, err := parquet.NewWAL(walDir) - if err != nil { - return nil, err - } - cleanupWAL := true - defer func() { - if cleanupWAL { - _ = receiptWAL.Close() - } - }() - - closedFiles, err := scanClosedFiles(storeCfg.DBDirectory, reader) + c, err := coordinator.New(cfg) if err != nil { return nil, err } + return &Store{coord: c}, nil +} - c := &coordinator{ - requests: requests, - done: done, - config: storeCfg, - basePath: cfg.DBDirectory, - closedFiles: closedFiles, - receiptsBuffer: make([]parquet.ReceiptRecord, 0, 1000), - logsBuffer: make([]parquet.LogRecord, 0, 10000), - tempWriteCache: make(map[common.Hash][]tempReceipt), - reader: reader, - wal: receiptWAL, - latestVersion: 0, - earliestVersion: 0, - } +func 
(s *Store) WriteReceipts(inputs []parquet.ReceiptInput) error { + return s.coord.WriteReceipts(inputs) +} - receiptFiles := make([]string, 0, len(closedFiles)) - for _, f := range closedFiles { - receiptFiles = append(receiptFiles, f.receiptPath) - } - if maxBlock, ok, err := reader.MaxReceiptBlockNumber(context.Background(), receiptFiles); err != nil { - return nil, err - } else if ok { - latest, err := int64FromUint64(maxBlock) - if err != nil { - return nil, err - } - c.latestVersion = latest - if maxBlock < ^uint64(0) { - c.fileStartBlock = maxBlock + 1 - } - } +func (s *Store) GetReceiptByTxHash(ctx context.Context, txHash common.Hash) (*parquet.ReceiptResult, error) { + return s.coord.GetReceiptByTxHash(ctx, txHash) +} - if storeCfg.KeepRecent > 0 && storeCfg.PruneIntervalSeconds > 0 { - c.pruneTicker = time.NewTicker(time.Duration(storeCfg.PruneIntervalSeconds) * time.Second) - c.pruneTick = c.pruneTicker.C - } +func (s *Store) GetReceiptByTxHashInBlock(ctx context.Context, txHash common.Hash, blockNumber uint64) (*parquet.ReceiptResult, error) { + return s.coord.GetReceiptByTxHashInBlock(ctx, txHash, blockNumber) +} - s := &Store{ - requests: requests, - done: done, - } +func (s *Store) GetLogs(ctx context.Context, filter parquet.LogFilter) ([]parquet.LogResult, error) { + return s.coord.GetLogs(ctx, filter) +} - go c.run() - cleanupReader = false - cleanupWAL = false +func (s *Store) ObserveEmptyBlock(height uint64) error { + return s.coord.ObserveEmptyBlock(height) +} - return s, nil +func (s *Store) IsRotationBoundary(blockNumber uint64) bool { + return s.coord.IsRotationBoundary(blockNumber) } -func resolveStoreConfig(cfg parquet.StoreConfig) parquet.StoreConfig { - resolved := parquet.DefaultStoreConfig() - resolved.DBDirectory = cfg.DBDirectory - resolved.KeepRecent = cfg.KeepRecent - resolved.PruneIntervalSeconds = cfg.PruneIntervalSeconds - if cfg.TxIndexBackend != "" { - resolved.TxIndexBackend = cfg.TxIndexBackend - } - if cfg.BlockFlushInterval > 
0 { - resolved.BlockFlushInterval = cfg.BlockFlushInterval - } - if cfg.MaxBlocksPerFile > 0 { - resolved.MaxBlocksPerFile = cfg.MaxBlocksPerFile - } - return resolved +func (s *Store) FileStartBlock() uint64 { + return s.coord.FileStartBlock() } -func awaitResponse[T any](s *Store, req coordRequest, resp <-chan T) (T, error) { - var zero T +func (s *Store) LatestVersion() int64 { + return s.coord.LatestVersion() +} - select { - case s.requests <- req: - case <-s.done: - return zero, ErrStoreClosed - } +func (s *Store) SetLatestVersion(version int64) { + s.coord.SetLatestVersion(version) +} - select { - case r := <-resp: - return r, nil - case <-s.done: - return zero, ErrStoreClosed - } +func (s *Store) SetEarliestVersion(version int64) { + s.coord.SetEarliestVersion(version) } -func awaitError(s *Store, req coordRequest, resp <-chan error) error { - err, waitErr := awaitResponse(s, req, resp) - if waitErr != nil { - return waitErr - } - return err +func (s *Store) UpdateLatestVersion(version int64) { + s.coord.UpdateLatestVersion(version) +} + +func (s *Store) CacheRotateInterval() uint64 { + return s.coord.CacheRotateInterval() +} + +func (s *Store) Flush() error { + return s.coord.Flush() +} + +func (s *Store) Close() error { + return s.coord.Close() +} + +func (s *Store) SimulateCrash() { + s.coord.SimulateCrash() +} + +func (s *Store) SetBlockFlushInterval(interval uint64) { + s.coord.SetBlockFlushInterval(interval) +} + +func (s *Store) SetMaxBlocksPerFile(n uint64) { + s.coord.SetMaxBlocksPerFile(n) +} + +func (s *Store) SetFaultHooks(hooks *parquet.FaultHooks) { + s.coord.SetFaultHooks(hooks) +} + +func (s *Store) ReplayWAL(converter WALReceiptConverter) (ReplayResult, error) { + return s.coord.ReplayWAL(converter) } diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go index 1a3668ab54..c97f4689be 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go +++ 
b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go @@ -2,9 +2,7 @@ package parquet_v2 import ( "testing" - "time" - "github.com/ethereum/go-ethereum/common" "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" "github.com/stretchr/testify/require" ) @@ -23,7 +21,6 @@ func newDispatchStore(t *testing.T) *Store { func TestMetadataAndConfigRequestsDispatchThroughCoordinator(t *testing.T) { store := newDispatchStore(t) - require.Zero(t, cap(store.requests)) require.Equal(t, uint64(0), store.FileStartBlock()) require.Equal(t, int64(0), store.LatestVersion()) @@ -50,67 +47,6 @@ func TestMetadataAndConfigRequestsDispatchThroughCoordinator(t *testing.T) { require.False(t, store.IsRotationBoundary(8)) } -func TestSetMaxBlocksPerFileUpdatesReaderState(t *testing.T) { - reader, err := NewReaderWithMaxBlocksPerFile(t.TempDir(), 10) - require.NoError(t, err) - t.Cleanup(func() { _ = reader.Close() }) - - resp := make(chan error, 1) - coord := &coordinator{ - config: parquet.StoreConfig{ - MaxBlocksPerFile: 10, - }, - reader: reader, - } - - coord.handleSetMaxBlocksPerFile(setMaxBlocksPerFileReq{ - maxBlocksPerFile: 3, - resp: resp, - }) - - require.NoError(t, <-resp) - require.Equal(t, uint64(3), coord.config.MaxBlocksPerFile) - require.Equal(t, uint64(3), reader.maxBlocksPerFile) -} - -func TestUnbufferedRequestsApplyBackpressure(t *testing.T) { - requests := make(chan coordRequest) - done := make(chan struct{}) - coord := &coordinator{ - requests: requests, - done: done, - } - store := &Store{ - requests: requests, - done: done, - } - go coord.run() - - require.Zero(t, cap(store.requests)) - - firstResp := make(chan writeResp) - store.requests <- writeReq{ - inputs: []parquet.ReceiptInput{testReceiptInput(1, common.HexToHash("0x1"))}, - resp: firstResp, - } - time.Sleep(10 * time.Millisecond) - - secondDone := make(chan error, 1) - go func() { - secondDone <- store.Flush() - }() - - select { - case err := <-secondDone: - t.Fatalf("second request completed 
before first unblocked: %v", err) - case <-time.After(25 * time.Millisecond): - } - - require.Error(t, (<-firstResp).err) - require.NoError(t, <-secondDone) - require.NoError(t, store.Close()) -} - func TestCloseStopsFutureRequests(t *testing.T) { store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}) require.NoError(t, err) diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go index f4c012ba2a..9ab401f309 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go @@ -1,14 +1,10 @@ package parquet_v2 import ( - "fmt" - "math/big" "os" "path/filepath" "testing" - "github.com/ethereum/go-ethereum/common" - parquetgo "github.com/parquet-go/parquet-go" "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" "github.com/stretchr/testify/require" ) @@ -111,61 +107,3 @@ func TestNewStoreIgnoresUnmatchedFiles(t *testing.T) { require.Equal(t, uint64(0), store.FileStartBlock()) require.NoError(t, store.Close()) } - -func TestScanClosedFilesSortsByStartBlock(t *testing.T) { - dir := t.TempDir() - for _, startBlock := range []uint64{1000, 0, 500} { - writeReceiptFile(t, dir, startBlock, []uint64{startBlock + 1}) - writeLogFile(t, dir, startBlock) - } - - reader, err := NewReaderWithMaxBlocksPerFile(dir, 500) - require.NoError(t, err) - t.Cleanup(func() { _ = reader.Close() }) - - closedFiles, err := scanClosedFiles(dir, reader) - require.NoError(t, err) - require.Len(t, closedFiles, 3) - require.Equal(t, uint64(0), closedFiles[0].startBlock) - require.Equal(t, uint64(500), closedFiles[1].startBlock) - require.Equal(t, uint64(1000), closedFiles[2].startBlock) -} - -func writeReceiptFile(t *testing.T, dir string, startBlock uint64, blocks []uint64) { - t.Helper() - - path := filepath.Join(dir, fmt.Sprintf("receipts_%d.parquet", startBlock)) - f, err := os.Create(path) - require.NoError(t, err) - - w := 
parquetgo.NewGenericWriter[parquet.ReceiptRecord](f) - for _, block := range blocks { - txHash := common.BigToHash(new(big.Int).SetUint64(block)) - _, err := w.Write([]parquet.ReceiptRecord{{ - TxHash: txHash[:], - BlockNumber: block, - ReceiptBytes: []byte{byte(block)}, - }}) - require.NoError(t, err) - } - require.NoError(t, w.Close()) - require.NoError(t, f.Close()) -} - -func writeLogFile(t *testing.T, dir string, startBlock uint64) { - t.Helper() - - path := filepath.Join(dir, fmt.Sprintf("logs_%d.parquet", startBlock)) - f, err := os.Create(path) - require.NoError(t, err) - - w := parquetgo.NewGenericWriter[parquet.LogRecord](f) - txHash := common.BigToHash(new(big.Int).SetUint64(startBlock)) - _, err = w.Write([]parquet.LogRecord{{ - BlockNumber: startBlock, - TxHash: txHash[:], - }}) - require.NoError(t, err) - require.NoError(t, w.Close()) - require.NoError(t, f.Close()) -} diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go index 8106c8f995..f815ab4f46 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go @@ -10,53 +10,6 @@ import ( "github.com/stretchr/testify/require" ) -func TestReadByTxHashHitsTempCache(t *testing.T) { - txHash := common.HexToHash("0xabc") - coord := &coordinator{ - tempWriteCache: map[common.Hash][]tempReceipt{ - txHash: { - {blockNumber: 10, writeOrdinal: 0, receiptBytes: []byte("first")}, - {blockNumber: 10, writeOrdinal: 1, receiptBytes: []byte("second")}, - {blockNumber: 11, writeOrdinal: 2, receiptBytes: []byte("third")}, - }, - }, - } - - resp := make(chan readReceiptResp, 1) - coord.handleReadByTxHash(readByTxHashReq{ - ctx: context.Background(), - txHash: txHash, - resp: resp, - }) - result := <-resp - require.NoError(t, result.err) - require.Equal(t, uint64(10), result.result.BlockNumber) - require.Equal(t, []byte("first"), result.result.ReceiptBytes) - - resp = make(chan 
readReceiptResp, 1) - coord.handleReadByTxHashInBlock(readByTxHashInBlockReq{ - ctx: context.Background(), - txHash: txHash, - blockNumber: 11, - resp: resp, - }) - result = <-resp - require.NoError(t, result.err) - require.Equal(t, uint64(11), result.result.BlockNumber) - require.Equal(t, []byte("third"), result.result.ReceiptBytes) - - resp = make(chan readReceiptResp, 1) - coord.handleReadByTxHashInBlock(readByTxHashInBlockReq{ - ctx: context.Background(), - txHash: txHash, - blockNumber: 10, - resp: resp, - }) - result = <-resp - require.NoError(t, result.err) - require.Equal(t, []byte("first"), result.result.ReceiptBytes) -} - func TestReadByTxHashFallsThroughToClosedFiles(t *testing.T) { ctx := context.Background() dir := t.TempDir() @@ -166,11 +119,3 @@ func TestGetLogsReadsAcrossClosedFiles(t *testing.T) { require.Len(t, results, 1) require.Equal(t, uint64(5), results[0].BlockNumber) } - -func logBlockNumbers(results []parquet.LogResult) []uint64 { - blocks := make([]uint64, 0, len(results)) - for _, result := range results { - blocks = append(blocks, result.BlockNumber) - } - return blocks -} diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go index 41e4c2d4c6..00fc9058dd 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go @@ -1,7 +1,6 @@ package parquet_v2 import ( - "math/big" "os" "path/filepath" "testing" @@ -11,36 +10,6 @@ import ( "github.com/stretchr/testify/require" ) -func TestRotationBoundaryPrimitives(t *testing.T) { - coord := &coordinator{ - config: parquet.StoreConfig{MaxBlocksPerFile: 500}, - } - - resp := make(chan bool, 1) - coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 0, resp: resp}) - require.True(t, <-resp) - - resp = make(chan bool, 1) - coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 500, resp: resp}) - require.True(t, <-resp) - - 
resp = make(chan bool, 1) - coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 501, resp: resp}) - require.False(t, <-resp) - - coord.config.MaxBlocksPerFile = 0 - resp = make(chan bool, 1) - coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 500, resp: resp}) - require.False(t, <-resp) -} - -func TestAlignedFileStartBlock(t *testing.T) { - require.Equal(t, uint64(5000), alignedFileStartBlock(5234, 500)) - require.Equal(t, uint64(5000), alignedFileStartBlock(5000, 500)) - require.Equal(t, uint64(0), alignedFileStartBlock(499, 500)) - require.Equal(t, uint64(5234), alignedFileStartBlock(5234, 0)) -} - func TestLazyInitUsesAlignedStartForFirstOffBoundaryWrite(t *testing.T) { dir := t.TempDir() store, err := NewStore(parquet.StoreConfig{ @@ -104,83 +73,3 @@ func TestReopenLazyInitUsesAlignedStartOnGap(t *testing.T) { require.FileExists(t, filepath.Join(dir, "receipts_20.parquet")) require.FileExists(t, filepath.Join(dir, "logs_20.parquet")) } - -func TestWriteRotatesAtAlignedBoundary(t *testing.T) { - wal := &recordingWAL{} - coord := newWriteCoordinator(t, wal) - coord.config.MaxBlocksPerFile = 4 - defer func() { require.NoError(t, coord.closeWriters()) }() - - for block := uint64(1); block <= 4; block++ { - require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ - testReceiptInput(block, common.BigToHash(new(big.Int).SetUint64(block))), - })) - } - - require.Len(t, coord.closedFiles, 1) - require.Equal(t, uint64(0), coord.closedFiles[0].startBlock) - require.Equal(t, uint64(4), coord.fileStartBlock) - require.FileExists(t, filepath.Join(coord.basePath, "receipts_0.parquet")) - require.FileExists(t, filepath.Join(coord.basePath, "logs_0.parquet")) - require.FileExists(t, filepath.Join(coord.basePath, "receipts_4.parquet")) - require.FileExists(t, filepath.Join(coord.basePath, "logs_4.parquet")) - - require.Len(t, wal.truncatedBefore, 1) - require.Equal(t, uint64(4), wal.truncatedBefore[0]) - require.Len(t, coord.tempWriteCache, 1) 
- require.Contains(t, coord.tempWriteCache, common.BigToHash(big.NewInt(4))) -} - -func TestRotateOpenFilePrunesOnlyOldTempCacheEntries(t *testing.T) { - txHash := common.HexToHash("0xabc") - coord := &coordinator{ - tempWriteCache: map[common.Hash][]tempReceipt{ - txHash: { - {blockNumber: 1, writeOrdinal: 0}, - {blockNumber: 4, writeOrdinal: 1}, - }, - common.HexToHash("0xdef"): { - {blockNumber: 2, writeOrdinal: 2}, - }, - }, - } - - coord.dropTempCacheBefore(4) - - require.Len(t, coord.tempWriteCache, 1) - require.Len(t, coord.tempWriteCache[txHash], 1) - require.Equal(t, uint64(4), coord.tempWriteCache[txHash][0].blockNumber) -} - -func TestObserveEmptyBlockHonorsMonotonicLastSeen(t *testing.T) { - coord := newWriteCoordinator(t, &recordingWAL{}) - - require.NoError(t, coord.observeEmptyBlock(5)) - require.Equal(t, uint64(5), coord.lastSeenBlock) - - require.NoError(t, coord.observeEmptyBlock(4)) - require.Equal(t, uint64(5), coord.lastSeenBlock) - require.Empty(t, coord.closedFiles) -} - -func TestObserveEmptyBlockRotatesAtBoundary(t *testing.T) { - wal := &recordingWAL{} - coord := newWriteCoordinator(t, wal) - coord.config.MaxBlocksPerFile = 4 - defer func() { require.NoError(t, coord.closeWriters()) }() - - require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ - testReceiptInput(1, common.HexToHash("0x1")), - })) - require.NotNil(t, coord.receiptWriter) - - require.NoError(t, coord.observeEmptyBlock(4)) - - require.Equal(t, uint64(4), coord.lastSeenBlock) - require.Equal(t, uint64(4), coord.fileStartBlock) - require.Len(t, coord.closedFiles, 1) - require.Equal(t, uint64(0), coord.closedFiles[0].startBlock) - require.FileExists(t, filepath.Join(coord.basePath, "receipts_0.parquet")) - require.FileExists(t, filepath.Join(coord.basePath, "receipts_4.parquet")) - require.Empty(t, coord.tempWriteCache) -} diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_wal_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_wal_test.go index 
02254f142d..3fb5b06dad 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_wal_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_wal_test.go @@ -2,8 +2,6 @@ package parquet_v2 import ( "context" - "math/big" - "path/filepath" "testing" "github.com/ethereum/go-ethereum/common" @@ -11,69 +9,6 @@ import ( "github.com/stretchr/testify/require" ) -func TestReplayWALAppliesReceiptsAndPreservesDuplicateHashes(t *testing.T) { - wal := replayWALWithEntries(t, - parquet.WALEntry{BlockNumber: 1, Receipts: [][]byte{{7, 1}, {7, 2}}}, - parquet.WALEntry{BlockNumber: 2, Receipts: [][]byte{{8, 1}}}, - ) - coord := newReplayCoordinator(t, wal) - defer func() { require.NoError(t, coord.closeWriters()) }() - - result, err := coord.replayWAL(replayConverterForTest) - require.NoError(t, err) - - duplicateHash := common.BigToHash(new(big.Int).SetUint64(7)) - require.Len(t, result.WarmupRecords, 3) - require.Len(t, result.Blocks, 2) - require.Equal(t, uint64(1), result.Blocks[0].BlockNumber) - require.Equal(t, []common.Hash{duplicateHash, duplicateHash}, result.Blocks[0].TxHashes) - require.Len(t, coord.tempWriteCache[duplicateHash], 2) - require.Equal(t, int64(2), coord.latestVersion) - require.Empty(t, wal.truncatedBefore) -} - -func TestReplayWALSkipsEntriesBeforeFileStartAndTruncates(t *testing.T) { - wal := replayWALWithEntries(t, - parquet.WALEntry{BlockNumber: 2, Receipts: [][]byte{{2}}}, - parquet.WALEntry{BlockNumber: 4, Receipts: [][]byte{{4}}}, - ) - coord := newReplayCoordinator(t, wal) - coord.fileStartBlock = 4 - defer func() { require.NoError(t, coord.closeWriters()) }() - - result, err := coord.replayWAL(func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) { - require.NotEqual(t, uint64(2), blockNumber) - return replayConverterForTest(blockNumber, receiptBytes, logStartIndex) - }) - require.NoError(t, err) - - require.Len(t, result.WarmupRecords, 1) - require.Equal(t, uint64(4), result.WarmupRecords[0].BlockNumber) - 
require.Equal(t, []uint64{2}, wal.truncatedBefore) - require.Equal(t, int64(4), coord.latestVersion) -} - -func TestReplayWALRotatesBoundaryWithoutClearingWAL(t *testing.T) { - wal := replayWALWithEntries(t, - parquet.WALEntry{BlockNumber: 1, Receipts: [][]byte{{1}}}, - parquet.WALEntry{BlockNumber: 4, Receipts: [][]byte{{4}}}, - ) - coord := newReplayCoordinator(t, wal) - defer func() { require.NoError(t, coord.closeWriters()) }() - - _, err := coord.replayWAL(replayConverterForTest) - require.NoError(t, err) - - require.Len(t, coord.closedFiles, 1) - require.Equal(t, uint64(0), coord.closedFiles[0].startBlock) - require.Equal(t, uint64(4), coord.fileStartBlock) - require.FileExists(t, filepath.Join(coord.basePath, "receipts_0.parquet")) - require.FileExists(t, filepath.Join(coord.basePath, "receipts_4.parquet")) - require.Equal(t, []uint64{2}, wal.truncatedBefore) - require.Len(t, coord.tempWriteCache, 1) - require.Contains(t, coord.tempWriteCache, common.BigToHash(new(big.Int).SetUint64(4))) -} - func TestReplayWALRequiresConverter(t *testing.T) { store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}) require.NoError(t, err) @@ -83,38 +18,6 @@ func TestReplayWALRequiresConverter(t *testing.T) { require.ErrorContains(t, err, "converter is nil") } -func replayWALWithEntries(t *testing.T, entries ...parquet.WALEntry) *recordingWAL { - t.Helper() - - wal := &recordingWAL{} - for _, entry := range entries { - require.NoError(t, wal.Write(entry)) - } - return wal -} - -func newReplayCoordinator(t *testing.T, wal *recordingWAL) *coordinator { - t.Helper() - - coord := newWriteCoordinator(t, wal) - coord.config.MaxBlocksPerFile = 4 - return coord -} - -func replayConverterForTest(blockNumber uint64, receiptBytes []byte, _ uint) (ReplayReceipt, error) { - txHash := common.BigToHash(new(big.Int).SetUint64(uint64(receiptBytes[0]))) - input := testReceiptInput(blockNumber, txHash) - input.ReceiptBytes = append([]byte(nil), receiptBytes...) 
- input.Receipt.ReceiptBytes = append([]byte(nil), receiptBytes...) - - return ReplayReceipt{ - Input: input, - TxHash: txHash, - Warmup: input.Receipt, - LogCount: uint(len(input.Logs)), - }, nil -} - func TestReplayWALPublicDispatch(t *testing.T) { store := newDispatchStore(t) _, err := store.ReplayWAL(func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) { diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go index df4a4945f6..1badaccbd0 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go @@ -1,7 +1,6 @@ package parquet_v2 import ( - "math/big" "testing" "github.com/ethereum/go-ethereum/common" @@ -38,149 +37,3 @@ func TestWriteReceiptsUpdatesLatestAndReopens(t *testing.T) { require.Equal(t, uint64(4), reopened.FileStartBlock()) require.NoError(t, reopened.Close()) } - -func TestWriteReceiptsGroupsWALByBlockEncounterOrder(t *testing.T) { - wal := &recordingWAL{} - coord := newWriteCoordinator(t, wal) - defer func() { require.NoError(t, coord.closeWriters()) }() - - require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ - testReceiptInput(2, common.HexToHash("0x22")), - testReceiptInput(1, common.HexToHash("0x11")), - testReceiptInput(2, common.HexToHash("0x23")), - })) - - require.Len(t, wal.entries, 2) - require.Equal(t, uint64(2), wal.entries[0].BlockNumber) - require.Len(t, wal.entries[0].Receipts, 2) - require.Equal(t, uint64(1), wal.entries[1].BlockNumber) - require.Len(t, wal.entries[1].Receipts, 1) -} - -func TestWriteReceiptsKeepsDuplicateHashCacheEntries(t *testing.T) { - wal := &recordingWAL{} - coord := newWriteCoordinator(t, wal) - defer func() { require.NoError(t, coord.closeWriters()) }() - - txHash := common.HexToHash("0xabc") - require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ - testReceiptInput(1, txHash), - testReceiptInput(2, txHash), - 
})) - - require.Len(t, coord.receiptsBuffer, 2) - require.Equal(t, int64(2), coord.latestVersion) - require.Len(t, coord.tempWriteCache[txHash], 2) - require.Equal(t, uint64(1), coord.tempWriteCache[txHash][0].blockNumber) - require.Equal(t, uint64(0), coord.tempWriteCache[txHash][0].writeOrdinal) - require.Equal(t, uint64(2), coord.tempWriteCache[txHash][1].blockNumber) - require.Equal(t, uint64(1), coord.tempWriteCache[txHash][1].writeOrdinal) -} - -func TestWriteReceiptsFlushesAtConfiguredBlockInterval(t *testing.T) { - wal := &recordingWAL{} - coord := newWriteCoordinator(t, wal) - coord.config.BlockFlushInterval = 1 - defer func() { require.NoError(t, coord.closeWriters()) }() - - require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ - testReceiptInput(1, common.HexToHash("0x1")), - testReceiptInput(2, common.HexToHash("0x2")), - })) - - require.Empty(t, coord.receiptsBuffer) - require.Empty(t, coord.logsBuffer) - require.Zero(t, coord.blocksSinceFlush) - require.Equal(t, int64(2), coord.latestVersion) -} - -func newWriteCoordinator(t *testing.T, wal *recordingWAL) *coordinator { - t.Helper() - - cfg := parquet.DefaultStoreConfig() - cfg.DBDirectory = t.TempDir() - cfg.MaxBlocksPerFile = 500 - cfg.BlockFlushInterval = 0 - - return &coordinator{ - config: cfg, - basePath: cfg.DBDirectory, - receiptsBuffer: make([]parquet.ReceiptRecord, 0, 1000), - logsBuffer: make([]parquet.LogRecord, 0, 10000), - tempWriteCache: make(map[common.Hash][]tempReceipt), - wal: wal, - } -} - -func testReceiptInput(blockNumber uint64, txHash common.Hash) parquet.ReceiptInput { - receiptBytes := []byte{byte(blockNumber), txHash[31]} - return parquet.ReceiptInput{ - BlockNumber: blockNumber, - Receipt: parquet.ReceiptRecord{ - TxHash: txHash[:], - BlockNumber: blockNumber, - ReceiptBytes: receiptBytes, - }, - Logs: []parquet.LogRecord{{ - BlockNumber: blockNumber, - TxHash: txHash[:], - Address: common.BigToAddress(new(big.Int).SetUint64(blockNumber)).Bytes(), - }}, - 
ReceiptBytes: receiptBytes, - } -} - -type recordingWAL struct { - entries []parquet.WALEntry - firstOffset uint64 - lastOffset uint64 - truncatedBefore []uint64 -} - -func (w *recordingWAL) Write(entry parquet.WALEntry) error { - if w.firstOffset == 0 { - w.firstOffset = 1 - } - w.lastOffset++ - w.entries = append(w.entries, entry) - return nil -} - -func (w *recordingWAL) TruncateBefore(offset uint64) error { - w.truncatedBefore = append(w.truncatedBefore, offset) - return nil -} - -func (w *recordingWAL) TruncateAfter(uint64) error { return nil } - -func (w *recordingWAL) ReadAt(uint64) (parquet.WALEntry, error) { return parquet.WALEntry{}, nil } - -func (w *recordingWAL) FirstOffset() (uint64, error) { return w.firstOffset, nil } - -func (w *recordingWAL) LastOffset() (uint64, error) { return w.lastOffset, nil } - -func (w *recordingWAL) Replay(firstOffset, lastOffset uint64, fn func(uint64, parquet.WALEntry) error) error { - for i, entry := range w.entries { - offset := uint64(i) + 1 - if offset < firstOffset || offset > lastOffset { - continue - } - if err := fn(offset, entry); err != nil { - return err - } - } - return nil -} - -func (w *recordingWAL) Close() error { return nil } - -var _ interface { - Write(parquet.WALEntry) error - TruncateBefore(uint64) error - TruncateAfter(uint64) error - ReadAt(uint64) (parquet.WALEntry, error) - FirstOffset() (uint64, error) - LastOffset() (uint64, error) - Replay(uint64, uint64, func(uint64, parquet.WALEntry) error) error - Close() error -} = (*recordingWAL)(nil) diff --git a/sei-db/ledger_db/receipt/parquet_v2/types.go b/sei-db/ledger_db/receipt/parquet_v2/types.go index 667f579cc0..05dd8b7633 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/types.go +++ b/sei-db/ledger_db/receipt/parquet_v2/types.go @@ -1,46 +1,12 @@ package parquet_v2 -import ( - "errors" - "fmt" - "math" +import "github.com/sei-protocol/sei-chain/sei-db/ledger_db/receipt/parquet_v2/coordinator" - "github.com/ethereum/go-ethereum/common" - 
"github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" +type ( + ReplayResult = coordinator.ReplayResult + ReplayReceipt = coordinator.ReplayReceipt + ReplayedBlock = coordinator.ReplayedBlock + WALReceiptConverter = coordinator.WALReceiptConverter ) -// ErrStoreClosed is returned when a request is made after the coordinator has -// stopped accepting work. -var ErrStoreClosed = errors.New("store closed") - -type tempReceipt struct { - blockNumber uint64 - writeOrdinal uint64 - receiptBytes []byte -} - -type ReplayedBlock struct { - BlockNumber uint64 - TxHashes []common.Hash -} - -type WALReceiptConverter func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) - -type ReplayReceipt struct { - Input parquet.ReceiptInput - TxHash common.Hash - Warmup parquet.ReceiptRecord - LogCount uint -} - -type ReplayResult struct { - WarmupRecords []parquet.ReceiptRecord - Blocks []ReplayedBlock -} - -func int64FromUint64(value uint64) (int64, error) { - if value > uint64(math.MaxInt64) { - return 0, fmt.Errorf("value %d overflows int64", value) - } - return int64(value), nil -} +var ErrStoreClosed = coordinator.ErrStoreClosed From 6ff441274c02b0d2ebf4252b2bf0f547ef0d220a Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Wed, 29 Apr 2026 23:05:01 -0400 Subject: [PATCH 15/27] review fixes --- .../parquet_v2/coordinator/coordinator.go | 4 --- .../parquet_v2/coordinator/handlers.go | 2 -- .../receipt/parquet_v2/coordinator/prune.go | 10 ++++-- .../parquet_v2/coordinator/read_test.go | 6 ++-- .../receipt/parquet_v2/coordinator/reader.go | 4 --- .../parquet_v2/coordinator/rotation_test.go | 6 ++-- .../receipt/parquet_v2/coordinator/types.go | 1 - .../receipt/parquet_v2/coordinator/wal.go | 33 ++++++++++++------- .../parquet_v2/coordinator/write_test.go | 2 -- sei-db/ledger_db/receipt/parquet_v2/types.go | 1 - .../receipt/parquet_v2_receipt_store.go | 4 +-- 11 files changed, 35 insertions(+), 38 deletions(-) diff --git 
a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go index e365b20347..5c6537ec55 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -44,16 +44,12 @@ type Coordinator struct { logsBuffer []parquet.LogRecord lastSeenBlock uint64 blocksSinceFlush uint64 - nextWriteOrdinal uint64 tempWriteCache map[common.Hash][]tempReceipt latestVersion int64 earliestVersion int64 - replayedWarmup []parquet.ReceiptRecord - replayedBlocks []ReplayedBlock - faultHooks *parquet.FaultHooks wal dbwal.GenericWAL[parquet.WALEntry] diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go index a158d1297c..600075c330 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go @@ -278,10 +278,8 @@ func (c *Coordinator) applyReceipt(input parquet.ReceiptInput) error { txHash := common.BytesToHash(input.Receipt.TxHash) c.tempWriteCache[txHash] = append(c.tempWriteCache[txHash], tempReceipt{ blockNumber: input.BlockNumber, - writeOrdinal: c.nextWriteOrdinal, receiptBytes: input.ReceiptBytes, }) - c.nextWriteOrdinal++ if c.config.BlockFlushInterval > 0 && c.blocksSinceFlush >= c.config.BlockFlushInterval { if err := c.flushOpenFile(); err != nil { diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/prune.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/prune.go index 8d5a5811fa..74a28b0696 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/prune.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/prune.go @@ -1,11 +1,15 @@ package coordinator import ( - "log" "os" + + "github.com/sei-protocol/seilog" ) -var removeFile = os.Remove +var ( + removeFile = os.Remove + logger = seilog.NewLogger("db", "ledger-db", "parquet-v2") +) func (c *Coordinator) 
pruneOldFiles(pruneBeforeBlock uint64) int { if len(c.closedFiles) == 0 { @@ -49,7 +53,7 @@ func removePrunedFile(path string) bool { return true } if err := removeFile(path); err != nil && !os.IsNotExist(err) { - log.Printf("failed to prune parquet file %s: %v", path, err) + logger.Error("failed to prune parquet file", "file", path, "err", err) return false } return true diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/read_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/read_test.go index 0fc8dbe36b..772fdf37b8 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/read_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/read_test.go @@ -13,9 +13,9 @@ func TestReadByTxHashHitsTempCache(t *testing.T) { coord := &Coordinator{ tempWriteCache: map[common.Hash][]tempReceipt{ txHash: { - {blockNumber: 10, writeOrdinal: 0, receiptBytes: []byte("first")}, - {blockNumber: 10, writeOrdinal: 1, receiptBytes: []byte("second")}, - {blockNumber: 11, writeOrdinal: 2, receiptBytes: []byte("third")}, + {blockNumber: 10, receiptBytes: []byte("first")}, + {blockNumber: 10, receiptBytes: []byte("second")}, + {blockNumber: 11, receiptBytes: []byte("third")}, }, }, } diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/reader.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/reader.go index 581c5297c8..ab31f91dbf 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/reader.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/reader.go @@ -21,10 +21,6 @@ type Reader struct { maxBlocksPerFile uint64 } -func NewReader(basePath string) (*Reader, error) { - return NewReaderWithMaxBlocksPerFile(basePath, parquet.DefaultStoreConfig().MaxBlocksPerFile) -} - func NewReaderWithMaxBlocksPerFile(basePath string, maxBlocksPerFile uint64) (*Reader, error) { if maxBlocksPerFile == 0 { maxBlocksPerFile = parquet.DefaultStoreConfig().MaxBlocksPerFile diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/rotation_test.go 
b/sei-db/ledger_db/receipt/parquet_v2/coordinator/rotation_test.go index c8733ef1f8..5ceba74416 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/rotation_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/rotation_test.go @@ -71,11 +71,11 @@ func TestRotateOpenFilePrunesOnlyOldTempCacheEntries(t *testing.T) { coord := &Coordinator{ tempWriteCache: map[common.Hash][]tempReceipt{ txHash: { - {blockNumber: 1, writeOrdinal: 0}, - {blockNumber: 4, writeOrdinal: 1}, + {blockNumber: 1}, + {blockNumber: 4}, }, common.HexToHash("0xdef"): { - {blockNumber: 2, writeOrdinal: 2}, + {blockNumber: 2}, }, }, } diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go index a5131eb9ad..0a087d2d98 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go @@ -15,7 +15,6 @@ var ErrStoreClosed = errors.New("store closed") type tempReceipt struct { blockNumber uint64 - writeOrdinal uint64 receiptBytes []byte } diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go index 5ae8b1d131..af144845a1 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go @@ -16,12 +16,18 @@ func (c *Coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, er return ReplayResult{}, nil } - firstOffset, errFirst := c.wal.FirstOffset() - if errFirst != nil || firstOffset <= 0 { + firstOffset, err := c.wal.FirstOffset() + if err != nil { + return ReplayResult{}, fmt.Errorf("failed to read parquet WAL first offset: %w", err) + } + if firstOffset == 0 { return ReplayResult{}, nil } - lastOffset, errLast := c.wal.LastOffset() - if errLast != nil || lastOffset <= 0 { + lastOffset, err := c.wal.LastOffset() + if err != nil { + return ReplayResult{}, fmt.Errorf("failed to read parquet WAL last offset: 
%w", err) + } + if lastOffset == 0 { return ReplayResult{}, nil } @@ -36,7 +42,7 @@ func (c *Coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, er result := ReplayResult{} replayIdx := make(map[uint64]int) - err := c.wal.Replay(firstOffset, lastOffset, func(offset uint64, entry parquet.WALEntry) error { + err = c.wal.Replay(firstOffset, lastOffset, func(offset uint64, entry parquet.WALEntry) error { if len(entry.Receipts) == 0 { return nil } @@ -107,9 +113,6 @@ func (c *Coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, er if err := truncateReplayWAL(c.wal, dropOffset); err != nil { return ReplayResult{}, err } - - c.replayedWarmup = append(c.replayedWarmup[:0], result.WarmupRecords...) - c.replayedBlocks = append(c.replayedBlocks[:0], result.Blocks...) return result, nil } @@ -151,12 +154,18 @@ func (c *Coordinator) clearWALPreservingLast() error { if c.wal == nil { return nil } - firstOffset, errFirst := c.wal.FirstOffset() - if errFirst != nil || firstOffset <= 0 { + firstOffset, err := c.wal.FirstOffset() + if err != nil { + return fmt.Errorf("failed to read parquet WAL first offset: %w", err) + } + if firstOffset == 0 { return nil } - lastOffset, errLast := c.wal.LastOffset() - if errLast != nil || lastOffset <= 0 { + lastOffset, err := c.wal.LastOffset() + if err != nil { + return fmt.Errorf("failed to read parquet WAL last offset: %w", err) + } + if lastOffset == 0 { return nil } if lastOffset <= firstOffset { diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/write_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/write_test.go index 0d5adfb332..9c8759857f 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/write_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/write_test.go @@ -41,9 +41,7 @@ func TestWriteReceiptsKeepsDuplicateHashCacheEntries(t *testing.T) { require.Equal(t, int64(2), coord.latestVersion) require.Len(t, coord.tempWriteCache[txHash], 2) require.Equal(t, uint64(1), 
coord.tempWriteCache[txHash][0].blockNumber) - require.Equal(t, uint64(0), coord.tempWriteCache[txHash][0].writeOrdinal) require.Equal(t, uint64(2), coord.tempWriteCache[txHash][1].blockNumber) - require.Equal(t, uint64(1), coord.tempWriteCache[txHash][1].writeOrdinal) } func TestWriteReceiptsFlushesAtConfiguredBlockInterval(t *testing.T) { diff --git a/sei-db/ledger_db/receipt/parquet_v2/types.go b/sei-db/ledger_db/receipt/parquet_v2/types.go index 05dd8b7633..ac7d7d09eb 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/types.go +++ b/sei-db/ledger_db/receipt/parquet_v2/types.go @@ -5,7 +5,6 @@ import "github.com/sei-protocol/sei-chain/sei-db/ledger_db/receipt/parquet_v2/co type ( ReplayResult = coordinator.ReplayResult ReplayReceipt = coordinator.ReplayReceipt - ReplayedBlock = coordinator.ReplayedBlock WALReceiptConverter = coordinator.WALReceiptConverter ) diff --git a/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go b/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go index 72c0ce6003..c8bf8f5465 100644 --- a/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go +++ b/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go @@ -176,9 +176,7 @@ func (s *parquetReceiptStoreV2) SetReceipts(ctx sdk.Context, receipts []ReceiptR return err } } - if ctx.BlockHeight() > s.store.LatestVersion() { - s.store.SetLatestVersion(ctx.BlockHeight()) - } + s.store.UpdateLatestVersion(ctx.BlockHeight()) return nil } From ed768cd414f1b9979c9a4b1ef9b3e1e3f3bf4d25 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 4 May 2026 09:16:02 -0400 Subject: [PATCH 16/27] fix --- .../receipt/parquet_v2/coordinator/api.go | 152 ------------------ .../parquet_v2/coordinator/coordinator.go | 148 ++++++++++++++++- 2 files changed, 146 insertions(+), 154 deletions(-) delete mode 100644 sei-db/ledger_db/receipt/parquet_v2/coordinator/api.go diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/api.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/api.go deleted file mode 100644 index 
47955c9ece..0000000000 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/api.go +++ /dev/null @@ -1,152 +0,0 @@ -package coordinator - -import ( - "context" - - "github.com/ethereum/go-ethereum/common" - "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" -) - -func (c *Coordinator) WriteReceipts(inputs []parquet.ReceiptInput) error { - resp := make(chan writeResp, 1) - r, err := awaitResponse(c, writeReq{inputs: inputs, resp: resp}, resp) - if err != nil { - return err - } - return r.err -} - -func (c *Coordinator) GetReceiptByTxHash(ctx context.Context, txHash common.Hash) (*parquet.ReceiptResult, error) { - resp := make(chan readReceiptResp, 1) - r, err := awaitResponse(c, readByTxHashReq{ctx: ctx, txHash: txHash, resp: resp}, resp) - if err != nil { - return nil, err - } - return r.result, r.err -} - -func (c *Coordinator) GetReceiptByTxHashInBlock(ctx context.Context, txHash common.Hash, blockNumber uint64) (*parquet.ReceiptResult, error) { - resp := make(chan readReceiptResp, 1) - r, err := awaitResponse(c, readByTxHashInBlockReq{ - ctx: ctx, - txHash: txHash, - blockNumber: blockNumber, - resp: resp, - }, resp) - if err != nil { - return nil, err - } - return r.result, r.err -} - -func (c *Coordinator) GetLogs(ctx context.Context, filter parquet.LogFilter) ([]parquet.LogResult, error) { - resp := make(chan getLogsResp, 1) - r, err := awaitResponse(c, getLogsReq{ctx: ctx, filter: filter, resp: resp}, resp) - if err != nil { - return nil, err - } - return r.results, r.err -} - -func (c *Coordinator) ObserveEmptyBlock(height uint64) error { - resp := make(chan error, 1) - return awaitError(c, observeEmptyBlockReq{height: height, resp: resp}, resp) -} - -func (c *Coordinator) IsRotationBoundary(blockNumber uint64) bool { - resp := make(chan bool, 1) - r, err := awaitResponse(c, isRotationBoundaryReq{blockNumber: blockNumber, resp: resp}, resp) - if err != nil { - return false - } - return r -} - -func (c *Coordinator) FileStartBlock() uint64 { - 
resp := make(chan uint64, 1) - r, err := awaitResponse(c, fileStartBlockReq{resp: resp}, resp) - if err != nil { - return 0 - } - return r -} - -func (c *Coordinator) LatestVersion() int64 { - resp := make(chan int64, 1) - r, err := awaitResponse(c, latestVersionReq{resp: resp}, resp) - if err != nil { - return 0 - } - return r -} - -func (c *Coordinator) SetLatestVersion(version int64) { - resp := make(chan error, 1) - _ = awaitError(c, setLatestVersionReq{version: version, resp: resp}, resp) -} - -func (c *Coordinator) SetEarliestVersion(version int64) { - resp := make(chan error, 1) - _ = awaitError(c, setEarliestVersionReq{version: version, resp: resp}, resp) -} - -func (c *Coordinator) UpdateLatestVersion(version int64) { - resp := make(chan error, 1) - _ = awaitError(c, updateLatestVersionReq{version: version, resp: resp}, resp) -} - -func (c *Coordinator) CacheRotateInterval() uint64 { - resp := make(chan uint64, 1) - r, err := awaitResponse(c, cacheRotateIntervalReq{resp: resp}, resp) - if err != nil { - return 0 - } - return r -} - -func (c *Coordinator) Flush() error { - resp := make(chan error, 1) - return awaitError(c, flushReq{resp: resp}, resp) -} - -func (c *Coordinator) Close() error { - var err error - c.closeOnce.Do(func() { - resp := make(chan error, 1) - err = awaitError(c, closeReq{resp: resp}, resp) - close(c.done) - }) - return err -} - -func (c *Coordinator) SimulateCrash() { - c.closeOnce.Do(func() { - resp := make(chan struct{}, 1) - _, _ = awaitResponse(c, simulateCrashReq{resp: resp}, resp) - close(c.done) - }) -} - -func (c *Coordinator) SetBlockFlushInterval(interval uint64) { - resp := make(chan error, 1) - _ = awaitError(c, setBlockFlushIntervalReq{interval: interval, resp: resp}, resp) -} - -func (c *Coordinator) SetMaxBlocksPerFile(n uint64) { - resp := make(chan error, 1) - _ = awaitError(c, setMaxBlocksPerFileReq{maxBlocksPerFile: n, resp: resp}, resp) -} - -func (c *Coordinator) SetFaultHooks(hooks *parquet.FaultHooks) { - resp 
:= make(chan error, 1) - _ = awaitError(c, setFaultHooksReq{hooks: hooks, resp: resp}, resp) -} - -func (c *Coordinator) ReplayWAL(converter WALReceiptConverter) (ReplayResult, error) { - resp := make(chan replayWALResp, 1) - r, err := awaitResponse(c, replayWALReq{converter: converter, resp: resp}, resp) - if err != nil { - return ReplayResult{}, err - } - return r.result, r.err -} diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go index 5c6537ec55..d724882010 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -220,7 +220,151 @@ func (c *Coordinator) stopPruneTicker() { c.pruneTick = nil } -func awaitResponse[T any](c *Coordinator, req coordRequest, resp <-chan T) (T, error) { +func (c *Coordinator) WriteReceipts(inputs []parquet.ReceiptInput) error { + resp := make(chan writeResp, 1) + r, err := sendAndAwaitResponse(c, writeReq{inputs: inputs, resp: resp}, resp) + if err != nil { + return err + } + return r.err +} + +func (c *Coordinator) GetReceiptByTxHash(ctx context.Context, txHash common.Hash) (*parquet.ReceiptResult, error) { + resp := make(chan readReceiptResp, 1) + r, err := sendAndAwaitResponse(c, readByTxHashReq{ctx: ctx, txHash: txHash, resp: resp}, resp) + if err != nil { + return nil, err + } + return r.result, r.err +} + +func (c *Coordinator) GetReceiptByTxHashInBlock(ctx context.Context, txHash common.Hash, blockNumber uint64) (*parquet.ReceiptResult, error) { + resp := make(chan readReceiptResp, 1) + r, err := sendAndAwaitResponse(c, readByTxHashInBlockReq{ + ctx: ctx, + txHash: txHash, + blockNumber: blockNumber, + resp: resp, + }, resp) + if err != nil { + return nil, err + } + return r.result, r.err +} + +func (c *Coordinator) GetLogs(ctx context.Context, filter parquet.LogFilter) ([]parquet.LogResult, error) { + resp := make(chan getLogsResp, 1) + r, err := 
sendAndAwaitResponse(c, getLogsReq{ctx: ctx, filter: filter, resp: resp}, resp) + if err != nil { + return nil, err + } + return r.results, r.err +} + +func (c *Coordinator) ObserveEmptyBlock(height uint64) error { + resp := make(chan error, 1) + return awaitError(c, observeEmptyBlockReq{height: height, resp: resp}, resp) +} + +func (c *Coordinator) IsRotationBoundary(blockNumber uint64) bool { + resp := make(chan bool, 1) + r, err := sendAndAwaitResponse(c, isRotationBoundaryReq{blockNumber: blockNumber, resp: resp}, resp) + if err != nil { + return false + } + return r +} + +func (c *Coordinator) FileStartBlock() uint64 { + resp := make(chan uint64, 1) + r, err := sendAndAwaitResponse(c, fileStartBlockReq{resp: resp}, resp) + if err != nil { + return 0 + } + return r +} + +func (c *Coordinator) LatestVersion() int64 { + resp := make(chan int64, 1) + r, err := sendAndAwaitResponse(c, latestVersionReq{resp: resp}, resp) + if err != nil { + return 0 + } + return r +} + +func (c *Coordinator) SetLatestVersion(version int64) { + resp := make(chan error, 1) + _ = awaitError(c, setLatestVersionReq{version: version, resp: resp}, resp) +} + +func (c *Coordinator) SetEarliestVersion(version int64) { + resp := make(chan error, 1) + _ = awaitError(c, setEarliestVersionReq{version: version, resp: resp}, resp) +} + +func (c *Coordinator) UpdateLatestVersion(version int64) { + resp := make(chan error, 1) + _ = awaitError(c, updateLatestVersionReq{version: version, resp: resp}, resp) +} + +func (c *Coordinator) CacheRotateInterval() uint64 { + resp := make(chan uint64, 1) + r, err := sendAndAwaitResponse(c, cacheRotateIntervalReq{resp: resp}, resp) + if err != nil { + return 0 + } + return r +} + +func (c *Coordinator) Flush() error { + resp := make(chan error, 1) + return awaitError(c, flushReq{resp: resp}, resp) +} + +func (c *Coordinator) Close() error { + var err error + c.closeOnce.Do(func() { + resp := make(chan error, 1) + err = awaitError(c, closeReq{resp: resp}, resp) + 
close(c.done) + }) + return err +} + +func (c *Coordinator) SimulateCrash() { + c.closeOnce.Do(func() { + resp := make(chan struct{}, 1) + _, _ = sendAndAwaitResponse(c, simulateCrashReq{resp: resp}, resp) + close(c.done) + }) +} + +func (c *Coordinator) SetBlockFlushInterval(interval uint64) { + resp := make(chan error, 1) + _ = awaitError(c, setBlockFlushIntervalReq{interval: interval, resp: resp}, resp) +} + +func (c *Coordinator) SetMaxBlocksPerFile(n uint64) { + resp := make(chan error, 1) + _ = awaitError(c, setMaxBlocksPerFileReq{maxBlocksPerFile: n, resp: resp}, resp) +} + +func (c *Coordinator) SetFaultHooks(hooks *parquet.FaultHooks) { + resp := make(chan error, 1) + _ = awaitError(c, setFaultHooksReq{hooks: hooks, resp: resp}, resp) +} + +func (c *Coordinator) ReplayWAL(converter WALReceiptConverter) (ReplayResult, error) { + resp := make(chan replayWALResp, 1) + r, err := sendAndAwaitResponse(c, replayWALReq{converter: converter, resp: resp}, resp) + if err != nil { + return ReplayResult{}, err + } + return r.result, r.err +} + +func sendAndAwaitResponse[T any](c *Coordinator, req coordRequest, resp <-chan T) (T, error) { var zero T select { @@ -238,7 +382,7 @@ func awaitResponse[T any](c *Coordinator, req coordRequest, resp <-chan T) (T, e } func awaitError(c *Coordinator, req coordRequest, resp <-chan error) error { - err, waitErr := awaitResponse(c, req, resp) + err, waitErr := sendAndAwaitResponse(c, req, resp) if waitErr != nil { return waitErr } From e96ae464cfb54127dc29939b23c4aa76c5b58408 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 4 May 2026 09:37:55 -0400 Subject: [PATCH 17/27] remove ObserveEmptyBlock and IsRotationBoundary --- .../parquet_v2/coordinator/coordinator.go | 26 ++--- .../parquet_v2/coordinator/handlers.go | 95 +++++++------------ .../parquet_v2/coordinator/requests.go | 13 +-- .../parquet_v2/coordinator/rotation_test.go | 32 +++---- .../receipt/parquet_v2/coordinator/wal.go | 11 +-- 
.../parquet_v2/coordinator/write_test.go | 16 +++- sei-db/ledger_db/receipt/parquet_v2/store.go | 12 +-- .../receipt/parquet_v2/store_dispatch_test.go | 8 +- .../receipt/parquet_v2/store_read_test.go | 20 +++- .../receipt/parquet_v2/store_rotation_test.go | 6 +- .../receipt/parquet_v2/store_write_test.go | 11 ++- .../receipt/parquet_v2_receipt_store.go | 71 ++++---------- 12 files changed, 113 insertions(+), 208 deletions(-) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go index d724882010..f7a86efd50 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -169,8 +169,6 @@ func (c *Coordinator) run() { c.handleReadByTxHashInBlock(r) case getLogsReq: c.handleGetLogs(r) - case observeEmptyBlockReq: - c.handleObserveEmptyBlock(r) case flushReq: c.handleFlush(r) case latestVersionReq: @@ -185,8 +183,6 @@ func (c *Coordinator) run() { c.handleCacheRotateInterval(r) case fileStartBlockReq: c.handleFileStartBlock(r) - case isRotationBoundaryReq: - c.handleIsRotationBoundary(r) case setBlockFlushIntervalReq: c.handleSetBlockFlushInterval(r) case setMaxBlocksPerFileReq: @@ -220,9 +216,13 @@ func (c *Coordinator) stopPruneTicker() { c.pruneTick = nil } -func (c *Coordinator) WriteReceipts(inputs []parquet.ReceiptInput) error { +// WriteReceipts records a committed block. inputs may be empty, in which case +// the call only advances rotation/cursor state — equivalent to the former +// ObserveEmptyBlock. height is authoritative; inputs[i].BlockNumber is +// ignored. 
+func (c *Coordinator) WriteReceipts(height uint64, inputs []parquet.ReceiptInput) error { resp := make(chan writeResp, 1) - r, err := sendAndAwaitResponse(c, writeReq{inputs: inputs, resp: resp}, resp) + r, err := sendAndAwaitResponse(c, writeReq{height: height, inputs: inputs, resp: resp}, resp) if err != nil { return err } @@ -261,20 +261,6 @@ func (c *Coordinator) GetLogs(ctx context.Context, filter parquet.LogFilter) ([] return r.results, r.err } -func (c *Coordinator) ObserveEmptyBlock(height uint64) error { - resp := make(chan error, 1) - return awaitError(c, observeEmptyBlockReq{height: height, resp: resp}, resp) -} - -func (c *Coordinator) IsRotationBoundary(blockNumber uint64) bool { - resp := make(chan bool, 1) - r, err := sendAndAwaitResponse(c, isRotationBoundaryReq{blockNumber: blockNumber, resp: resp}, resp) - if err != nil { - return false - } - return r -} - func (c *Coordinator) FileStartBlock() uint64 { resp := make(chan uint64, 1) r, err := sendAndAwaitResponse(c, fileStartBlockReq{resp: resp}, resp) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go index 600075c330..675a0ad3fc 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go @@ -11,7 +11,7 @@ import ( ) func (c *Coordinator) handleWrite(req writeReq) { - req.resp <- writeResp{err: c.writeReceipts(req.inputs)} + req.resp <- writeResp{err: c.writeReceipts(req.height, req.inputs)} } func (c *Coordinator) handleReadByTxHash(req readByTxHashReq) { @@ -52,10 +52,6 @@ func (c *Coordinator) handleGetLogs(req getLogsReq) { req.resp <- getLogsResp{results: results, err: err} } -func (c *Coordinator) handleObserveEmptyBlock(req observeEmptyBlockReq) { - req.resp <- c.observeEmptyBlock(req.height) -} - func (c *Coordinator) handleFlush(req flushReq) { req.resp <- c.flushOpenFile() } @@ -89,10 +85,6 @@ func (c *Coordinator) 
handleFileStartBlock(req fileStartBlockReq) { req.resp <- c.fileStartBlock } -func (c *Coordinator) handleIsRotationBoundary(req isRotationBoundaryReq) { - req.resp <- c.isRotationBoundary(req.blockNumber) -} - func (c *Coordinator) handleSetBlockFlushInterval(req setBlockFlushIntervalReq) { c.config.BlockFlushInterval = req.interval req.resp <- nil @@ -178,70 +170,46 @@ func (c *Coordinator) handleSimulateCrash(req simulateCrashReq) { req.resp <- struct{}{} } -func (c *Coordinator) writeReceipts(inputs []parquet.ReceiptInput) error { +// writeReceipts records a committed block at height. When inputs is empty it +// degenerates to the rotation/cursor-advance path (formerly ObserveEmptyBlock): +// no WAL entry is written, but if height lands on a rotation boundary the +// open file is rotated so it never spans more than MaxBlocksPerFile blocks. +// height is authoritative; inputs[i].BlockNumber is ignored. +func (c *Coordinator) writeReceipts(height uint64, inputs []parquet.ReceiptInput) error { if len(inputs) == 0 { - return nil + return c.observeBlock(height) } if c.wal == nil { return fmt.Errorf("parquet WAL is not initialized") } - type blockBatch struct { - blockNumber uint64 - receipts [][]byte - inputs []parquet.ReceiptInput + receiptBytes := make([][]byte, len(inputs)) + for i := range inputs { + receiptBytes[i] = inputs[i].ReceiptBytes + } + if err := c.wal.Write(parquet.WALEntry{BlockNumber: height, Receipts: receiptBytes}); err != nil { + return err } - var batches []blockBatch - batchIdx := make(map[uint64]int) - for i := range inputs { - bn := inputs[i].BlockNumber - if idx, ok := batchIdx[bn]; ok { - batches[idx].receipts = append(batches[idx].receipts, inputs[i].ReceiptBytes) - batches[idx].inputs = append(batches[idx].inputs, inputs[i]) - continue - } - batchIdx[bn] = len(batches) - batches = append(batches, blockBatch{ - blockNumber: bn, - receipts: [][]byte{inputs[i].ReceiptBytes}, - inputs: []parquet.ReceiptInput{inputs[i]}, - }) - } - - maxBlock 
:= inputs[0].BlockNumber - for _, b := range batches { - entry := parquet.WALEntry{ - BlockNumber: b.blockNumber, - Receipts: b.receipts, - } - if err := c.wal.Write(entry); err != nil { + if h := c.faultHooks; h != nil && h.AfterWALWrite != nil { + if err := h.AfterWALWrite(height); err != nil { return err } + } - if h := c.faultHooks; h != nil && h.AfterWALWrite != nil { - if err := h.AfterWALWrite(b.blockNumber); err != nil { - return err - } - } - - if c.receiptWriter != nil && b.blockNumber != c.lastSeenBlock && c.isRotationBoundary(b.blockNumber) { - if err := c.rotateOpenFile(b.blockNumber); err != nil { - return err - } + if c.receiptWriter != nil && height != c.lastSeenBlock && c.isRotationBoundary(height) { + if err := c.rotateOpenFile(height); err != nil { + return err } + } - for i := range b.inputs { - if err := c.applyReceipt(b.inputs[i]); err != nil { - return err - } - if b.inputs[i].BlockNumber > maxBlock { - maxBlock = b.inputs[i].BlockNumber - } + for i := range inputs { + if err := c.applyReceipt(height, inputs[i]); err != nil { + return err } } - latest, err := int64FromUint64(maxBlock) + latest, err := int64FromUint64(height) if err != nil { return err } @@ -251,9 +219,9 @@ func (c *Coordinator) writeReceipts(inputs []parquet.ReceiptInput) error { return nil } -func (c *Coordinator) applyReceipt(input parquet.ReceiptInput) error { +func (c *Coordinator) applyReceipt(blockNumber uint64, input parquet.ReceiptInput) error { if c.receiptWriter == nil { - aligned := alignedFileStartBlock(input.BlockNumber, c.config.MaxBlocksPerFile) + aligned := alignedFileStartBlock(blockNumber, c.config.MaxBlocksPerFile) if aligned >= c.fileStartBlock { c.fileStartBlock = aligned } @@ -262,7 +230,6 @@ func (c *Coordinator) applyReceipt(input parquet.ReceiptInput) error { } } - blockNumber := input.BlockNumber if blockNumber != c.lastSeenBlock { if c.lastSeenBlock != 0 { c.blocksSinceFlush++ @@ -277,7 +244,7 @@ func (c *Coordinator) applyReceipt(input 
parquet.ReceiptInput) error { txHash := common.BytesToHash(input.Receipt.TxHash) c.tempWriteCache[txHash] = append(c.tempWriteCache[txHash], tempReceipt{ - blockNumber: input.BlockNumber, + blockNumber: blockNumber, receiptBytes: input.ReceiptBytes, }) @@ -461,7 +428,11 @@ func (c *Coordinator) dropTempCacheBefore(blockNumber uint64) { } } -func (c *Coordinator) observeEmptyBlock(height uint64) error { +// observeBlock advances the cursor for a committed block without writing to +// the WAL. Called by writeReceipts when inputs is empty. Out-of-order +// observations must not move the cursor backward — WriteReceipts could +// otherwise mis-handle rotation for a height already seen. +func (c *Coordinator) observeBlock(height uint64) error { if height <= c.lastSeenBlock { return nil } diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go index 9bb6e79af1..379f844537 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go @@ -12,6 +12,7 @@ type coordRequest interface { } type writeReq struct { + height uint64 inputs []parquet.ReceiptInput resp chan writeResp } @@ -49,11 +50,6 @@ type getLogsResp struct { err error } -type observeEmptyBlockReq struct { - height uint64 - resp chan error -} - type flushReq struct { resp chan error } @@ -85,11 +81,6 @@ type fileStartBlockReq struct { resp chan uint64 } -type isRotationBoundaryReq struct { - blockNumber uint64 - resp chan bool -} - type setBlockFlushIntervalReq struct { interval uint64 resp chan error @@ -127,7 +118,6 @@ func (writeReq) isCoordRequest() {} func (readByTxHashReq) isCoordRequest() {} func (readByTxHashInBlockReq) isCoordRequest() {} func (getLogsReq) isCoordRequest() {} -func (observeEmptyBlockReq) isCoordRequest() {} func (flushReq) isCoordRequest() {} func (latestVersionReq) isCoordRequest() {} func (setLatestVersionReq) isCoordRequest() {} @@ 
-135,7 +125,6 @@ func (setEarliestVersionReq) isCoordRequest() {} func (updateLatestVersionReq) isCoordRequest() {} func (cacheRotateIntervalReq) isCoordRequest() {} func (fileStartBlockReq) isCoordRequest() {} -func (isRotationBoundaryReq) isCoordRequest() {} func (setBlockFlushIntervalReq) isCoordRequest() {} func (setMaxBlocksPerFileReq) isCoordRequest() {} func (setFaultHooksReq) isCoordRequest() {} diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/rotation_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/rotation_test.go index 5ceba74416..97f76a6a5d 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/rotation_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/rotation_test.go @@ -15,22 +15,12 @@ func TestRotationBoundaryPrimitives(t *testing.T) { config: parquet.StoreConfig{MaxBlocksPerFile: 500}, } - resp := make(chan bool, 1) - coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 0, resp: resp}) - require.True(t, <-resp) - - resp = make(chan bool, 1) - coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 500, resp: resp}) - require.True(t, <-resp) - - resp = make(chan bool, 1) - coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 501, resp: resp}) - require.False(t, <-resp) + require.True(t, coord.isRotationBoundary(0)) + require.True(t, coord.isRotationBoundary(500)) + require.False(t, coord.isRotationBoundary(501)) coord.config.MaxBlocksPerFile = 0 - resp = make(chan bool, 1) - coord.handleIsRotationBoundary(isRotationBoundaryReq{blockNumber: 500, resp: resp}) - require.False(t, <-resp) + require.False(t, coord.isRotationBoundary(500)) } func TestAlignedFileStartBlock(t *testing.T) { @@ -47,7 +37,7 @@ func TestWriteRotatesAtAlignedBoundary(t *testing.T) { defer func() { require.NoError(t, coord.closeWriters()) }() for block := uint64(1); block <= 4; block++ { - require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + require.NoError(t, coord.writeReceipts(block, 
[]parquet.ReceiptInput{ testReceiptInput(block, common.BigToHash(new(big.Int).SetUint64(block))), })) } @@ -87,29 +77,29 @@ func TestRotateOpenFilePrunesOnlyOldTempCacheEntries(t *testing.T) { require.Equal(t, uint64(4), coord.tempWriteCache[txHash][0].blockNumber) } -func TestObserveEmptyBlockHonorsMonotonicLastSeen(t *testing.T) { +func TestWriteEmptyHonorsMonotonicLastSeen(t *testing.T) { coord := newWriteCoordinator(t, &recordingWAL{}) - require.NoError(t, coord.observeEmptyBlock(5)) + require.NoError(t, coord.writeReceipts(5, nil)) require.Equal(t, uint64(5), coord.lastSeenBlock) - require.NoError(t, coord.observeEmptyBlock(4)) + require.NoError(t, coord.writeReceipts(4, nil)) require.Equal(t, uint64(5), coord.lastSeenBlock) require.Empty(t, coord.closedFiles) } -func TestObserveEmptyBlockRotatesAtBoundary(t *testing.T) { +func TestWriteEmptyRotatesAtBoundary(t *testing.T) { wal := &recordingWAL{} coord := newWriteCoordinator(t, wal) coord.config.MaxBlocksPerFile = 4 defer func() { require.NoError(t, coord.closeWriters()) }() - require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + require.NoError(t, coord.writeReceipts(1, []parquet.ReceiptInput{ testReceiptInput(1, common.HexToHash("0x1")), })) require.NotNil(t, coord.receiptWriter) - require.NoError(t, coord.observeEmptyBlock(4)) + require.NoError(t, coord.writeReceipts(4, nil)) require.Equal(t, uint64(4), coord.lastSeenBlock) require.Equal(t, uint64(4), coord.fileStartBlock) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go index af144845a1..64a90fc30c 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go @@ -86,7 +86,7 @@ func (c *Coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, er } input := normalizeReplayInput(blockNumber, receiptBytes, replayed) - if err := c.applyReceiptFromReplay(input); err != nil { + if err := 
c.applyReceiptFromReplay(blockNumber, input); err != nil { return err } @@ -116,19 +116,18 @@ func (c *Coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, er return result, nil } -func (c *Coordinator) applyReceiptFromReplay(input parquet.ReceiptInput) error { - if c.receiptWriter != nil && input.BlockNumber != c.lastSeenBlock && c.isRotationBoundary(input.BlockNumber) { - if err := c.rotateOpenFileWithoutWAL(input.BlockNumber); err != nil { +func (c *Coordinator) applyReceiptFromReplay(blockNumber uint64, input parquet.ReceiptInput) error { + if c.receiptWriter != nil && blockNumber != c.lastSeenBlock && c.isRotationBoundary(blockNumber) { + if err := c.rotateOpenFileWithoutWAL(blockNumber); err != nil { return err } c.dropTempCacheBefore(c.fileStartBlock) } - return c.applyReceipt(input) + return c.applyReceipt(blockNumber, input) } func normalizeReplayInput(blockNumber uint64, receiptBytes []byte, replayed ReplayReceipt) parquet.ReceiptInput { input := replayed.Input - input.BlockNumber = blockNumber input.Receipt.BlockNumber = blockNumber if len(input.Receipt.TxHash) == 0 { input.Receipt.TxHash = append([]byte(nil), replayed.TxHash[:]...) 
diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/write_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/write_test.go index 9c8759857f..591ca26e24 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/write_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/write_test.go @@ -8,16 +8,18 @@ import ( "github.com/stretchr/testify/require" ) -func TestWriteReceiptsGroupsWALByBlockEncounterOrder(t *testing.T) { +func TestWriteReceiptsWritesOneWALEntryPerCall(t *testing.T) { wal := &recordingWAL{} coord := newWriteCoordinator(t, wal) defer func() { require.NoError(t, coord.closeWriters()) }() - require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + require.NoError(t, coord.writeReceipts(2, []parquet.ReceiptInput{ testReceiptInput(2, common.HexToHash("0x22")), - testReceiptInput(1, common.HexToHash("0x11")), testReceiptInput(2, common.HexToHash("0x23")), })) + require.NoError(t, coord.writeReceipts(1, []parquet.ReceiptInput{ + testReceiptInput(1, common.HexToHash("0x11")), + })) require.Len(t, wal.entries, 2) require.Equal(t, uint64(2), wal.entries[0].BlockNumber) @@ -32,8 +34,10 @@ func TestWriteReceiptsKeepsDuplicateHashCacheEntries(t *testing.T) { defer func() { require.NoError(t, coord.closeWriters()) }() txHash := common.HexToHash("0xabc") - require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + require.NoError(t, coord.writeReceipts(1, []parquet.ReceiptInput{ testReceiptInput(1, txHash), + })) + require.NoError(t, coord.writeReceipts(2, []parquet.ReceiptInput{ testReceiptInput(2, txHash), })) @@ -50,8 +54,10 @@ func TestWriteReceiptsFlushesAtConfiguredBlockInterval(t *testing.T) { coord.config.BlockFlushInterval = 1 defer func() { require.NoError(t, coord.closeWriters()) }() - require.NoError(t, coord.writeReceipts([]parquet.ReceiptInput{ + require.NoError(t, coord.writeReceipts(1, []parquet.ReceiptInput{ testReceiptInput(1, common.HexToHash("0x1")), + })) + require.NoError(t, coord.writeReceipts(2, 
[]parquet.ReceiptInput{ testReceiptInput(2, common.HexToHash("0x2")), })) diff --git a/sei-db/ledger_db/receipt/parquet_v2/store.go b/sei-db/ledger_db/receipt/parquet_v2/store.go index 8016708356..294a61a45c 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store.go @@ -23,8 +23,8 @@ func NewStore(cfg parquet.StoreConfig) (*Store, error) { return &Store{coord: c}, nil } -func (s *Store) WriteReceipts(inputs []parquet.ReceiptInput) error { - return s.coord.WriteReceipts(inputs) +func (s *Store) WriteReceipts(height uint64, inputs []parquet.ReceiptInput) error { + return s.coord.WriteReceipts(height, inputs) } func (s *Store) GetReceiptByTxHash(ctx context.Context, txHash common.Hash) (*parquet.ReceiptResult, error) { @@ -39,14 +39,6 @@ func (s *Store) GetLogs(ctx context.Context, filter parquet.LogFilter) ([]parque return s.coord.GetLogs(ctx, filter) } -func (s *Store) ObserveEmptyBlock(height uint64) error { - return s.coord.ObserveEmptyBlock(height) -} - -func (s *Store) IsRotationBoundary(blockNumber uint64) bool { - return s.coord.IsRotationBoundary(blockNumber) -} - func (s *Store) FileStartBlock() uint64 { return s.coord.FileStartBlock() } diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go index c97f4689be..d2fc046959 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go @@ -25,8 +25,6 @@ func TestMetadataAndConfigRequestsDispatchThroughCoordinator(t *testing.T) { require.Equal(t, uint64(0), store.FileStartBlock()) require.Equal(t, int64(0), store.LatestVersion()) require.Equal(t, uint64(4), store.CacheRotateInterval()) - require.True(t, store.IsRotationBoundary(8)) - require.False(t, store.IsRotationBoundary(9)) store.SetLatestVersion(10) require.Equal(t, int64(10), store.LatestVersion()) @@ -43,8 +41,6 @@ func 
TestMetadataAndConfigRequestsDispatchThroughCoordinator(t *testing.T) { store.SetMaxBlocksPerFile(3) require.Equal(t, uint64(3), store.CacheRotateInterval()) - require.True(t, store.IsRotationBoundary(6)) - require.False(t, store.IsRotationBoundary(8)) } func TestCloseStopsFutureRequests(t *testing.T) { @@ -52,7 +48,7 @@ func TestCloseStopsFutureRequests(t *testing.T) { require.NoError(t, err) require.NoError(t, store.Close()) - require.ErrorIs(t, store.WriteReceipts(nil), ErrStoreClosed) + require.ErrorIs(t, store.WriteReceipts(0, nil), ErrStoreClosed) require.NoError(t, store.Close()) } @@ -61,6 +57,6 @@ func TestSimulateCrashStopsFutureRequests(t *testing.T) { require.NoError(t, err) store.SimulateCrash() - require.ErrorIs(t, store.WriteReceipts(nil), ErrStoreClosed) + require.ErrorIs(t, store.WriteReceipts(0, nil), ErrStoreClosed) require.NoError(t, store.Close()) } diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go index f815ab4f46..2bf454d70a 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go @@ -20,8 +20,10 @@ func TestReadByTxHashFallsThroughToClosedFiles(t *testing.T) { MaxBlocksPerFile: 10, }) require.NoError(t, err) - require.NoError(t, store.WriteReceipts([]parquet.ReceiptInput{ + require.NoError(t, store.WriteReceipts(1, []parquet.ReceiptInput{ testReceiptInput(1, txHash), + })) + require.NoError(t, store.WriteReceipts(2, []parquet.ReceiptInput{ testReceiptInput(2, txHash), })) require.NoError(t, store.Close()) @@ -56,11 +58,19 @@ func TestReadByTxHashAfterRotationUsesClosedFilesAndTempCache(t *testing.T) { require.NoError(t, err) t.Cleanup(func() { require.NoError(t, store.Close()) }) - require.NoError(t, store.WriteReceipts([]parquet.ReceiptInput{ + require.NoError(t, store.WriteReceipts(1, []parquet.ReceiptInput{ testReceiptInput(1, txHash), + })) + require.NoError(t, store.WriteReceipts(2, 
[]parquet.ReceiptInput{ testReceiptInput(2, common.HexToHash("0x2")), + })) + require.NoError(t, store.WriteReceipts(3, []parquet.ReceiptInput{ testReceiptInput(3, common.HexToHash("0x3")), + })) + require.NoError(t, store.WriteReceipts(4, []parquet.ReceiptInput{ testReceiptInput(4, common.HexToHash("0x4")), + })) + require.NoError(t, store.WriteReceipts(5, []parquet.ReceiptInput{ testReceiptInput(5, txHash), })) @@ -86,11 +96,11 @@ func TestGetLogsReadsAcrossClosedFiles(t *testing.T) { }) require.NoError(t, err) - var inputs []parquet.ReceiptInput for block := uint64(1); block <= 8; block++ { - inputs = append(inputs, testReceiptInput(block, common.BigToHash(new(big.Int).SetUint64(block)))) + require.NoError(t, store.WriteReceipts(block, []parquet.ReceiptInput{ + testReceiptInput(block, common.BigToHash(new(big.Int).SetUint64(block))), + })) } - require.NoError(t, store.WriteReceipts(inputs)) require.NoError(t, store.Close()) reopened, err := NewStore(parquet.StoreConfig{ diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go index 00fc9058dd..1eb6530d07 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go @@ -18,7 +18,7 @@ func TestLazyInitUsesAlignedStartForFirstOffBoundaryWrite(t *testing.T) { }) require.NoError(t, err) - require.NoError(t, store.WriteReceipts([]parquet.ReceiptInput{ + require.NoError(t, store.WriteReceipts(5234, []parquet.ReceiptInput{ testReceiptInput(5234, common.HexToHash("0x5234")), })) require.NoError(t, store.Close()) @@ -43,7 +43,7 @@ func TestReopenLazyInitPreservesExistingAlignedFile(t *testing.T) { require.NoError(t, err) require.Equal(t, uint64(11), store.FileStartBlock()) - require.NoError(t, store.WriteReceipts([]parquet.ReceiptInput{ + require.NoError(t, store.WriteReceipts(11, []parquet.ReceiptInput{ testReceiptInput(11, common.HexToHash("0x11")), })) require.NoError(t, 
store.Close()) @@ -65,7 +65,7 @@ func TestReopenLazyInitUsesAlignedStartOnGap(t *testing.T) { }) require.NoError(t, err) - require.NoError(t, store.WriteReceipts([]parquet.ReceiptInput{ + require.NoError(t, store.WriteReceipts(25, []parquet.ReceiptInput{ testReceiptInput(25, common.HexToHash("0x25")), })) require.NoError(t, store.Close()) diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go index 1badaccbd0..4e4adfe224 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go @@ -1,6 +1,7 @@ package parquet_v2 import ( + "math/big" "testing" "github.com/ethereum/go-ethereum/common" @@ -19,11 +20,11 @@ func TestWriteReceiptsUpdatesLatestAndReopens(t *testing.T) { }) require.NoError(t, err) - require.NoError(t, store.WriteReceipts([]parquet.ReceiptInput{ - testReceiptInput(1, common.HexToHash("0x1")), - testReceiptInput(2, common.HexToHash("0x2")), - testReceiptInput(3, common.HexToHash("0x3")), - })) + for block := uint64(1); block <= 3; block++ { + require.NoError(t, store.WriteReceipts(block, []parquet.ReceiptInput{ + testReceiptInput(block, common.BigToHash(new(big.Int).SetUint64(block))), + })) + } require.Equal(t, int64(3), store.LatestVersion()) require.NoError(t, store.Close()) diff --git a/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go b/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go index c8bf8f5465..3d65c27e36 100644 --- a/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go +++ b/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go @@ -170,46 +170,39 @@ func (s *parquetReceiptStoreV2) indexedReceiptLookup(ctx context.Context, txHash } func (s *parquetReceiptStoreV2) SetReceipts(ctx sdk.Context, receipts []ReceiptRecord) error { - if len(receipts) == 0 { - if ctx.BlockHeight() > 0 { - if err := s.store.ObserveEmptyBlock(uint64(ctx.BlockHeight())); err != nil { //nolint:gosec // block heights fit 
within uint64 - return err - } - } - s.store.UpdateLatestVersion(ctx.BlockHeight()) - return nil - } + height := uint64(ctx.BlockHeight()) //nolint:gosec // block heights fit within uint64 - inputs, maxBlock, err := buildParquetReceiptInputs(receipts) + inputs, err := buildParquetReceiptInputs(receipts) if err != nil { return err } - if err := s.store.WriteReceipts(inputs); err != nil { + if err := s.store.WriteReceipts(height, inputs); err != nil { return err } - if s.txHashIndex != nil { - if err := s.indexReceiptInputs(inputs); err != nil { + if s.txHashIndex != nil && len(inputs) > 0 { + if err := s.indexReceiptInputs(height, inputs); err != nil { return fmt.Errorf("tx hash index write failed: %w", err) } } - if maxBlock > 0 { - s.store.UpdateLatestVersion(int64(maxBlock)) //nolint:gosec // block numbers won't exceed int64 max - } - + s.store.UpdateLatestVersion(ctx.BlockHeight()) return nil } -func buildParquetReceiptInputs(receipts []ReceiptRecord) ([]parquet.ReceiptInput, uint64, error) { +// buildParquetReceiptInputs constructs ReceiptInputs for the v2 store. The +// wrapper-level BlockNumber field is intentionally left zero — v2 carries the +// committed height as an explicit parameter to WriteReceipts. The +// Receipt.BlockNumber column is still populated since it is what gets written +// to the parquet file. 
+func buildParquetReceiptInputs(receipts []ReceiptRecord) ([]parquet.ReceiptInput, error) { blockHash := common.Hash{} inputs := make([]parquet.ReceiptInput, 0, len(receipts)) var ( currentBlock uint64 logStartIndex uint - maxBlock uint64 ) for _, record := range receipts { @@ -219,9 +212,6 @@ func buildParquetReceiptInputs(receipts []ReceiptRecord) ([]parquet.ReceiptInput receipt := record.Receipt blockNumber := receipt.BlockNumber - if blockNumber > maxBlock { - maxBlock = blockNumber - } if currentBlock == 0 { currentBlock = blockNumber @@ -236,7 +226,7 @@ func buildParquetReceiptInputs(receipts []ReceiptRecord) ([]parquet.ReceiptInput var err error receiptBytes, err = receipt.Marshal() if err != nil { - return nil, 0, err + return nil, err } } @@ -247,7 +237,6 @@ func buildParquetReceiptInputs(receipts []ReceiptRecord) ([]parquet.ReceiptInput } inputs = append(inputs, parquet.ReceiptInput{ - BlockNumber: blockNumber, Receipt: parquet.ReceiptRecord{ TxHash: parquet.CopyBytes(record.TxHash[:]), BlockNumber: blockNumber, @@ -258,38 +247,15 @@ func buildParquetReceiptInputs(receipts []ReceiptRecord) ([]parquet.ReceiptInput }) } - return inputs, maxBlock, nil + return inputs, nil } -func (s *parquetReceiptStoreV2) indexReceiptInputs(inputs []parquet.ReceiptInput) error { - type blockBatch struct { - blockNumber uint64 - hashes []common.Hash - } - var batches []blockBatch - batchIdx := make(map[uint64]int) - +func (s *parquetReceiptStoreV2) indexReceiptInputs(height uint64, inputs []parquet.ReceiptInput) error { + hashes := make([]common.Hash, len(inputs)) for i := range inputs { - bn := inputs[i].BlockNumber - txHash := common.BytesToHash(inputs[i].Receipt.TxHash) - if idx, ok := batchIdx[bn]; ok { - batches[idx].hashes = append(batches[idx].hashes, txHash) - continue - } - batchIdx[bn] = len(batches) - batches = append(batches, blockBatch{ - blockNumber: bn, - hashes: []common.Hash{txHash}, - }) + hashes[i] = common.BytesToHash(inputs[i].Receipt.TxHash) } - - ctx 
:= context.Background() - for _, b := range batches { - if err := s.txHashIndex.IndexBlock(ctx, b.blockNumber, b.hashes); err != nil { - return err - } - } - return nil + return s.txHashIndex.IndexBlock(context.Background(), height, hashes) } func (s *parquetReceiptStoreV2) FilterLogs(ctx sdk.Context, fromBlock, toBlock uint64, crit filters.FilterCriteria) ([]*ethtypes.Log, error) { @@ -361,7 +327,6 @@ func (s *parquetReceiptStoreV2) replayWAL() error { } return parquet_v2.ReplayReceipt{ Input: parquet.ReceiptInput{ - BlockNumber: blockNumber, Receipt: record, Logs: BuildParquetLogRecords(txLogs, blockHash), ReceiptBytes: parquet.CopyBytesOrEmpty(receiptBytes), From da245f54a4b06e88037a19ab7b5c65fb3d1235a4 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 4 May 2026 09:45:27 -0400 Subject: [PATCH 18/27] simplify CacheRotateInterval to direct config read The rotation interval is set at construction and only mutated by the test-only SetMaxBlocksPerFile, so the request-channel round-trip on every read isn't needed. Drop the request type/handler/dispatch case and read c.config.MaxBlocksPerFile directly with a doc comment about the no-race-with-mutation contract. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../receipt/parquet_v2/coordinator/coordinator.go | 14 ++++++-------- .../receipt/parquet_v2/coordinator/handlers.go | 4 ---- .../receipt/parquet_v2/coordinator/requests.go | 5 ----- 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go index f7a86efd50..6ea5cf0881 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -179,8 +179,6 @@ func (c *Coordinator) run() { c.handleSetEarliestVersion(r) case updateLatestVersionReq: c.handleUpdateLatestVersion(r) - case cacheRotateIntervalReq: - c.handleCacheRotateInterval(r) case fileStartBlockReq: c.handleFileStartBlock(r) case setBlockFlushIntervalReq: @@ -294,13 +292,13 @@ func (c *Coordinator) UpdateLatestVersion(version int64) { _ = awaitError(c, updateLatestVersionReq{version: version, resp: resp}, resp) } +// CacheRotateInterval returns the rotation boundary used by the cached receipt +// store. Reads c.config.MaxBlocksPerFile directly without going through the +// request channel; this is safe because the value is set at construction and +// only mutated by SetMaxBlocksPerFile, which is test-only and must not race +// with reads. 
func (c *Coordinator) CacheRotateInterval() uint64 { - resp := make(chan uint64, 1) - r, err := sendAndAwaitResponse(c, cacheRotateIntervalReq{resp: resp}, resp) - if err != nil { - return 0 - } - return r + return c.config.MaxBlocksPerFile } func (c *Coordinator) Flush() error { diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go index 675a0ad3fc..48d60165ef 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go @@ -77,10 +77,6 @@ func (c *Coordinator) handleUpdateLatestVersion(req updateLatestVersionReq) { req.resp <- nil } -func (c *Coordinator) handleCacheRotateInterval(req cacheRotateIntervalReq) { - req.resp <- c.config.MaxBlocksPerFile -} - func (c *Coordinator) handleFileStartBlock(req fileStartBlockReq) { req.resp <- c.fileStartBlock } diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go index 379f844537..52819765ea 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go @@ -73,10 +73,6 @@ type updateLatestVersionReq struct { resp chan error } -type cacheRotateIntervalReq struct { - resp chan uint64 -} - type fileStartBlockReq struct { resp chan uint64 } @@ -123,7 +119,6 @@ func (latestVersionReq) isCoordRequest() {} func (setLatestVersionReq) isCoordRequest() {} func (setEarliestVersionReq) isCoordRequest() {} func (updateLatestVersionReq) isCoordRequest() {} -func (cacheRotateIntervalReq) isCoordRequest() {} func (fileStartBlockReq) isCoordRequest() {} func (setBlockFlushIntervalReq) isCoordRequest() {} func (setMaxBlocksPerFileReq) isCoordRequest() {} From c2adfc2772d1b97bf38bbf3a9491d58876295cb0 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 4 May 2026 09:56:50 -0400 Subject: [PATCH 19/27] add godocs across parquet_v2 
coordinator and store, panic on unknown coord request Co-Authored-By: Claude Opus 4.7 (1M context) --- .../parquet_v2/coordinator/coordinator.go | 2 + .../receipt/parquet_v2/coordinator/files.go | 9 +++ .../parquet_v2/coordinator/handlers.go | 79 +++++++++++++++++++ .../receipt/parquet_v2/coordinator/prune.go | 9 +++ .../receipt/parquet_v2/coordinator/reader.go | 36 +++++++++ .../parquet_v2/coordinator/requests.go | 41 ++++++++++ .../receipt/parquet_v2/coordinator/types.go | 17 ++++ .../receipt/parquet_v2/coordinator/wal.go | 19 +++++ sei-db/ledger_db/receipt/parquet_v2/store.go | 38 +++++++++ 9 files changed, 250 insertions(+) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go index 6ea5cf0881..0e9f55b372 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -195,6 +195,8 @@ func (c *Coordinator) run() { case closeReq: c.handleClose(r) return + default: + panic(fmt.Sprintf("coordinator: unrecognized request type %T", r)) } case <-c.pruneTick: c.handlePruneTick() diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/files.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/files.go index 49be193b3d..73e2c93124 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/files.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/files.go @@ -9,6 +9,9 @@ import ( "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" ) +// scanClosedFiles enumerates closed receipt/log parquet pairs under basePath. +// It drops trailing files that fail a readability probe (likely truncated by +// a crash mid-flush) and returns the surviving pairs sorted by start block. 
func scanClosedFiles(basePath string, reader *Reader) ([]closedFile, error) { receiptFiles, err := parquetFilesByPrefix(basePath, "receipts") if err != nil { @@ -52,6 +55,8 @@ func scanClosedFiles(basePath string, reader *Reader) ([]closedFile, error) { return closed, nil } +// parquetFilesByPrefix globs parquet files of the form "{prefix}_*.parquet" +// directly under basePath. func parquetFilesByPrefix(basePath, prefix string) ([]string, error) { pattern := filepath.Join(basePath, prefix+"_*.parquet") files, err := filepath.Glob(pattern) @@ -61,6 +66,10 @@ func parquetFilesByPrefix(basePath, prefix string) ([]string, error) { return files, nil } +// validateAndCleanFiles probes the highest-numbered file for readability and, +// if it fails, removes both it and its same-start-block counterpart (the +// receipt/log sibling) from disk. Only the trailing file is checked because +// a crash can only corrupt the most recently written one. func validateAndCleanFiles(basePath string, reader *Reader, files []string, counterpartPrefix string) []string { if len(files) == 0 { return nil diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go index 48d60165ef..ab2d11fe3c 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go @@ -10,10 +10,15 @@ import ( "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" ) +// handleWrite serves a writeReq by appending receipts for a single block and +// replying with any error encountered during WAL append, rotation, or buffer +// staging. func (c *Coordinator) handleWrite(req writeReq) { req.resp <- writeResp{err: c.writeReceipts(req.height, req.inputs)} } +// handleReadByTxHash serves a readByTxHashReq by checking the in-memory write +// cache first, then falling back to a DuckDB query over closed parquet files. 
func (c *Coordinator) handleReadByTxHash(req readByTxHashReq) { if result := c.cachedReceiptByTxHash(req.txHash); result != nil { req.resp <- readReceiptResp{result: result} @@ -28,6 +33,9 @@ func (c *Coordinator) handleReadByTxHash(req readByTxHashReq) { req.resp <- readReceiptResp{result: result, err: err} } +// handleReadByTxHashInBlock serves a readByTxHashInBlockReq, narrowing the +// parquet file scan to the single closed file that contains the requested +// block (if any). func (c *Coordinator) handleReadByTxHashInBlock(req readByTxHashInBlockReq) { if result := c.cachedReceiptByTxHashInBlock(req.txHash, req.blockNumber); result != nil { req.resp <- readReceiptResp{result: result} @@ -42,6 +50,8 @@ func (c *Coordinator) handleReadByTxHashInBlock(req readByTxHashInBlockReq) { req.resp <- readReceiptResp{result: result, err: err} } +// handleGetLogs serves a getLogsReq by querying logs across the closed log +// parquet files using the supplied filter. func (c *Coordinator) handleGetLogs(req getLogsReq) { if c.reader == nil { req.resp <- getLogsResp{err: fmt.Errorf("parquet reader is not initialized")} @@ -52,24 +62,34 @@ func (c *Coordinator) handleGetLogs(req getLogsReq) { req.resp <- getLogsResp{results: results, err: err} } +// handleFlush serves a flushReq by flushing buffered receipts/logs for the +// open parquet file to disk. func (c *Coordinator) handleFlush(req flushReq) { req.resp <- c.flushOpenFile() } +// handleLatestVersion returns the highest block height the coordinator has +// observed via WriteReceipts or WAL replay. func (c *Coordinator) handleLatestVersion(req latestVersionReq) { req.resp <- c.latestVersion } +// handleSetLatestVersion overwrites latestVersion. Used by callers that +// authoritatively know the chain height (e.g., genesis/init paths). func (c *Coordinator) handleSetLatestVersion(req setLatestVersionReq) { c.latestVersion = req.version req.resp <- nil } +// handleSetEarliestVersion records the lowest retained block height. 
Pruning +// uses this as a hint about the visible window. func (c *Coordinator) handleSetEarliestVersion(req setEarliestVersionReq) { c.earliestVersion = req.version req.resp <- nil } +// handleUpdateLatestVersion advances latestVersion only when the supplied +// value is greater, preventing accidental rewinds. func (c *Coordinator) handleUpdateLatestVersion(req updateLatestVersionReq) { if req.version > c.latestVersion { c.latestVersion = req.version @@ -77,15 +97,21 @@ func (c *Coordinator) handleUpdateLatestVersion(req updateLatestVersionReq) { req.resp <- nil } +// handleFileStartBlock returns the start block of the currently open parquet +// file (the next file's name will derive from this). func (c *Coordinator) handleFileStartBlock(req fileStartBlockReq) { req.resp <- c.fileStartBlock } +// handleSetBlockFlushInterval updates how often (in blocks) the buffered +// receipt/log writer is flushed to disk. func (c *Coordinator) handleSetBlockFlushInterval(req setBlockFlushIntervalReq) { c.config.BlockFlushInterval = req.interval req.resp <- nil } +// handleSetMaxBlocksPerFile updates the rotation interval and propagates it +// to the reader so log-file pruning by block range stays consistent. func (c *Coordinator) handleSetMaxBlocksPerFile(req setMaxBlocksPerFileReq) { c.config.MaxBlocksPerFile = req.maxBlocksPerFile if c.reader != nil { @@ -94,16 +120,22 @@ func (c *Coordinator) handleSetMaxBlocksPerFile(req setMaxBlocksPerFileReq) { req.resp <- nil } +// handleSetFaultHooks installs the supplied test hooks. In production the +// hooks pointer is nil and all hook checks become no-ops. func (c *Coordinator) handleSetFaultHooks(req setFaultHooksReq) { c.faultHooks = req.hooks req.resp <- nil } +// handleReplayWAL drives WAL replay with the converter supplied in req and +// returns the recovered records and per-block tx hashes to the caller.
func (c *Coordinator) handleReplayWAL(req replayWALReq) { result, err := c.replayWAL(req.converter) req.resp <- replayWALResp{result: result, err: err} } +// handlePruneTick fires on the prune ticker and removes closed parquet pairs +// whose end block falls below latestVersion - KeepRecent. func (c *Coordinator) handlePruneTick() { // TODO(future-async): if read I/O moves to a worker pool, gate prune on // map[fileID]int reference counts that the coordinator increments on @@ -118,6 +150,9 @@ func (c *Coordinator) handlePruneTick() { c.pruneOldFiles(uint64(pruneBeforeBlock)) } +// handleClose performs a graceful shutdown: stop the prune ticker, flush and +// close the open writers, then close the WAL and reader. Returns the first +// non-nil error encountered along the way. func (c *Coordinator) handleClose(req closeReq) { c.stopPruneTicker() if err := c.flushOpenFile(); err != nil { @@ -145,6 +180,9 @@ func (c *Coordinator) handleClose(req closeReq) { req.resp <- nil } +// handleSimulateCrash drops in-memory writer state without flushing — the +// open parquet files remain truncated/partial on disk so subsequent recovery +// paths can be exercised. Test-only. func (c *Coordinator) handleSimulateCrash(req simulateCrashReq) { c.stopPruneTicker() if c.receiptFile != nil { @@ -215,6 +253,10 @@ func (c *Coordinator) writeReceipts(height uint64, inputs []parquet.ReceiptInput return nil } +// applyReceipt stages a single receipt into the open parquet writer's +// in-memory buffers and the temp write cache, lazily creating writers if this +// is the first receipt for the current file. Triggers a flush when +// blocksSinceFlush has reached BlockFlushInterval. 
func (c *Coordinator) applyReceipt(blockNumber uint64, input parquet.ReceiptInput) error { if c.receiptWriter == nil { aligned := alignedFileStartBlock(blockNumber, c.config.MaxBlocksPerFile) @@ -254,6 +296,9 @@ func (c *Coordinator) applyReceipt(blockNumber uint64, input parquet.ReceiptInpu return nil } +// cachedReceiptByTxHash returns the earliest cached receipt for txHash, or +// nil if the temp write cache has no entry. Used to serve reads for receipts +// that are still buffered and not yet flushed to a closed parquet file. func (c *Coordinator) cachedReceiptByTxHash(txHash common.Hash) *parquet.ReceiptResult { entries := c.tempWriteCache[txHash] if len(entries) == 0 { @@ -262,6 +307,8 @@ func (c *Coordinator) cachedReceiptByTxHash(txHash common.Hash) *parquet.Receipt return receiptResultFromTemp(txHash, entries[0]) } +// cachedReceiptByTxHashInBlock returns the cached receipt for txHash at the +// given block, or nil if the temp write cache has no matching entry. func (c *Coordinator) cachedReceiptByTxHashInBlock(txHash common.Hash, blockNumber uint64) *parquet.ReceiptResult { for _, entry := range c.tempWriteCache[txHash] { if entry.blockNumber == blockNumber { @@ -271,6 +318,8 @@ func (c *Coordinator) cachedReceiptByTxHashInBlock(txHash common.Hash, blockNumb return nil } +// receiptResultFromTemp converts a tempReceipt cache entry into the public +// ReceiptResult shape, copying byte slices to decouple from cache storage. func receiptResultFromTemp(txHash common.Hash, entry tempReceipt) *parquet.ReceiptResult { return &parquet.ReceiptResult{ TxHash: append([]byte(nil), txHash[:]...), @@ -279,6 +328,8 @@ func receiptResultFromTemp(txHash common.Hash, entry tempReceipt) *parquet.Recei } } +// receiptFilesSnapshot returns the receipt parquet paths for all closed +// files. Reads use this list as the file set for full-range queries. 
func (c *Coordinator) receiptFilesSnapshot() []string { files := make([]string, 0, len(c.closedFiles)) for _, f := range c.closedFiles { @@ -287,6 +338,9 @@ func (c *Coordinator) receiptFilesSnapshot() []string { return files } +// receiptFileSnapshotForBlock returns the single closed receipt file whose +// start block is the largest one not exceeding blockNumber, or nil if no +// such file exists. Used to narrow point lookups by block. func (c *Coordinator) receiptFileSnapshotForBlock(blockNumber uint64) []string { var best string for _, f := range c.closedFiles { @@ -301,6 +355,9 @@ func (c *Coordinator) receiptFileSnapshotForBlock(blockNumber uint64) []string { return []string{best} } +// logFilesSnapshot returns the log parquet paths for all closed files. Log +// queries use this list as the file set, which the Reader further narrows +// by from/to-block range. func (c *Coordinator) logFilesSnapshot() []string { files := make([]string, 0, len(c.closedFiles)) for _, f := range c.closedFiles { @@ -309,10 +366,15 @@ func (c *Coordinator) logFilesSnapshot() []string { return files } +// isRotationBoundary reports whether blockNumber lands on a MaxBlocksPerFile +// boundary, in which case the open parquet file should rotate before this +// block's receipts are written. func (c *Coordinator) isRotationBoundary(blockNumber uint64) bool { return c.config.MaxBlocksPerFile > 0 && blockNumber%c.config.MaxBlocksPerFile == 0 } +// alignedFileStartBlock rounds blockNumber down to a multiple of +// maxBlocksPerFile, used to derive a parquet file's start-block name.
func alignedFileStartBlock(blockNumber, maxBlocksPerFile uint64) uint64 { if maxBlocksPerFile == 0 { return blockNumber @@ -320,6 +382,9 @@ func alignedFileStartBlock(blockNumber, maxBlocksPerFile uint64) uint64 { return (blockNumber / maxBlocksPerFile) * maxBlocksPerFile } +// initWriters creates the receipt and log parquet files for the current +// fileStartBlock and constructs sorted parquet writers over them. If the log +// file fails to open, the receipt file is closed before returning. func (c *Coordinator) initWriters() error { receiptPath := filepath.Join(c.basePath, fmt.Sprintf("receipts_%d.parquet", c.fileStartBlock)) logPath := filepath.Join(c.basePath, fmt.Sprintf("logs_%d.parquet", c.fileStartBlock)) @@ -357,6 +422,9 @@ func (c *Coordinator) initWriters() error { return nil } +// rotateOpenFile closes the current parquet file pair, opens a new one +// starting at newBlockNumber, truncates the WAL up to (but not including) the +// most recent entry, and drops cached entries that are now durably stored. func (c *Coordinator) rotateOpenFile(newBlockNumber uint64) error { if err := c.rotateOpenFileWithoutWAL(newBlockNumber); err != nil { return err @@ -373,6 +441,9 @@ func (c *Coordinator) rotateOpenFile(newBlockNumber uint64) error { return nil } +// rotateOpenFileWithoutWAL performs the file-side rotation steps (flush, +// close, register closed pair, open new pair) without touching the WAL. +// Used during replay where the WAL drives rotation timing externally. func (c *Coordinator) rotateOpenFileWithoutWAL(newBlockNumber uint64) error { if c.receiptWriter == nil { return nil @@ -408,6 +479,9 @@ func (c *Coordinator) rotateOpenFileWithoutWAL(newBlockNumber uint64) error { return nil } +// dropTempCacheBefore evicts temp-cache entries for blocks below +// blockNumber, freeing memory once those receipts are durably persisted in +// a closed parquet file.
func (c *Coordinator) dropTempCacheBefore(blockNumber uint64) { for txHash, entries := range c.tempWriteCache { kept := entries[:0] @@ -443,6 +517,8 @@ func (c *Coordinator) observeBlock(height uint64) error { return nil } +// flushOpenFile drains the in-memory receipt and log buffers into the open +// parquet writers and forces them to disk. No-op when nothing is buffered. func (c *Coordinator) flushOpenFile() error { if len(c.receiptsBuffer) == 0 { return nil @@ -487,6 +563,9 @@ func (c *Coordinator) flushOpenFile() error { return nil } +// closeWriters finalizes the parquet writers (writing the trailer/footer) +// and fsync+closes the underlying files. All errors encountered are +// collected and returned together so partial cleanup still happens. func (c *Coordinator) closeWriters() error { var errs []error diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/prune.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/prune.go index 74a28b0696..47ade32ac5 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/prune.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/prune.go @@ -11,6 +11,10 @@ var ( logger = seilog.NewLogger("db", "ledger-db", "parquet-v2") ) +// pruneOldFiles deletes closed parquet pairs whose entire block range falls +// below pruneBeforeBlock. A pair stays in the list if either of its files +// fails to delete, so a transient error doesn't desync c.closedFiles from +// disk. Returns the number of pairs successfully removed. func (c *Coordinator) pruneOldFiles(pruneBeforeBlock uint64) int { if len(c.closedFiles) == 0 { return 0 @@ -40,6 +44,9 @@ func (c *Coordinator) pruneOldFiles(pruneBeforeBlock uint64) int { return prunedCount } +// shouldPruneClosedFile reports whether the file's full block range +// (startBlock + MaxBlocksPerFile) lies entirely below pruneBeforeBlock. +// Saturates on overflow rather than wrapping. 
func (c *Coordinator) shouldPruneClosedFile(f closedFile, pruneBeforeBlock uint64) bool { fileEndBlock := f.startBlock + c.config.MaxBlocksPerFile if fileEndBlock < f.startBlock { @@ -48,6 +55,8 @@ func (c *Coordinator) shouldPruneClosedFile(f closedFile, pruneBeforeBlock uint6 return fileEndBlock <= pruneBeforeBlock } +// removePrunedFile deletes path. Treats "already gone" as success and logs +// any other failure. The package var removeFile lets tests inject failures. func removePrunedFile(path string) bool { if path == "" { return true diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/reader.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/reader.go index ab31f91dbf..0946228ae5 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/reader.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/reader.go @@ -21,6 +21,10 @@ type Reader struct { maxBlocksPerFile uint64 } +// NewReaderWithMaxBlocksPerFile constructs a Reader backed by an in-process +// DuckDB connection tuned for parquet scans. maxBlocksPerFile=0 falls back +// to the default; the value is used when narrowing log queries by block +// range from the file name alone. func NewReaderWithMaxBlocksPerFile(basePath string, maxBlocksPerFile uint64) (*Reader, error) { if maxBlocksPerFile == 0 { maxBlocksPerFile = parquet.DefaultStoreConfig().MaxBlocksPerFile @@ -61,10 +65,15 @@ func NewReaderWithMaxBlocksPerFile(basePath string, maxBlocksPerFile uint64) (*R }, nil } +// setMaxBlocksPerFile updates the rotation interval used to derive each +// file's covered block range. Called by the coordinator when configuration +// changes at runtime in tests. func (r *Reader) setMaxBlocksPerFile(maxBlocksPerFile uint64) { r.maxBlocksPerFile = maxBlocksPerFile } +// Close shuts down the DuckDB connection pool. Safe to call on a nil Reader +// or after a previous Close. 
func (r *Reader) Close() error { if r == nil || r.db == nil { return nil @@ -74,6 +83,8 @@ func (r *Reader) Close() error { return err } +// QueryReceiptByTxHash returns the lowest-block receipt for txHash across +// the supplied parquet files, or (nil, nil) if none of them contain it. func (r *Reader) QueryReceiptByTxHash(ctx context.Context, files []string, txHash common.Hash) (*parquet.ReceiptResult, error) { if len(files) == 0 { return nil, nil @@ -101,6 +112,8 @@ func (r *Reader) QueryReceiptByTxHash(ctx context.Context, files []string, txHas return &rec, nil } +// QueryReceiptByTxHashInBlock returns the receipt for txHash at exactly +// blockNumber, or (nil, nil) if no such receipt exists in files. func (r *Reader) QueryReceiptByTxHashInBlock(ctx context.Context, files []string, txHash common.Hash, blockNumber uint64) (*parquet.ReceiptResult, error) { if len(files) == 0 { return nil, nil @@ -127,6 +140,8 @@ func (r *Reader) QueryReceiptByTxHashInBlock(ctx context.Context, files []string return &rec, nil } +// QueryLogs returns logs matching filter from files. Files outside the +// from/to-block window are dropped before the SQL query is built. func (r *Reader) QueryLogs(ctx context.Context, files []string, filter parquet.LogFilter) ([]parquet.LogResult, error) { files = r.filterLogFiles(files, filter) if len(files) == 0 { @@ -135,6 +150,9 @@ func (r *Reader) QueryLogs(ctx context.Context, files []string, filter parquet.L return r.queryLogFiles(ctx, files, filter) } +// filterLogFiles drops files whose block range cannot overlap the filter's +// [FromBlock, ToBlock] window, computed from the start block in the file +// name and maxBlocksPerFile. 
func (r *Reader) filterLogFiles(files []string, filter parquet.LogFilter) []string { filtered := make([]string, 0, len(files)) for _, f := range files { @@ -150,6 +168,9 @@ func (r *Reader) filterLogFiles(files []string, filter parquet.LogFilter) []stri return filtered } +// queryLogFiles builds and executes the parametrized DuckDB query that +// applies block, address, and per-position topic predicates, and decodes +// the result rows into parquet.LogResult values. func (r *Reader) queryLogFiles(ctx context.Context, files []string, filter parquet.LogFilter) ([]parquet.LogResult, error) { // #nosec G201 -- parquetFiles derived from coordinator-owned local file paths. query := fmt.Sprintf(` @@ -236,6 +257,9 @@ func (r *Reader) queryLogFiles(ctx context.Context, files []string, filter parqu return results, rows.Err() } +// MaxReceiptBlockNumber returns the largest block_number observed across +// files. The boolean is false when files is empty or contains no rows; +// negative values surface as an error. func (r *Reader) MaxReceiptBlockNumber(ctx context.Context, files []string) (uint64, bool, error) { if len(files) == 0 { return 0, false, nil @@ -264,12 +288,18 @@ func (r *Reader) MaxReceiptBlockNumber(ctx context.Context, files []string) (uin return uint64(max.Int64), true, nil } +// isFileReadable probes a parquet file by issuing a "SELECT 1 LIMIT 1" +// against it. A failure typically indicates a truncated or corrupt file +// from a crash mid-flush. func (r *Reader) isFileReadable(path string) bool { // #nosec G201 -- path comes from local parquet file scans, not user input. _, err := r.db.Exec(fmt.Sprintf("SELECT 1 FROM read_parquet(%s) LIMIT 1", quoteSQLString(path))) return err == nil } +// configureParquetMetadataCache enables DuckDB's parquet metadata cache. It +// prefers the size-based knob (newer DuckDB) and falls back to the boolean +// toggle on older builds that don't recognize the size setting. 
func configureParquetMetadataCache(db *sql.DB) error { const sizeSetting = "SET parquet_metadata_cache_size = 500" if _, err := db.Exec(sizeSetting); err == nil { @@ -286,6 +316,8 @@ func configureParquetMetadataCache(db *sql.DB) error { return nil } +// joinQuoted SQL-quotes each path and joins them with ", " for embedding in +// a DuckDB read_parquet([...]) list literal. func joinQuoted(files []string) string { quoted := make([]string, len(files)) for i, f := range files { @@ -294,10 +326,14 @@ func joinQuoted(files []string) string { return strings.Join(quoted, ", ") } +// quoteSQLString wraps s in single quotes and escapes embedded quotes for +// safe inclusion in a DuckDB SQL string literal. func quoteSQLString(s string) string { return "'" + strings.ReplaceAll(s, "'", "''") + "'" } +// parquetFilesSQL renders files as either a single quoted path or a quoted +// list, in either case suitable as the first argument to read_parquet(). func parquetFilesSQL(files []string) string { if len(files) == 1 { return quoteSQLString(files[0]) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go index 52819765ea..9a43deadd4 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go @@ -7,26 +7,37 @@ import ( "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" ) +// coordRequest is the sealed-union marker for messages sent to the +// coordinator goroutine. Each concrete request type lives below and carries +// its own per-request reply channel. type coordRequest interface { isCoordRequest() } +// writeReq asks the coordinator to append receipts for a block. height is +// authoritative; per-input BlockNumber is ignored. type writeReq struct { height uint64 inputs []parquet.ReceiptInput resp chan writeResp } +// writeResp carries the outcome of a writeReq. 
type writeResp struct { err error } +// readByTxHashReq asks the coordinator to look up the earliest receipt for +// txHash. The temp write cache is consulted first, then closed parquet +// files. type readByTxHashReq struct { ctx context.Context txHash common.Hash resp chan readReceiptResp } +// readByTxHashInBlockReq asks for the receipt at exactly blockNumber, used +// to disambiguate replayed transactions across reorgs. type readByTxHashInBlockReq struct { ctx context.Context txHash common.Hash @@ -34,78 +45,108 @@ type readByTxHashInBlockReq struct { resp chan readReceiptResp } +// readReceiptResp carries the outcome of a receipt read. result==nil with +// err==nil indicates "not found". type readReceiptResp struct { result *parquet.ReceiptResult err error } +// getLogsReq asks the coordinator for logs matching filter across the +// closed log parquet files. type getLogsReq struct { ctx context.Context filter parquet.LogFilter resp chan getLogsResp } +// getLogsResp carries the outcome of a getLogsReq. type getLogsResp struct { results []parquet.LogResult err error } +// flushReq asks the coordinator to flush buffered receipts/logs to the open +// parquet file. type flushReq struct { resp chan error } +// latestVersionReq asks for the highest block height observed by the +// coordinator. type latestVersionReq struct { resp chan int64 } +// setLatestVersionReq overwrites latestVersion. Used when a caller knows +// the chain height authoritatively (e.g., genesis init). type setLatestVersionReq struct { version int64 resp chan error } +// setEarliestVersionReq records the lowest retained block height for +// pruning bookkeeping. type setEarliestVersionReq struct { version int64 resp chan error } +// updateLatestVersionReq advances latestVersion only when version is +// strictly greater, preventing rewinds. type updateLatestVersionReq struct { version int64 resp chan error } +// fileStartBlockReq asks for the start block of the currently open parquet +// file. 
type fileStartBlockReq struct { resp chan uint64 } +// setBlockFlushIntervalReq updates how often (in blocks) the open writers +// are flushed to disk. type setBlockFlushIntervalReq struct { interval uint64 resp chan error } +// setMaxBlocksPerFileReq updates the rotation interval and propagates it +// to the reader. type setMaxBlocksPerFileReq struct { maxBlocksPerFile uint64 resp chan error } +// setFaultHooksReq installs test hooks. nil disables all hook checks. type setFaultHooksReq struct { hooks *parquet.FaultHooks resp chan error } +// replayWALReq drives WAL replay using converter to decode receipt bytes +// into per-block records. type replayWALReq struct { converter WALReceiptConverter resp chan replayWALResp } +// replayWALResp carries the recovered records and per-block tx hashes +// produced by replayWAL. type replayWALResp struct { result ReplayResult err error } +// simulateCrashReq drops in-memory writer state without flushing so that +// recovery paths can be exercised. Test-only. type simulateCrashReq struct { resp chan struct{} } +// closeReq triggers a graceful shutdown: flush, close writers, close WAL +// and reader. type closeReq struct { resp chan error } diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go index 0a087d2d98..9b3f36cc9a 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go @@ -13,18 +13,30 @@ import ( // stopped accepting work. var ErrStoreClosed = errors.New("store closed") +// tempReceipt is one entry in the in-memory write cache, indexed by tx +// hash. It carries enough to reconstruct a ReceiptResult for reads served +// before the receipt has been flushed to a parquet file. 
type tempReceipt struct { blockNumber uint64 receiptBytes []byte } +// ReplayedBlock summarizes one block recovered from WAL replay: the block +// number and the tx hashes whose receipts were replayed in order. type ReplayedBlock struct { BlockNumber uint64 TxHashes []common.Hash } +// WALReceiptConverter decodes a raw WAL receipt blob into the structured +// fields the coordinator needs to re-stage it. logStartIndex carries the +// running per-block log offset so logs from earlier txs in the same block +// don't collide. type WALReceiptConverter func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) +// ReplayReceipt is one converted WAL entry: the receipt input to re-stage, +// its tx hash, the warmup record returned to the wrapper, and the log +// count consumed (used to advance logStartIndex). type ReplayReceipt struct { Input parquet.ReceiptInput TxHash common.Hash @@ -32,11 +44,16 @@ type ReplayReceipt struct { LogCount uint } +// ReplayResult is the outcome of a successful WAL replay: warmup records +// to seed external caches, plus the per-block tx hash listing. type ReplayResult struct { WarmupRecords []parquet.ReceiptRecord Blocks []ReplayedBlock } +// int64FromUint64 converts value to int64 or errors on overflow. Used at +// the boundary where block heights cross from internal uint64 storage to +// the sdk-style int64 latestVersion. func int64FromUint64(value uint64) (int64, error) { if value > uint64(math.MaxInt64) { return 0, fmt.Errorf("value %d overflows int64", value) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go index 64a90fc30c..3187af7abe 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go @@ -8,6 +8,11 @@ import ( "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" ) +// replayWAL re-applies WAL entries on top of the on-disk parquet state. 
It +// drives rotation when entries cross a MaxBlocksPerFile boundary (so the +// resulting layout matches what a non-crashing run would have produced), +// applies each receipt to the open writer, and finally truncates WAL +// entries that are now durably persisted. func (c *Coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, error) { if converter == nil { return ReplayResult{}, fmt.Errorf("WAL receipt converter is nil") @@ -116,6 +121,9 @@ func (c *Coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, er return result, nil } +// applyReceiptFromReplay is the replay-time variant of applyReceipt: it +// rotates without writing to the WAL (the WAL is the source of replay) and +// drops temp-cache entries from the just-closed file's range. func (c *Coordinator) applyReceiptFromReplay(blockNumber uint64, input parquet.ReceiptInput) error { if c.receiptWriter != nil && blockNumber != c.lastSeenBlock && c.isRotationBoundary(blockNumber) { if err := c.rotateOpenFileWithoutWAL(blockNumber); err != nil { @@ -126,6 +134,9 @@ func (c *Coordinator) applyReceiptFromReplay(blockNumber uint64, input parquet.R return c.applyReceipt(blockNumber, input) } +// normalizeReplayInput backfills the ReceiptInput fields that the converter +// may have left empty (block number, tx hash, and the receipt byte +// payloads), so downstream apply code doesn't need replay-aware branches. func normalizeReplayInput(blockNumber uint64, receiptBytes []byte, replayed ReplayReceipt) parquet.ReceiptInput { input := replayed.Input input.Receipt.BlockNumber = blockNumber @@ -141,6 +152,8 @@ func normalizeReplayInput(blockNumber uint64, receiptBytes []byte, replayed Repl return input } +// copyReceiptRecord returns a deep copy of record so callers can retain it +// without aliasing the converter's internal buffers. 
func copyReceiptRecord(record parquet.ReceiptRecord) parquet.ReceiptRecord { return parquet.ReceiptRecord{ TxHash: append([]byte(nil), record.TxHash...), @@ -149,6 +162,9 @@ func copyReceiptRecord(record parquet.ReceiptRecord) parquet.ReceiptRecord { } } +// clearWALPreservingLast truncates the WAL up to (but not including) its +// last offset after a rotation. The final entry is retained so that crash +// recovery can still observe the rotation boundary. func (c *Coordinator) clearWALPreservingLast() error { if c.wal == nil { return nil @@ -179,6 +195,9 @@ func (c *Coordinator) clearWALPreservingLast() error { return nil } +// truncateReplayWAL drops WAL entries up to and including dropOffset after +// a successful replay. Out-of-range errors from the underlying WAL are +// treated as no-ops since they mean nothing was left to truncate. func truncateReplayWAL(w interface{ TruncateBefore(offset uint64) error }, dropOffset uint64) error { if dropOffset == 0 { return nil diff --git a/sei-db/ledger_db/receipt/parquet_v2/store.go b/sei-db/ledger_db/receipt/parquet_v2/store.go index 294a61a45c..bdb5590fa5 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store.go @@ -10,6 +10,12 @@ import ( // Store is the public facade of the V2 parquet receipt store. It wraps a // coordinator.Coordinator and forwards all calls to it. +// +// Store does not directly implement the receipt.ReceiptStore interface; +// the parquetReceiptStoreV2 wrapper in the parent package adapts Store to +// that interface (handling tx-hash indexing, replay glue, and the higher +// level ReceiptStore method shapes). Methods here are documented inline +// rather than via a parent interface. type Store struct { coord *coordinator.Coordinator } @@ -23,70 +29,102 @@ func NewStore(cfg parquet.StoreConfig) (*Store, error) { return &Store{coord: c}, nil } +// WriteReceipts appends receipts for the block at height. 
height is +// authoritative; any BlockNumber on individual inputs is ignored. func (s *Store) WriteReceipts(height uint64, inputs []parquet.ReceiptInput) error { return s.coord.WriteReceipts(height, inputs) } +// GetReceiptByTxHash returns the earliest receipt for txHash, or +// (nil, nil) if none is found. func (s *Store) GetReceiptByTxHash(ctx context.Context, txHash common.Hash) (*parquet.ReceiptResult, error) { return s.coord.GetReceiptByTxHash(ctx, txHash) } +// GetReceiptByTxHashInBlock returns the receipt for txHash at exactly +// blockNumber, or (nil, nil) if no such receipt exists. func (s *Store) GetReceiptByTxHashInBlock(ctx context.Context, txHash common.Hash, blockNumber uint64) (*parquet.ReceiptResult, error) { return s.coord.GetReceiptByTxHashInBlock(ctx, txHash, blockNumber) } +// GetLogs returns all logs matching filter across the closed log parquet +// files. func (s *Store) GetLogs(ctx context.Context, filter parquet.LogFilter) ([]parquet.LogResult, error) { return s.coord.GetLogs(ctx, filter) } +// FileStartBlock returns the start block of the currently open parquet +// file (the next file's name will be derived from this). func (s *Store) FileStartBlock() uint64 { return s.coord.FileStartBlock() } +// LatestVersion returns the highest block height the store has observed. func (s *Store) LatestVersion() int64 { return s.coord.LatestVersion() } +// SetLatestVersion overwrites latestVersion. Used during init paths where +// the chain height is known authoritatively. func (s *Store) SetLatestVersion(version int64) { s.coord.SetLatestVersion(version) } +// SetEarliestVersion records the lowest retained block height for pruning +// bookkeeping. func (s *Store) SetEarliestVersion(version int64) { s.coord.SetEarliestVersion(version) } +// UpdateLatestVersion advances latestVersion only when version is strictly +// greater than the current value, preventing accidental rewinds. 
func (s *Store) UpdateLatestVersion(version int64) { s.coord.UpdateLatestVersion(version) } +// CacheRotateInterval returns the cache rotation interval (configured +// MaxBlocksPerFile) used by the wrapper to manage warmup state. func (s *Store) CacheRotateInterval() uint64 { return s.coord.CacheRotateInterval() } +// Flush forces buffered receipts/logs in the open parquet file to disk. func (s *Store) Flush() error { return s.coord.Flush() } +// Close performs a graceful shutdown, flushing and closing all writers, +// the WAL, and the reader. func (s *Store) Close() error { return s.coord.Close() } +// SimulateCrash drops in-memory writer state without flushing. Test-only; +// used to exercise WAL recovery in the surrounding test suite. func (s *Store) SimulateCrash() { s.coord.SimulateCrash() } +// SetBlockFlushInterval updates how often (in blocks) the open writers are +// flushed to disk. func (s *Store) SetBlockFlushInterval(interval uint64) { s.coord.SetBlockFlushInterval(interval) } +// SetMaxBlocksPerFile updates the rotation interval and propagates it to +// the reader. func (s *Store) SetMaxBlocksPerFile(n uint64) { s.coord.SetMaxBlocksPerFile(n) } +// SetFaultHooks installs the supplied test hooks. nil disables all hook +// checks. func (s *Store) SetFaultHooks(hooks *parquet.FaultHooks) { s.coord.SetFaultHooks(hooks) } +// ReplayWAL drives WAL replay using converter to decode receipt bytes, +// returning the recovered records and per-block tx hashes. func (s *Store) ReplayWAL(converter WALReceiptConverter) (ReplayResult, error) { return s.coord.ReplayWAL(converter) } From 9ffb2f125e6c68f7b2dd6e0acfa378dcbf95d48a Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 4 May 2026 10:03:37 -0400 Subject: [PATCH 20/27] move prune ticker into run() as local var Replace the pruneTicker/pruneTick struct fields and stopPruneTicker() helper with a local ticker in run() guarded by defer ticker.Stop(). 
The defer covers all three exit paths (done, simulateCrashReq, closeReq), so the explicit stopPruneTicker() calls in handleClose and handleSimulateCrash are no longer needed. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../parquet_v2/coordinator/coordinator.go | 31 ++++++------------- .../parquet_v2/coordinator/handlers.go | 8 ++--- 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go index 0e9f55b372..acda7d5b1f 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -24,11 +24,9 @@ type closedFile struct { // its requests channel. Construct with New; interact through the typed // methods (WriteReceipts, GetLogs, ...). type Coordinator struct { - requests chan coordRequest - pruneTick <-chan time.Time - pruneTicker *time.Ticker - done chan struct{} - closeOnce sync.Once + requests chan coordRequest + done chan struct{} + closeOnce sync.Once config parquet.StoreConfig @@ -127,11 +125,6 @@ func New(cfg parquet.StoreConfig) (*Coordinator, error) { } } - if storeCfg.KeepRecent > 0 && storeCfg.PruneIntervalSeconds > 0 { - c.pruneTicker = time.NewTicker(time.Duration(storeCfg.PruneIntervalSeconds) * time.Second) - c.pruneTick = c.pruneTicker.C - } - go c.run() cleanupReader = false cleanupWAL = false @@ -157,6 +150,12 @@ func resolveStoreConfig(cfg parquet.StoreConfig) parquet.StoreConfig { } func (c *Coordinator) run() { + var pruneTick <-chan time.Time + if c.config.KeepRecent > 0 && c.config.PruneIntervalSeconds > 0 { + ticker := time.NewTicker(time.Duration(c.config.PruneIntervalSeconds) * time.Second) + defer ticker.Stop() + pruneTick = ticker.C + } for { select { case req := <-c.requests: @@ -198,24 +197,14 @@ func (c *Coordinator) run() { default: panic(fmt.Sprintf("coordinator: unrecognized request type %T", r)) } - case 
<-c.pruneTick: + case <-pruneTick: c.handlePruneTick() case <-c.done: - c.stopPruneTicker() return } } } -func (c *Coordinator) stopPruneTicker() { - if c.pruneTicker == nil { - return - } - c.pruneTicker.Stop() - c.pruneTicker = nil - c.pruneTick = nil -} - // WriteReceipts records a committed block. inputs may be empty, in which case // the call only advances rotation/cursor state — equivalent to the former // ObserveEmptyBlock. height is authoritative; inputs[i].BlockNumber is diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go index ab2d11fe3c..62f48352f4 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go @@ -150,11 +150,10 @@ func (c *Coordinator) handlePruneTick() { c.pruneOldFiles(uint64(pruneBeforeBlock)) } -// handleClose performs a graceful shutdown: stop the prune ticker, flush and -// close the open writers, then close the WAL and reader. Returns the first -// non-nil error encountered along the way. +// handleClose performs a graceful shutdown: flush and close the open writers, +// then close the WAL and reader. Returns the first non-nil error encountered +// along the way. The prune ticker is stopped via defer in run(). func (c *Coordinator) handleClose(req closeReq) { - c.stopPruneTicker() if err := c.flushOpenFile(); err != nil { req.resp <- err return @@ -184,7 +183,6 @@ func (c *Coordinator) handleClose(req closeReq) { // open parquet files remain truncated/partial on disk so subsequent recovery // paths can be exercised. Test-only. 
func (c *Coordinator) handleSimulateCrash(req simulateCrashReq) { - c.stopPruneTicker() if c.receiptFile != nil { _ = c.receiptFile.Close() c.receiptFile = nil From 49602ed644e572ee7b81bfc27ad016da7dd9ddb2 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 4 May 2026 10:29:53 -0400 Subject: [PATCH 21/27] use visitor pattern for dispatch --- .../parquet_v2/coordinator/coordinator.go | 37 +--------- .../parquet_v2/coordinator/requests.go | 68 ++++++++++++++----- 2 files changed, 51 insertions(+), 54 deletions(-) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go index acda7d5b1f..48bb97edb5 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -159,43 +159,8 @@ func (c *Coordinator) run() { for { select { case req := <-c.requests: - switch r := req.(type) { - case writeReq: - c.handleWrite(r) - case readByTxHashReq: - c.handleReadByTxHash(r) - case readByTxHashInBlockReq: - c.handleReadByTxHashInBlock(r) - case getLogsReq: - c.handleGetLogs(r) - case flushReq: - c.handleFlush(r) - case latestVersionReq: - c.handleLatestVersion(r) - case setLatestVersionReq: - c.handleSetLatestVersion(r) - case setEarliestVersionReq: - c.handleSetEarliestVersion(r) - case updateLatestVersionReq: - c.handleUpdateLatestVersion(r) - case fileStartBlockReq: - c.handleFileStartBlock(r) - case setBlockFlushIntervalReq: - c.handleSetBlockFlushInterval(r) - case setMaxBlocksPerFileReq: - c.handleSetMaxBlocksPerFile(r) - case setFaultHooksReq: - c.handleSetFaultHooks(r) - case replayWALReq: - c.handleReplayWAL(r) - case simulateCrashReq: - c.handleSimulateCrash(r) + if req.dispatch(c) { return - case closeReq: - c.handleClose(r) - return - default: - panic(fmt.Sprintf("coordinator: unrecognized request type %T", r)) } case <-pruneTick: c.handlePruneTick() diff --git
a/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go index 9a43deadd4..456c10a5de 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go @@ -9,9 +9,11 @@ import ( // coordRequest is the sealed-union marker for messages sent to the // coordinator goroutine. Each concrete request type lives below and carries -// its own per-request reply channel. +// its own per-request reply channel. dispatch invokes the appropriate +// handler on c and returns true when run() should exit afterward +// (graceful close, simulated crash). type coordRequest interface { - isCoordRequest() + dispatch(c *Coordinator) (terminate bool) } // writeReq asks the coordinator to append receipts for a block. height is @@ -151,19 +153,49 @@ type closeReq struct { resp chan error } -func (writeReq) isCoordRequest() {} -func (readByTxHashReq) isCoordRequest() {} -func (readByTxHashInBlockReq) isCoordRequest() {} -func (getLogsReq) isCoordRequest() {} -func (flushReq) isCoordRequest() {} -func (latestVersionReq) isCoordRequest() {} -func (setLatestVersionReq) isCoordRequest() {} -func (setEarliestVersionReq) isCoordRequest() {} -func (updateLatestVersionReq) isCoordRequest() {} -func (fileStartBlockReq) isCoordRequest() {} -func (setBlockFlushIntervalReq) isCoordRequest() {} -func (setMaxBlocksPerFileReq) isCoordRequest() {} -func (setFaultHooksReq) isCoordRequest() {} -func (replayWALReq) isCoordRequest() {} -func (simulateCrashReq) isCoordRequest() {} -func (closeReq) isCoordRequest() {} +func (r writeReq) dispatch(c *Coordinator) bool { c.handleWrite(r); return false } +func (r readByTxHashReq) dispatch(c *Coordinator) bool { c.handleReadByTxHash(r); return false } +func (r readByTxHashInBlockReq) dispatch(c *Coordinator) bool { + c.handleReadByTxHashInBlock(r) + return false +} +func (r getLogsReq) dispatch(c *Coordinator) bool { 
c.handleGetLogs(r); return false } +func (r flushReq) dispatch(c *Coordinator) bool { c.handleFlush(r); return false } +func (r latestVersionReq) dispatch(c *Coordinator) bool { + c.handleLatestVersion(r) + return false +} +func (r setLatestVersionReq) dispatch(c *Coordinator) bool { + c.handleSetLatestVersion(r) + return false +} +func (r setEarliestVersionReq) dispatch(c *Coordinator) bool { + c.handleSetEarliestVersion(r) + return false +} +func (r updateLatestVersionReq) dispatch(c *Coordinator) bool { + c.handleUpdateLatestVersion(r) + return false +} +func (r fileStartBlockReq) dispatch(c *Coordinator) bool { + c.handleFileStartBlock(r) + return false +} +func (r setBlockFlushIntervalReq) dispatch(c *Coordinator) bool { + c.handleSetBlockFlushInterval(r) + return false +} +func (r setMaxBlocksPerFileReq) dispatch(c *Coordinator) bool { + c.handleSetMaxBlocksPerFile(r) + return false +} +func (r setFaultHooksReq) dispatch(c *Coordinator) bool { + c.handleSetFaultHooks(r) + return false +} +func (r replayWALReq) dispatch(c *Coordinator) bool { c.handleReplayWAL(r); return false } +func (r simulateCrashReq) dispatch(c *Coordinator) bool { + c.handleSimulateCrash(r) + return true +} +func (r closeReq) dispatch(c *Coordinator) bool { c.handleClose(r); return true } From 37ef152fe98307336a85e0bf76d1ece8db0004a0 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 4 May 2026 10:58:54 -0400 Subject: [PATCH 22/27] run WAL replay synchronously inside parquet_v2 NewStore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the public ReplayWAL methods with construction-time replay driven by a ReplayHooks struct (converter + per-block tx-index callback). Wrappers now open the tx-hash index first, build the hooks closure, and pass it into NewStore — by the time NewStore returns, parquet state, tx index, and warmup records are all hot. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../parquet_v2/coordinator/coordinator.go | 43 ++++-- .../parquet_v2/coordinator/handlers.go | 7 - .../parquet_v2/coordinator/requests.go | 15 -- .../receipt/parquet_v2/coordinator/types.go | 11 ++ sei-db/ledger_db/receipt/parquet_v2/store.go | 17 ++- .../receipt/parquet_v2/store_dispatch_test.go | 6 +- .../receipt/parquet_v2/store_init_test.go | 12 +- .../receipt/parquet_v2/store_read_test.go | 10 +- .../receipt/parquet_v2/store_rotation_test.go | 6 +- .../receipt/parquet_v2/store_wal_test.go | 31 ---- .../receipt/parquet_v2/store_write_test.go | 4 +- sei-db/ledger_db/receipt/parquet_v2/types.go | 1 + .../receipt/parquet_v2_receipt_store.go | 143 ++++++++---------- 13 files changed, 139 insertions(+), 167 deletions(-) delete mode 100644 sei-db/ledger_db/receipt/parquet_v2/store_wal_test.go diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go index 48bb97edb5..59fda22221 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -52,11 +52,19 @@ type Coordinator struct { wal dbwal.GenericWAL[parquet.WALEntry] reader *Reader + + warmupRecords []parquet.ReceiptRecord } -// New constructs a Coordinator with a live goroutine. The returned -// Coordinator is ready to accept requests via its typed methods. -func New(cfg parquet.StoreConfig) (*Coordinator, error) { +// New constructs a Coordinator and drives WAL replay synchronously before +// starting the request goroutine, so the returned Coordinator already +// reflects any persisted-but-uncheckpointed receipts. Pass hooks.Converter +// nil to skip auto-replay (used by tests that exercise replayWAL +// directly). When hooks.OnReplayedBlock is set, it is invoked per +// recovered block after receipts have been re-applied — the wrapper uses +// this to repopulate its tx-hash index. 
Recovered warmup records are +// retained and drained by the caller via WarmupRecords. +func New(cfg parquet.StoreConfig, hooks ReplayHooks) (*Coordinator, error) { storeCfg := resolveStoreConfig(cfg) if err := os.MkdirAll(storeCfg.DBDirectory, 0o750); err != nil { @@ -125,6 +133,21 @@ func New(cfg parquet.StoreConfig) (*Coordinator, error) { } } + if hooks.Converter != nil { + result, err := c.replayWAL(hooks.Converter) + if err != nil { + return nil, err + } + c.warmupRecords = result.WarmupRecords + if hooks.OnReplayedBlock != nil { + for _, rb := range result.Blocks { + if err := hooks.OnReplayedBlock(rb.BlockNumber, rb.TxHashes); err != nil { + return nil, err + } + } + } + } + go c.run() cleanupReader = false cleanupWAL = false @@ -295,13 +318,13 @@ func (c *Coordinator) SetFaultHooks(hooks *parquet.FaultHooks) { _ = awaitError(c, setFaultHooksReq{hooks: hooks, resp: resp}, resp) } -func (c *Coordinator) ReplayWAL(converter WALReceiptConverter) (ReplayResult, error) { - resp := make(chan replayWALResp, 1) - r, err := sendAndAwaitResponse(c, replayWALReq{converter: converter, resp: resp}, resp) - if err != nil { - return ReplayResult{}, err - } - return r.result, r.err +// WarmupRecords returns and clears the warmup receipt records recovered +// during construction-time WAL replay. Callers drain this once after +// construction to seed an external receipt cache. 
+func (c *Coordinator) WarmupRecords() []parquet.ReceiptRecord { + records := c.warmupRecords + c.warmupRecords = nil + return records } func sendAndAwaitResponse[T any](c *Coordinator, req coordRequest, resp <-chan T) (T, error) { diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go index 62f48352f4..07927869fe 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go @@ -127,13 +127,6 @@ func (c *Coordinator) handleSetFaultHooks(req setFaultHooksReq) { req.resp <- nil } -// handleReplayWAL drives WAL replay against the configured converter and -// returns the recovered records and per-block tx hashes to the caller. -func (c *Coordinator) handleReplayWAL(req replayWALReq) { - result, err := c.replayWAL(req.converter) - req.resp <- replayWALResp{result: result, err: err} -} - // handlePruneTick fires on the prune ticker and removes closed parquet pairs // whose end block falls below latestVersion - KeepRecent. func (c *Coordinator) handlePruneTick() { diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go index 456c10a5de..9166442cce 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/requests.go @@ -127,20 +127,6 @@ type setFaultHooksReq struct { resp chan error } -// replayWALReq drives WAL replay using converter to decode receipt bytes -// into per-block records. -type replayWALReq struct { - converter WALReceiptConverter - resp chan replayWALResp -} - -// replayWALResp carries the recovered records and per-block tx hashes -// produced by replayWAL. -type replayWALResp struct { - result ReplayResult - err error -} - // simulateCrashReq drops in-memory writer state without flushing so that // recovery paths can be exercised. Test-only. 
type simulateCrashReq struct { @@ -193,7 +179,6 @@ func (r setFaultHooksReq) dispatch(c *Coordinator) bool { c.handleSetFaultHooks(r) return false } -func (r replayWALReq) dispatch(c *Coordinator) bool { c.handleReplayWAL(r); return false } func (r simulateCrashReq) dispatch(c *Coordinator) bool { c.handleSimulateCrash(r) return true diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go index 9b3f36cc9a..84b115620a 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go @@ -51,6 +51,17 @@ type ReplayResult struct { Blocks []ReplayedBlock } +// ReplayHooks bundles the wrapper-specific callbacks invoked during WAL +// replay at construction time. Converter decodes the raw WAL receipt blob; +// when nil, replay is skipped entirely (used by lower-level tests that +// drive replay manually). OnReplayedBlock, when non-nil, is called once +// per recovered block after its receipts have been re-applied — the +// wrapper uses this to re-populate its tx-hash index. +type ReplayHooks struct { + Converter WALReceiptConverter + OnReplayedBlock func(blockNumber uint64, txHashes []common.Hash) error +} + // int64FromUint64 converts value to int64 or errors on overflow. Used at // the boundary where block heights cross from internal uint64 storage to // the sdk-style int64 latestVersion. diff --git a/sei-db/ledger_db/receipt/parquet_v2/store.go b/sei-db/ledger_db/receipt/parquet_v2/store.go index bdb5590fa5..3d3ce61928 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store.go @@ -21,8 +21,12 @@ type Store struct { } // NewStore creates a V2 store backed by a live coordinator goroutine. 
-func NewStore(cfg parquet.StoreConfig) (*Store, error) { - c, err := coordinator.New(cfg) +// hooks.Converter, when non-nil, drives WAL replay synchronously before +// the store accepts other calls; production callers always supply one. +// Lower-level tests that drive replay manually pass a zero-value +// ReplayHooks. +func NewStore(cfg parquet.StoreConfig, hooks ReplayHooks) (*Store, error) { + c, err := coordinator.New(cfg, hooks) if err != nil { return nil, err } @@ -123,8 +127,9 @@ func (s *Store) SetFaultHooks(hooks *parquet.FaultHooks) { s.coord.SetFaultHooks(hooks) } -// ReplayWAL drives WAL replay using converter to decode receipt bytes, -// returning the recovered records and per-block tx hashes. -func (s *Store) ReplayWAL(converter WALReceiptConverter) (ReplayResult, error) { - return s.coord.ReplayWAL(converter) +// WarmupRecords returns and clears the warmup receipt records recovered +// during construction-time WAL replay. Wrappers drain this once after +// NewStore returns to seed an external receipt cache. 
+func (s *Store) WarmupRecords() []parquet.ReceiptRecord { + return s.coord.WarmupRecords() } diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go index d2fc046959..c8f58782f2 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go @@ -13,7 +13,7 @@ func newDispatchStore(t *testing.T) *Store { store, err := NewStore(parquet.StoreConfig{ DBDirectory: t.TempDir(), MaxBlocksPerFile: 4, - }) + }, ReplayHooks{}) require.NoError(t, err) t.Cleanup(func() { _ = store.Close() }) return store @@ -44,7 +44,7 @@ func TestMetadataAndConfigRequestsDispatchThroughCoordinator(t *testing.T) { } func TestCloseStopsFutureRequests(t *testing.T) { - store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}) + store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}, ReplayHooks{}) require.NoError(t, err) require.NoError(t, store.Close()) @@ -53,7 +53,7 @@ func TestCloseStopsFutureRequests(t *testing.T) { } func TestSimulateCrashStopsFutureRequests(t *testing.T) { - store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}) + store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}, ReplayHooks{}) require.NoError(t, err) store.SimulateCrash() diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go index 9ab401f309..fde83a8cb7 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go @@ -12,7 +12,7 @@ import ( func TestNewStoreCreatesDirectoryAndClosesIdempotently(t *testing.T) { dir := filepath.Join(t.TempDir(), "nested", "parquet") - store, err := NewStore(parquet.StoreConfig{DBDirectory: dir}) + store, err := NewStore(parquet.StoreConfig{DBDirectory: dir}, ReplayHooks{}) require.NoError(t, err) require.DirExists(t, dir) require.DirExists(t, 
filepath.Join(dir, "parquet-wal")) @@ -30,7 +30,7 @@ func TestNewStoreSeedsLatestVersionFromClosedFiles(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 100, - }) + }, ReplayHooks{}) require.NoError(t, err) require.Equal(t, int64(123), store.LatestVersion()) require.Equal(t, uint64(124), store.FileStartBlock()) @@ -39,7 +39,7 @@ func TestNewStoreSeedsLatestVersionFromClosedFiles(t *testing.T) { reopened, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 100, - }) + }, ReplayHooks{}) require.NoError(t, err) require.Equal(t, int64(123), reopened.LatestVersion()) require.Equal(t, uint64(124), reopened.FileStartBlock()) @@ -59,7 +59,7 @@ func TestNewStoreRemovesCorruptTrailingPair(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 500, - }) + }, ReplayHooks{}) require.NoError(t, err) require.NoError(t, store.Close()) @@ -82,7 +82,7 @@ func TestNewStoreRemovesReceiptCounterpartForCorruptTrailingLog(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 500, - }) + }, ReplayHooks{}) require.NoError(t, err) require.Equal(t, int64(1), store.LatestVersion()) require.NoError(t, store.Close()) @@ -101,7 +101,7 @@ func TestNewStoreIgnoresUnmatchedFiles(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 500, - }) + }, ReplayHooks{}) require.NoError(t, err) require.Equal(t, int64(0), store.LatestVersion()) require.Equal(t, uint64(0), store.FileStartBlock()) diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go index 2bf454d70a..ca564f3d62 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go @@ -18,7 +18,7 @@ func TestReadByTxHashFallsThroughToClosedFiles(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, 
MaxBlocksPerFile: 10, - }) + }, ReplayHooks{}) require.NoError(t, err) require.NoError(t, store.WriteReceipts(1, []parquet.ReceiptInput{ testReceiptInput(1, txHash), @@ -31,7 +31,7 @@ func TestReadByTxHashFallsThroughToClosedFiles(t *testing.T) { reopened, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 10, - }) + }, ReplayHooks{}) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, reopened.Close()) }) @@ -54,7 +54,7 @@ func TestReadByTxHashAfterRotationUsesClosedFilesAndTempCache(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: t.TempDir(), MaxBlocksPerFile: 4, - }) + }, ReplayHooks{}) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, store.Close()) }) @@ -93,7 +93,7 @@ func TestGetLogsReadsAcrossClosedFiles(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 4, - }) + }, ReplayHooks{}) require.NoError(t, err) for block := uint64(1); block <= 8; block++ { @@ -106,7 +106,7 @@ func TestGetLogsReadsAcrossClosedFiles(t *testing.T) { reopened, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 4, - }) + }, ReplayHooks{}) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, reopened.Close()) }) diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go index 1eb6530d07..b22faf14e2 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go @@ -15,7 +15,7 @@ func TestLazyInitUsesAlignedStartForFirstOffBoundaryWrite(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 500, - }) + }, ReplayHooks{}) require.NoError(t, err) require.NoError(t, store.WriteReceipts(5234, []parquet.ReceiptInput{ @@ -39,7 +39,7 @@ func TestReopenLazyInitPreservesExistingAlignedFile(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, 
MaxBlocksPerFile: 10, - }) + }, ReplayHooks{}) require.NoError(t, err) require.Equal(t, uint64(11), store.FileStartBlock()) @@ -62,7 +62,7 @@ func TestReopenLazyInitUsesAlignedStartOnGap(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 10, - }) + }, ReplayHooks{}) require.NoError(t, err) require.NoError(t, store.WriteReceipts(25, []parquet.ReceiptInput{ diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_wal_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_wal_test.go deleted file mode 100644 index 3fb5b06dad..0000000000 --- a/sei-db/ledger_db/receipt/parquet_v2/store_wal_test.go +++ /dev/null @@ -1,31 +0,0 @@ -package parquet_v2 - -import ( - "context" - "testing" - - "github.com/ethereum/go-ethereum/common" - "github.com/sei-protocol/sei-chain/sei-db/ledger_db/parquet" - "github.com/stretchr/testify/require" -) - -func TestReplayWALRequiresConverter(t *testing.T) { - store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}) - require.NoError(t, err) - t.Cleanup(func() { require.NoError(t, store.Close()) }) - - _, err = store.ReplayWAL(nil) - require.ErrorContains(t, err, "converter is nil") -} - -func TestReplayWALPublicDispatch(t *testing.T) { - store := newDispatchStore(t) - _, err := store.ReplayWAL(func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) { - return replayConverterForTest(blockNumber, receiptBytes, logStartIndex) - }) - require.NoError(t, err) - - result, err := store.GetReceiptByTxHash(context.Background(), common.HexToHash("0x1")) - require.NoError(t, err) - require.Nil(t, result) -} diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go index 4e4adfe224..cb40191f5c 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go @@ -17,7 +17,7 @@ func TestWriteReceiptsUpdatesLatestAndReopens(t *testing.T) { 
MaxBlocksPerFile: 500, BlockFlushInterval: 100, PruneIntervalSeconds: 0, - }) + }, ReplayHooks{}) require.NoError(t, err) for block := uint64(1); block <= 3; block++ { @@ -32,7 +32,7 @@ func TestWriteReceiptsUpdatesLatestAndReopens(t *testing.T) { DBDirectory: dir, MaxBlocksPerFile: 500, PruneIntervalSeconds: 0, - }) + }, ReplayHooks{}) require.NoError(t, err) require.Equal(t, int64(3), reopened.LatestVersion()) require.Equal(t, uint64(4), reopened.FileStartBlock()) diff --git a/sei-db/ledger_db/receipt/parquet_v2/types.go b/sei-db/ledger_db/receipt/parquet_v2/types.go index ac7d7d09eb..7d56fc2aa1 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/types.go +++ b/sei-db/ledger_db/receipt/parquet_v2/types.go @@ -6,6 +6,7 @@ type ( ReplayResult = coordinator.ReplayResult ReplayReceipt = coordinator.ReplayReceipt WALReceiptConverter = coordinator.WALReceiptConverter + ReplayHooks = coordinator.ReplayHooks ) var ErrStoreClosed = coordinator.ErrStoreClosed diff --git a/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go b/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go index 3d65c27e36..861d91f803 100644 --- a/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go +++ b/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go @@ -15,11 +15,10 @@ import ( ) type parquetReceiptStoreV2 struct { - store *parquet_v2.Store - storeKey sdk.StoreKey - txHashIndex TxHashIndex - indexPruner *txHashIndexPruner - warmupRecords []parquet.ReceiptRecord + store *parquet_v2.Store + storeKey sdk.StoreKey + txHashIndex TxHashIndex + indexPruner *txHashIndexPruner } func newParquetReceiptStoreV2(cfg dbconfig.ReceiptStoreConfig, storeKey sdk.StoreKey) (ReceiptStore, error) { @@ -29,51 +28,85 @@ func newParquetReceiptStoreV2(cfg dbconfig.ReceiptStoreConfig, storeKey sdk.Stor parquetTxIndexBackend = "none" } + var txHashIndex TxHashIndex + switch txIndexBackend { + case dbconfig.ReceiptTxIndexBackendNone: + case dbconfig.ReceiptTxIndexBackendPebble: + idx, err := 
NewPebbleTxHashIndex(TxHashIndexDir(cfg.DBDirectory)) + if err != nil { + return nil, fmt.Errorf("failed to open tx hash index: %w", err) + } + txHashIndex = idx + default: + return nil, fmt.Errorf("unsupported receipt tx index backend: %s", txIndexBackend) + } + + hooks := parquet_v2.ReplayHooks{Converter: replayConverterV2} + if txHashIndex != nil { + hooks.OnReplayedBlock = func(blockNumber uint64, txHashes []common.Hash) error { + return txHashIndex.IndexBlock(context.Background(), blockNumber, txHashes) + } + } + store, err := parquet_v2.NewStore(parquet.StoreConfig{ DBDirectory: cfg.DBDirectory, KeepRecent: int64(cfg.KeepRecent), PruneIntervalSeconds: int64(cfg.PruneIntervalSeconds), TxIndexBackend: parquetTxIndexBackend, - }) + }, hooks) if err != nil { + if txHashIndex != nil { + _ = txHashIndex.Close() + } return nil, err } wrapper := &parquetReceiptStoreV2{ - store: store, - storeKey: storeKey, + store: store, + storeKey: storeKey, + txHashIndex: txHashIndex, } - - switch txIndexBackend { - case dbconfig.ReceiptTxIndexBackendNone: - case dbconfig.ReceiptTxIndexBackendPebble: - idx, err := NewPebbleTxHashIndex(TxHashIndexDir(cfg.DBDirectory)) - if err != nil { - _ = store.Close() - return nil, fmt.Errorf("failed to open tx hash index: %w", err) - } - wrapper.txHashIndex = idx + if txHashIndex != nil { wrapper.indexPruner = newTxHashIndexPruner( - idx, + txHashIndex, int64(cfg.KeepRecent), int64(cfg.PruneIntervalSeconds), func() int64 { return store.LatestVersion() }, ) - default: - _ = store.Close() - return nil, fmt.Errorf("unsupported receipt tx index backend: %s", txIndexBackend) + wrapper.indexPruner.Start() } - if err := wrapper.replayWAL(); err != nil { - _ = wrapper.Close() - return nil, err + return wrapper, nil +} + +func replayConverterV2(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (parquet_v2.ReplayReceipt, error) { + receipt := &types.Receipt{} + if err := receipt.Unmarshal(receiptBytes); err != nil { + return 
parquet_v2.ReplayReceipt{}, err } - if wrapper.indexPruner != nil { - wrapper.indexPruner.Start() + txHash := common.HexToHash(receipt.TxHashHex) + blockHash := common.Hash{} + txLogs := getLogsForTx(receipt, logStartIndex) + for _, lg := range txLogs { + lg.BlockHash = blockHash } - return wrapper, nil + record := parquet.ReceiptRecord{ + TxHash: parquet.CopyBytes(txHash[:]), + BlockNumber: blockNumber, + ReceiptBytes: parquet.CopyBytesOrEmpty(receiptBytes), + } + return parquet_v2.ReplayReceipt{ + Input: parquet.ReceiptInput{ + Receipt: record, + Logs: BuildParquetLogRecords(txLogs, blockHash), + ReceiptBytes: parquet.CopyBytesOrEmpty(receiptBytes), + }, + TxHash: txHash, + Warmup: record, + LogCount: uint(len(txLogs)), + }, nil } func (s *parquetReceiptStoreV2) LatestVersion() int64 { @@ -95,8 +128,9 @@ func (s *parquetReceiptStoreV2) cacheRotateInterval() uint64 { } func (s *parquetReceiptStoreV2) warmupReceipts() []ReceiptRecord { - records := make([]ReceiptRecord, 0, len(s.warmupRecords)) - for _, rec := range s.warmupRecords { + raw := s.store.WarmupRecords() + records := make([]ReceiptRecord, 0, len(raw)) + for _, rec := range raw { receipt := &types.Receipt{} if err := receipt.Unmarshal(rec.ReceiptBytes); err != nil { continue @@ -106,7 +140,6 @@ func (s *parquetReceiptStoreV2) warmupReceipts() []ReceiptRecord { Receipt: receipt, }) } - s.warmupRecords = nil return records } @@ -305,51 +338,3 @@ func (s *parquetReceiptStoreV2) Close() error { } return storeErr } - -func (s *parquetReceiptStoreV2) replayWAL() error { - result, err := s.store.ReplayWAL(func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (parquet_v2.ReplayReceipt, error) { - receipt := &types.Receipt{} - if err := receipt.Unmarshal(receiptBytes); err != nil { - return parquet_v2.ReplayReceipt{}, err - } - - txHash := common.HexToHash(receipt.TxHashHex) - blockHash := common.Hash{} - txLogs := getLogsForTx(receipt, logStartIndex) - for _, lg := range txLogs { - lg.BlockHash = 
blockHash - } - - record := parquet.ReceiptRecord{ - TxHash: parquet.CopyBytes(txHash[:]), - BlockNumber: blockNumber, - ReceiptBytes: parquet.CopyBytesOrEmpty(receiptBytes), - } - return parquet_v2.ReplayReceipt{ - Input: parquet.ReceiptInput{ - Receipt: record, - Logs: BuildParquetLogRecords(txLogs, blockHash), - ReceiptBytes: parquet.CopyBytesOrEmpty(receiptBytes), - }, - TxHash: txHash, - Warmup: record, - LogCount: uint(len(txLogs)), - }, nil - }) - if err != nil { - return err - } - - s.warmupRecords = result.WarmupRecords - if s.txHashIndex == nil { - return nil - } - - ctx := context.Background() - for _, rb := range result.Blocks { - if err := s.txHashIndex.IndexBlock(ctx, rb.BlockNumber, rb.TxHashes); err != nil { - return fmt.Errorf("failed to re-index replayed block %d: %w", rb.BlockNumber, err) - } - } - return nil -} From 98469dd4d3b48977a18704aea2bc0d9e74ec44e5 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 4 May 2026 11:50:16 -0400 Subject: [PATCH 23/27] move WALConverter onto StoreConfig, drop ReplayHooks struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NewStore is back to a single argument. The converter is a config field ("what kind of receipts does this store hold"), not a wiring callback. The wrapper drains store.ReplayedBlocks() after construction to re-populate its tx-hash index — wrapper-specific business stays in the wrapper. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- sei-db/ledger_db/parquet/store.go | 23 ++++++++++ .../parquet_v2/coordinator/coordinator.go | 36 ++++++++-------- .../parquet_v2/coordinator/helpers_test.go | 4 +- .../receipt/parquet_v2/coordinator/types.go | 27 ------------ .../receipt/parquet_v2/coordinator/wal.go | 4 +- .../parquet_v2/coordinator/wal_test.go | 2 +- .../receipt/parquet_v2/helpers_test.go | 14 ------- sei-db/ledger_db/receipt/parquet_v2/store.go | 18 +++++--- .../receipt/parquet_v2/store_dispatch_test.go | 6 +-- .../receipt/parquet_v2/store_init_test.go | 12 +++--- .../receipt/parquet_v2/store_read_test.go | 10 ++--- .../receipt/parquet_v2/store_rotation_test.go | 6 +-- .../receipt/parquet_v2/store_write_test.go | 4 +- sei-db/ledger_db/receipt/parquet_v2/types.go | 6 +-- .../receipt/parquet_v2_receipt_store.go | 42 ++++++++++--------- 15 files changed, 104 insertions(+), 110 deletions(-) diff --git a/sei-db/ledger_db/parquet/store.go b/sei-db/ledger_db/parquet/store.go index d4bfed2fab..844b298b09 100644 --- a/sei-db/ledger_db/parquet/store.go +++ b/sei-db/ledger_db/parquet/store.go @@ -38,6 +38,29 @@ type StoreConfig struct { BlockFlushInterval uint64 MaxBlocksPerFile uint64 TxIndexBackend string + + // WALConverter, when non-nil, drives synchronous WAL replay during + // store construction. The function decodes one raw WAL receipt blob + // into the structured fields the store needs to re-stage it. Only + // consumed by the v2 store; v1 ignores it. When nil, replay is + // skipped — used by lower-level tests that drive replay manually. + WALConverter WALReceiptConverter +} + +// WALReceiptConverter decodes a raw WAL receipt blob into the structured +// fields the v2 store needs to re-stage it. logStartIndex carries the +// running per-block log offset so logs from earlier txs in the same block +// don't collide. 
+type WALReceiptConverter func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) + +// ReplayReceipt is one converted WAL entry: the receipt input to re-stage, +// its tx hash, the warmup record returned to the wrapper, and the log +// count consumed (used to advance logStartIndex). +type ReplayReceipt struct { + Input ReceiptInput + TxHash common.Hash + Warmup ReceiptRecord + LogCount uint } // DefaultStoreConfig returns the default store configuration. diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go index 59fda22221..567e8bbc3c 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -53,18 +53,17 @@ type Coordinator struct { wal dbwal.GenericWAL[parquet.WALEntry] reader *Reader - warmupRecords []parquet.ReceiptRecord + warmupRecords []parquet.ReceiptRecord + replayedBlocks []ReplayedBlock } // New constructs a Coordinator and drives WAL replay synchronously before // starting the request goroutine, so the returned Coordinator already -// reflects any persisted-but-uncheckpointed receipts. Pass hooks.Converter -// nil to skip auto-replay (used by tests that exercise replayWAL -// directly). When hooks.OnReplayedBlock is set, it is invoked per -// recovered block after receipts have been re-applied — the wrapper uses -// this to repopulate its tx-hash index. Recovered warmup records are -// retained and drained by the caller via WarmupRecords. -func New(cfg parquet.StoreConfig, hooks ReplayHooks) (*Coordinator, error) { +// reflects any persisted-but-uncheckpointed receipts. Replay runs only +// when cfg.WALConverter is non-nil; tests that exercise replayWAL +// directly leave it nil. After construction, callers drain the recovered +// state via WarmupRecords and ReplayedBlocks. 
+func New(cfg parquet.StoreConfig) (*Coordinator, error) { storeCfg := resolveStoreConfig(cfg) if err := os.MkdirAll(storeCfg.DBDirectory, 0o750); err != nil { @@ -133,19 +132,13 @@ func New(cfg parquet.StoreConfig, hooks ReplayHooks) (*Coordinator, error) { } } - if hooks.Converter != nil { - result, err := c.replayWAL(hooks.Converter) + if cfg.WALConverter != nil { + result, err := c.replayWAL(cfg.WALConverter) if err != nil { return nil, err } c.warmupRecords = result.WarmupRecords - if hooks.OnReplayedBlock != nil { - for _, rb := range result.Blocks { - if err := hooks.OnReplayedBlock(rb.BlockNumber, rb.TxHashes); err != nil { - return nil, err - } - } - } + c.replayedBlocks = result.Blocks } go c.run() @@ -327,6 +320,15 @@ func (c *Coordinator) WarmupRecords() []parquet.ReceiptRecord { return records } +// ReplayedBlocks returns and clears the per-block tx-hash listing +// recovered during construction-time WAL replay. Wrappers drain this +// once after construction to repopulate an external tx-hash index. 
+func (c *Coordinator) ReplayedBlocks() []ReplayedBlock { + blocks := c.replayedBlocks + c.replayedBlocks = nil + return blocks +} + func sendAndAwaitResponse[T any](c *Coordinator, req coordRequest, resp <-chan T) (T, error) { var zero T diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/helpers_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/helpers_test.go index aba9d3576c..7ac9445f86 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/helpers_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/helpers_test.go @@ -69,13 +69,13 @@ func replayWALWithEntries(t *testing.T, entries ...parquet.WALEntry) *recordingW return wal } -func replayConverterForTest(blockNumber uint64, receiptBytes []byte, _ uint) (ReplayReceipt, error) { +func replayConverterForTest(blockNumber uint64, receiptBytes []byte, _ uint) (parquet.ReplayReceipt, error) { txHash := common.BigToHash(new(big.Int).SetUint64(uint64(receiptBytes[0]))) input := testReceiptInput(blockNumber, txHash) input.ReceiptBytes = append([]byte(nil), receiptBytes...) input.Receipt.ReceiptBytes = append([]byte(nil), receiptBytes...) - return ReplayReceipt{ + return parquet.ReplayReceipt{ Input: input, TxHash: txHash, Warmup: input.Receipt, diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go index 84b115620a..b3ee69a004 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/types.go @@ -28,22 +28,6 @@ type ReplayedBlock struct { TxHashes []common.Hash } -// WALReceiptConverter decodes a raw WAL receipt blob into the structured -// fields the coordinator needs to re-stage it. logStartIndex carries the -// running per-block log offset so logs from earlier txs in the same block -// don't collide. 
-type WALReceiptConverter func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) - -// ReplayReceipt is one converted WAL entry: the receipt input to re-stage, -// its tx hash, the warmup record returned to the wrapper, and the log -// count consumed (used to advance logStartIndex). -type ReplayReceipt struct { - Input parquet.ReceiptInput - TxHash common.Hash - Warmup parquet.ReceiptRecord - LogCount uint -} - // ReplayResult is the outcome of a successful WAL replay: warmup records // to seed external caches, plus the per-block tx hash listing. type ReplayResult struct { @@ -51,17 +35,6 @@ type ReplayResult struct { Blocks []ReplayedBlock } -// ReplayHooks bundles the wrapper-specific callbacks invoked during WAL -// replay at construction time. Converter decodes the raw WAL receipt blob; -// when nil, replay is skipped entirely (used by lower-level tests that -// drive replay manually). OnReplayedBlock, when non-nil, is called once -// per recovered block after its receipts have been re-applied — the -// wrapper uses this to re-populate its tx-hash index. -type ReplayHooks struct { - Converter WALReceiptConverter - OnReplayedBlock func(blockNumber uint64, txHashes []common.Hash) error -} - // int64FromUint64 converts value to int64 or errors on overflow. Used at // the boundary where block heights cross from internal uint64 storage to // the sdk-style int64 latestVersion. diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go index 3187af7abe..7f5d4bd09c 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal.go @@ -13,7 +13,7 @@ import ( // resulting layout matches what a non-crashing run would have produced), // applies each receipt to the open writer, and finally truncates WAL // entries that are now durably persisted. 
-func (c *Coordinator) replayWAL(converter WALReceiptConverter) (ReplayResult, error) { +func (c *Coordinator) replayWAL(converter parquet.WALReceiptConverter) (ReplayResult, error) { if converter == nil { return ReplayResult{}, fmt.Errorf("WAL receipt converter is nil") } @@ -137,7 +137,7 @@ func (c *Coordinator) applyReceiptFromReplay(blockNumber uint64, input parquet.R // normalizeReplayInput backfills the ReceiptInput fields that the converter // may have left empty (block number, tx hash, and the receipt byte // payloads), so downstream apply code doesn't need replay-aware branches. -func normalizeReplayInput(blockNumber uint64, receiptBytes []byte, replayed ReplayReceipt) parquet.ReceiptInput { +func normalizeReplayInput(blockNumber uint64, receiptBytes []byte, replayed parquet.ReplayReceipt) parquet.ReceiptInput { input := replayed.Input input.Receipt.BlockNumber = blockNumber if len(input.Receipt.TxHash) == 0 { diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal_test.go index 520844f029..8b51530f6b 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal_test.go @@ -40,7 +40,7 @@ func TestReplayWALSkipsEntriesBeforeFileStartAndTruncates(t *testing.T) { coord.fileStartBlock = 4 defer func() { require.NoError(t, coord.closeWriters()) }() - result, err := coord.replayWAL(func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (ReplayReceipt, error) { + result, err := coord.replayWAL(func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (parquet.ReplayReceipt, error) { require.NotEqual(t, uint64(2), blockNumber) return replayConverterForTest(blockNumber, receiptBytes, logStartIndex) }) diff --git a/sei-db/ledger_db/receipt/parquet_v2/helpers_test.go b/sei-db/ledger_db/receipt/parquet_v2/helpers_test.go index 1eaba96888..26ee5f7573 100644 --- 
a/sei-db/ledger_db/receipt/parquet_v2/helpers_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/helpers_test.go @@ -31,20 +31,6 @@ func testReceiptInput(blockNumber uint64, txHash common.Hash) parquet.ReceiptInp } } -func replayConverterForTest(blockNumber uint64, receiptBytes []byte, _ uint) (ReplayReceipt, error) { - txHash := common.BigToHash(new(big.Int).SetUint64(uint64(receiptBytes[0]))) - input := testReceiptInput(blockNumber, txHash) - input.ReceiptBytes = append([]byte(nil), receiptBytes...) - input.Receipt.ReceiptBytes = append([]byte(nil), receiptBytes...) - - return ReplayReceipt{ - Input: input, - TxHash: txHash, - Warmup: input.Receipt, - LogCount: uint(len(input.Logs)), - }, nil -} - func writeReceiptFile(t *testing.T, dir string, startBlock uint64, blocks []uint64) { t.Helper() diff --git a/sei-db/ledger_db/receipt/parquet_v2/store.go b/sei-db/ledger_db/receipt/parquet_v2/store.go index 3d3ce61928..25bce6df85 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store.go @@ -21,12 +21,13 @@ type Store struct { } // NewStore creates a V2 store backed by a live coordinator goroutine. -// hooks.Converter, when non-nil, drives WAL replay synchronously before +// cfg.WALConverter, when non-nil, drives WAL replay synchronously before // the store accepts other calls; production callers always supply one. -// Lower-level tests that drive replay manually pass a zero-value -// ReplayHooks. -func NewStore(cfg parquet.StoreConfig, hooks ReplayHooks) (*Store, error) { - c, err := coordinator.New(cfg, hooks) +// Lower-level tests that drive replay manually leave it nil. After +// construction, callers drain WarmupRecords and ReplayedBlocks to +// re-seed external caches and indexes. 
+func NewStore(cfg parquet.StoreConfig) (*Store, error) { + c, err := coordinator.New(cfg) if err != nil { return nil, err } @@ -133,3 +134,10 @@ func (s *Store) SetFaultHooks(hooks *parquet.FaultHooks) { func (s *Store) WarmupRecords() []parquet.ReceiptRecord { return s.coord.WarmupRecords() } + +// ReplayedBlocks returns and clears the per-block tx-hash listing +// recovered during construction-time WAL replay. Wrappers drain this +// once after NewStore returns to repopulate an external tx-hash index. +func (s *Store) ReplayedBlocks() []ReplayedBlock { + return s.coord.ReplayedBlocks() +} diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go index c8f58782f2..d2fc046959 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_dispatch_test.go @@ -13,7 +13,7 @@ func newDispatchStore(t *testing.T) *Store { store, err := NewStore(parquet.StoreConfig{ DBDirectory: t.TempDir(), MaxBlocksPerFile: 4, - }, ReplayHooks{}) + }) require.NoError(t, err) t.Cleanup(func() { _ = store.Close() }) return store @@ -44,7 +44,7 @@ func TestMetadataAndConfigRequestsDispatchThroughCoordinator(t *testing.T) { } func TestCloseStopsFutureRequests(t *testing.T) { - store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}, ReplayHooks{}) + store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}) require.NoError(t, err) require.NoError(t, store.Close()) @@ -53,7 +53,7 @@ func TestCloseStopsFutureRequests(t *testing.T) { } func TestSimulateCrashStopsFutureRequests(t *testing.T) { - store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}, ReplayHooks{}) + store, err := NewStore(parquet.StoreConfig{DBDirectory: t.TempDir()}) require.NoError(t, err) store.SimulateCrash() diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go index 
fde83a8cb7..9ab401f309 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_init_test.go @@ -12,7 +12,7 @@ import ( func TestNewStoreCreatesDirectoryAndClosesIdempotently(t *testing.T) { dir := filepath.Join(t.TempDir(), "nested", "parquet") - store, err := NewStore(parquet.StoreConfig{DBDirectory: dir}, ReplayHooks{}) + store, err := NewStore(parquet.StoreConfig{DBDirectory: dir}) require.NoError(t, err) require.DirExists(t, dir) require.DirExists(t, filepath.Join(dir, "parquet-wal")) @@ -30,7 +30,7 @@ func TestNewStoreSeedsLatestVersionFromClosedFiles(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 100, - }, ReplayHooks{}) + }) require.NoError(t, err) require.Equal(t, int64(123), store.LatestVersion()) require.Equal(t, uint64(124), store.FileStartBlock()) @@ -39,7 +39,7 @@ func TestNewStoreSeedsLatestVersionFromClosedFiles(t *testing.T) { reopened, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 100, - }, ReplayHooks{}) + }) require.NoError(t, err) require.Equal(t, int64(123), reopened.LatestVersion()) require.Equal(t, uint64(124), reopened.FileStartBlock()) @@ -59,7 +59,7 @@ func TestNewStoreRemovesCorruptTrailingPair(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 500, - }, ReplayHooks{}) + }) require.NoError(t, err) require.NoError(t, store.Close()) @@ -82,7 +82,7 @@ func TestNewStoreRemovesReceiptCounterpartForCorruptTrailingLog(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 500, - }, ReplayHooks{}) + }) require.NoError(t, err) require.Equal(t, int64(1), store.LatestVersion()) require.NoError(t, store.Close()) @@ -101,7 +101,7 @@ func TestNewStoreIgnoresUnmatchedFiles(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 500, - }, ReplayHooks{}) + }) require.NoError(t, err) 
require.Equal(t, int64(0), store.LatestVersion()) require.Equal(t, uint64(0), store.FileStartBlock()) diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go index ca564f3d62..2bf454d70a 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_read_test.go @@ -18,7 +18,7 @@ func TestReadByTxHashFallsThroughToClosedFiles(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 10, - }, ReplayHooks{}) + }) require.NoError(t, err) require.NoError(t, store.WriteReceipts(1, []parquet.ReceiptInput{ testReceiptInput(1, txHash), @@ -31,7 +31,7 @@ func TestReadByTxHashFallsThroughToClosedFiles(t *testing.T) { reopened, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 10, - }, ReplayHooks{}) + }) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, reopened.Close()) }) @@ -54,7 +54,7 @@ func TestReadByTxHashAfterRotationUsesClosedFilesAndTempCache(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: t.TempDir(), MaxBlocksPerFile: 4, - }, ReplayHooks{}) + }) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, store.Close()) }) @@ -93,7 +93,7 @@ func TestGetLogsReadsAcrossClosedFiles(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 4, - }, ReplayHooks{}) + }) require.NoError(t, err) for block := uint64(1); block <= 8; block++ { @@ -106,7 +106,7 @@ func TestGetLogsReadsAcrossClosedFiles(t *testing.T) { reopened, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 4, - }, ReplayHooks{}) + }) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, reopened.Close()) }) diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go index b22faf14e2..1eb6530d07 100644 --- 
a/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_rotation_test.go @@ -15,7 +15,7 @@ func TestLazyInitUsesAlignedStartForFirstOffBoundaryWrite(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 500, - }, ReplayHooks{}) + }) require.NoError(t, err) require.NoError(t, store.WriteReceipts(5234, []parquet.ReceiptInput{ @@ -39,7 +39,7 @@ func TestReopenLazyInitPreservesExistingAlignedFile(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 10, - }, ReplayHooks{}) + }) require.NoError(t, err) require.Equal(t, uint64(11), store.FileStartBlock()) @@ -62,7 +62,7 @@ func TestReopenLazyInitUsesAlignedStartOnGap(t *testing.T) { store, err := NewStore(parquet.StoreConfig{ DBDirectory: dir, MaxBlocksPerFile: 10, - }, ReplayHooks{}) + }) require.NoError(t, err) require.NoError(t, store.WriteReceipts(25, []parquet.ReceiptInput{ diff --git a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go index cb40191f5c..4e4adfe224 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/store_write_test.go @@ -17,7 +17,7 @@ func TestWriteReceiptsUpdatesLatestAndReopens(t *testing.T) { MaxBlocksPerFile: 500, BlockFlushInterval: 100, PruneIntervalSeconds: 0, - }, ReplayHooks{}) + }) require.NoError(t, err) for block := uint64(1); block <= 3; block++ { @@ -32,7 +32,7 @@ func TestWriteReceiptsUpdatesLatestAndReopens(t *testing.T) { DBDirectory: dir, MaxBlocksPerFile: 500, PruneIntervalSeconds: 0, - }, ReplayHooks{}) + }) require.NoError(t, err) require.Equal(t, int64(3), reopened.LatestVersion()) require.Equal(t, uint64(4), reopened.FileStartBlock()) diff --git a/sei-db/ledger_db/receipt/parquet_v2/types.go b/sei-db/ledger_db/receipt/parquet_v2/types.go index 7d56fc2aa1..a7c5af5781 100644 --- 
a/sei-db/ledger_db/receipt/parquet_v2/types.go +++ b/sei-db/ledger_db/receipt/parquet_v2/types.go @@ -3,10 +3,8 @@ package parquet_v2 import "github.com/sei-protocol/sei-chain/sei-db/ledger_db/receipt/parquet_v2/coordinator" type ( - ReplayResult = coordinator.ReplayResult - ReplayReceipt = coordinator.ReplayReceipt - WALReceiptConverter = coordinator.WALReceiptConverter - ReplayHooks = coordinator.ReplayHooks + ReplayResult = coordinator.ReplayResult + ReplayedBlock = coordinator.ReplayedBlock ) var ErrStoreClosed = coordinator.ErrStoreClosed diff --git a/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go b/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go index 861d91f803..7838994f18 100644 --- a/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go +++ b/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go @@ -28,37 +28,41 @@ func newParquetReceiptStoreV2(cfg dbconfig.ReceiptStoreConfig, storeKey sdk.Stor parquetTxIndexBackend = "none" } + store, err := parquet_v2.NewStore(parquet.StoreConfig{ + DBDirectory: cfg.DBDirectory, + KeepRecent: int64(cfg.KeepRecent), + PruneIntervalSeconds: int64(cfg.PruneIntervalSeconds), + TxIndexBackend: parquetTxIndexBackend, + WALConverter: replayConverterV2, + }) + if err != nil { + return nil, err + } + var txHashIndex TxHashIndex switch txIndexBackend { case dbconfig.ReceiptTxIndexBackendNone: case dbconfig.ReceiptTxIndexBackendPebble: idx, err := NewPebbleTxHashIndex(TxHashIndexDir(cfg.DBDirectory)) if err != nil { + _ = store.Close() return nil, fmt.Errorf("failed to open tx hash index: %w", err) } txHashIndex = idx default: + _ = store.Close() return nil, fmt.Errorf("unsupported receipt tx index backend: %s", txIndexBackend) } - hooks := parquet_v2.ReplayHooks{Converter: replayConverterV2} if txHashIndex != nil { - hooks.OnReplayedBlock = func(blockNumber uint64, txHashes []common.Hash) error { - return txHashIndex.IndexBlock(context.Background(), blockNumber, txHashes) - } - } - - store, err := 
parquet_v2.NewStore(parquet.StoreConfig{ - DBDirectory: cfg.DBDirectory, - KeepRecent: int64(cfg.KeepRecent), - PruneIntervalSeconds: int64(cfg.PruneIntervalSeconds), - TxIndexBackend: parquetTxIndexBackend, - }, hooks) - if err != nil { - if txHashIndex != nil { - _ = txHashIndex.Close() + ctx := context.Background() + for _, rb := range store.ReplayedBlocks() { + if err := txHashIndex.IndexBlock(ctx, rb.BlockNumber, rb.TxHashes); err != nil { + _ = txHashIndex.Close() + _ = store.Close() + return nil, fmt.Errorf("failed to re-index replayed block %d: %w", rb.BlockNumber, err) + } } - return nil, err } wrapper := &parquetReceiptStoreV2{ @@ -79,10 +83,10 @@ func newParquetReceiptStoreV2(cfg dbconfig.ReceiptStoreConfig, storeKey sdk.Stor return wrapper, nil } -func replayConverterV2(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (parquet_v2.ReplayReceipt, error) { +func replayConverterV2(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (parquet.ReplayReceipt, error) { receipt := &types.Receipt{} if err := receipt.Unmarshal(receiptBytes); err != nil { - return parquet_v2.ReplayReceipt{}, err + return parquet.ReplayReceipt{}, err } txHash := common.HexToHash(receipt.TxHashHex) @@ -97,7 +101,7 @@ func replayConverterV2(blockNumber uint64, receiptBytes []byte, logStartIndex ui BlockNumber: blockNumber, ReceiptBytes: parquet.CopyBytesOrEmpty(receiptBytes), } - return parquet_v2.ReplayReceipt{ + return parquet.ReplayReceipt{ Input: parquet.ReceiptInput{ Receipt: record, Logs: BuildParquetLogRecords(txLogs, blockHash), From a461b253bef70f6287632a7f9f17d588025bdb92 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 4 May 2026 12:04:18 -0400 Subject: [PATCH 24/27] fix handleClose to release all resources on partial failure Previously, an error in flushOpenFile, closeWriters, or wal.Close would return early from handleClose, leaving the remaining resources (WAL background goroutines, DuckDB reader connection, open file descriptors) attached to the 
coordinator after run() exited. Now every step runs unconditionally and errors are aggregated via errors.Join. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../parquet_v2/coordinator/dispatch_test.go | 33 +++++++++++++++++++ .../parquet_v2/coordinator/handlers.go | 22 ++++++------- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go index 899f98f539..57d45b9378 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go @@ -1,6 +1,7 @@ package coordinator import ( + "errors" "testing" "time" @@ -32,6 +33,38 @@ func TestSetMaxBlocksPerFileUpdatesReaderState(t *testing.T) { require.Equal(t, uint64(3), reader.maxBlocksPerFile) } +func TestHandleCloseReleasesAllResourcesOnFlushError(t *testing.T) { + coord, err := New(parquet.StoreConfig{ + DBDirectory: t.TempDir(), + MaxBlocksPerFile: 4, + }) + require.NoError(t, err) + + require.NotNil(t, coord.wal) + require.NotNil(t, coord.reader) + + require.NoError(t, coord.WriteReceipts(1, []parquet.ReceiptInput{ + testReceiptInput(1, common.HexToHash("0x1")), + })) + require.NotNil(t, coord.receiptWriter) + require.NotNil(t, coord.receiptFile) + + coord.SetFaultHooks(&parquet.FaultHooks{ + BeforeFlush: func(uint64) error { return errors.New("injected flush failure") }, + }) + + closeErr := coord.Close() + require.Error(t, closeErr) + require.ErrorContains(t, closeErr, "injected flush failure") + + require.Nil(t, coord.wal, "WAL must be released even when flushOpenFile errors") + require.Nil(t, coord.reader, "reader must be released even when flushOpenFile errors") + require.Nil(t, coord.receiptWriter) + require.Nil(t, coord.logWriter) + require.Nil(t, coord.receiptFile) + require.Nil(t, coord.logFile) +} + func TestUnbufferedRequestsApplyBackpressure(t *testing.T) { requests := make(chan 
coordRequest) done := make(chan struct{}) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go index 07927869fe..381d6f690b 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go @@ -1,6 +1,7 @@ package coordinator import ( + "errors" "fmt" "os" "path/filepath" @@ -144,32 +145,31 @@ func (c *Coordinator) handlePruneTick() { } // handleClose performs a graceful shutdown: flush and close the open writers, -// then close the WAL and reader. Returns the first non-nil error encountered -// along the way. The prune ticker is stopped via defer in run(). +// then close the WAL and reader. Each step runs even if an earlier one +// errors so resources (file descriptors, WAL background goroutines, DuckDB +// connections) are always released. Errors from every step are joined and +// returned together. The prune ticker is stopped via defer in run(). func (c *Coordinator) handleClose(req closeReq) { + var errs []error if err := c.flushOpenFile(); err != nil { - req.resp <- err - return + errs = append(errs, fmt.Errorf("flush: %w", err)) } if err := c.closeWriters(); err != nil { - req.resp <- err - return + errs = append(errs, err) } if c.wal != nil { if err := c.wal.Close(); err != nil { - req.resp <- err - return + errs = append(errs, fmt.Errorf("wal close: %w", err)) } c.wal = nil } if c.reader != nil { if err := c.reader.Close(); err != nil { - req.resp <- err - return + errs = append(errs, fmt.Errorf("reader close: %w", err)) } c.reader = nil } - req.resp <- nil + req.resp <- errors.Join(errs...) 
} // handleSimulateCrash drops in-memory writer state without flushing — the From 42fa63075c24a761d6f10db1d112151eaf011581 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Mon, 4 May 2026 12:05:52 -0400 Subject: [PATCH 25/27] fix Close to return the first caller's error to repeat callers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The captured err was a local in Close(), so a second invocation got a fresh nil — closeOnce.Do skips the closure body but the second caller's local err was never written. Move err onto the struct as closeErr so all callers observe the result of the single close attempt. The happens-before semantics of sync.Once guarantee the read is safe. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../parquet_v2/coordinator/coordinator.go | 9 +++++--- .../parquet_v2/coordinator/dispatch_test.go | 22 +++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go index 567e8bbc3c..f65f6700fc 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -27,6 +27,7 @@ type Coordinator struct { requests chan coordRequest done chan struct{} closeOnce sync.Once + closeErr error config parquet.StoreConfig @@ -278,14 +279,16 @@ func (c *Coordinator) Flush() error { return awaitError(c, flushReq{resp: resp}, resp) } +// Close performs a graceful shutdown. Subsequent callers receive the same +// error as the first caller — closeErr is set inside closeOnce.Do, which +// provides the happens-before edge for all later reads. 
func (c *Coordinator) Close() error { - var err error c.closeOnce.Do(func() { resp := make(chan error, 1) - err = awaitError(c, closeReq{resp: resp}, resp) + c.closeErr = awaitError(c, closeReq{resp: resp}, resp) close(c.done) }) - return err + return c.closeErr } func (c *Coordinator) SimulateCrash() { diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go index 57d45b9378..12b7cc7931 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go @@ -65,6 +65,28 @@ func TestHandleCloseReleasesAllResourcesOnFlushError(t *testing.T) { require.Nil(t, coord.logFile) } +func TestCloseReturnsSameErrorToRepeatCallers(t *testing.T) { + coord, err := New(parquet.StoreConfig{ + DBDirectory: t.TempDir(), + MaxBlocksPerFile: 4, + }) + require.NoError(t, err) + + require.NoError(t, coord.WriteReceipts(1, []parquet.ReceiptInput{ + testReceiptInput(1, common.HexToHash("0x1")), + })) + + coord.SetFaultHooks(&parquet.FaultHooks{ + BeforeFlush: func(uint64) error { return errors.New("injected flush failure") }, + }) + + first := coord.Close() + second := coord.Close() + require.Error(t, first) + require.Error(t, second, "second Close() must surface the original close error, not nil") + require.Equal(t, first, second) +} + func TestUnbufferedRequestsApplyBackpressure(t *testing.T) { requests := make(chan coordRequest) done := make(chan struct{}) From 52e76c638f3b7a693aef6444ace02670fa6724b8 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Tue, 5 May 2026 00:22:16 -0400 Subject: [PATCH 26/27] back CacheRotateInterval with an atomic.Uint64 mirror MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CacheRotateInterval was reading c.config.MaxBlocksPerFile directly from caller goroutines while handleSetMaxBlocksPerFile wrote it from the run goroutine — a data race the prior 
comment acknowledged but didn't fix. Mirror the value in an atomic.Uint64 that handleSetMaxBlocksPerFile keeps in sync, and read from the mirror. Also thread context through sendAndAwaitResponse/awaitError so reads respect caller cancellation, and gate parquetReceiptStoreV2.Close on a sync.Once (indexPruner.Stop closes a channel and panics on a second call). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../parquet_v2/coordinator/coordinator.go | 61 ++++++++++++------- .../parquet_v2/coordinator/dispatch_test.go | 1 + .../parquet_v2/coordinator/handlers.go | 5 +- .../receipt/parquet_v2_receipt_store.go | 28 ++++++--- 4 files changed, 62 insertions(+), 33 deletions(-) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go index f65f6700fc..ad2dd2c42b 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "sync" + "sync/atomic" "time" "github.com/ethereum/go-ethereum/common" @@ -31,6 +32,12 @@ type Coordinator struct { config parquet.StoreConfig + // cacheRotateInterval mirrors config.MaxBlocksPerFile but is read + // without holding the run goroutine. CacheRotateInterval is the only + // external reader; all internal callers go through config under the + // run goroutine. Kept in sync by handleSetMaxBlocksPerFile. + cacheRotateInterval atomic.Uint64 + basePath string fileStartBlock uint64 receiptWriter *parquetgo.GenericWriter[parquet.ReceiptRecord] @@ -115,6 +122,7 @@ func New(cfg parquet.StoreConfig) (*Coordinator, error) { latestVersion: 0, earliestVersion: 0, } + c.cacheRotateInterval.Store(storeCfg.MaxBlocksPerFile) receiptFiles := make([]string, 0, len(closedFiles)) for _, f := range closedFiles { @@ -193,7 +201,7 @@ func (c *Coordinator) run() { // ignored. 
func (c *Coordinator) WriteReceipts(height uint64, inputs []parquet.ReceiptInput) error { resp := make(chan writeResp, 1) - r, err := sendAndAwaitResponse(c, writeReq{height: height, inputs: inputs, resp: resp}, resp) + r, err := sendAndAwaitResponse(context.Background(), c, writeReq{height: height, inputs: inputs, resp: resp}, resp) if err != nil { return err } @@ -202,7 +210,7 @@ func (c *Coordinator) WriteReceipts(height uint64, inputs []parquet.ReceiptInput func (c *Coordinator) GetReceiptByTxHash(ctx context.Context, txHash common.Hash) (*parquet.ReceiptResult, error) { resp := make(chan readReceiptResp, 1) - r, err := sendAndAwaitResponse(c, readByTxHashReq{ctx: ctx, txHash: txHash, resp: resp}, resp) + r, err := sendAndAwaitResponse(ctx, c, readByTxHashReq{ctx: ctx, txHash: txHash, resp: resp}, resp) if err != nil { return nil, err } @@ -211,7 +219,7 @@ func (c *Coordinator) GetReceiptByTxHash(ctx context.Context, txHash common.Hash func (c *Coordinator) GetReceiptByTxHashInBlock(ctx context.Context, txHash common.Hash, blockNumber uint64) (*parquet.ReceiptResult, error) { resp := make(chan readReceiptResp, 1) - r, err := sendAndAwaitResponse(c, readByTxHashInBlockReq{ + r, err := sendAndAwaitResponse(ctx, c, readByTxHashInBlockReq{ ctx: ctx, txHash: txHash, blockNumber: blockNumber, @@ -225,7 +233,7 @@ func (c *Coordinator) GetReceiptByTxHashInBlock(ctx context.Context, txHash comm func (c *Coordinator) GetLogs(ctx context.Context, filter parquet.LogFilter) ([]parquet.LogResult, error) { resp := make(chan getLogsResp, 1) - r, err := sendAndAwaitResponse(c, getLogsReq{ctx: ctx, filter: filter, resp: resp}, resp) + r, err := sendAndAwaitResponse(ctx, c, getLogsReq{ctx: ctx, filter: filter, resp: resp}, resp) if err != nil { return nil, err } @@ -234,7 +242,7 @@ func (c *Coordinator) GetLogs(ctx context.Context, filter parquet.LogFilter) ([] func (c *Coordinator) FileStartBlock() uint64 { resp := make(chan uint64, 1) - r, err := sendAndAwaitResponse(c, 
fileStartBlockReq{resp: resp}, resp) + r, err := sendAndAwaitResponse(context.Background(), c, fileStartBlockReq{resp: resp}, resp) if err != nil { return 0 } @@ -243,7 +251,7 @@ func (c *Coordinator) FileStartBlock() uint64 { func (c *Coordinator) LatestVersion() int64 { resp := make(chan int64, 1) - r, err := sendAndAwaitResponse(c, latestVersionReq{resp: resp}, resp) + r, err := sendAndAwaitResponse(context.Background(), c, latestVersionReq{resp: resp}, resp) if err != nil { return 0 } @@ -252,31 +260,29 @@ func (c *Coordinator) LatestVersion() int64 { func (c *Coordinator) SetLatestVersion(version int64) { resp := make(chan error, 1) - _ = awaitError(c, setLatestVersionReq{version: version, resp: resp}, resp) + _ = awaitError(context.Background(), c, setLatestVersionReq{version: version, resp: resp}, resp) } func (c *Coordinator) SetEarliestVersion(version int64) { resp := make(chan error, 1) - _ = awaitError(c, setEarliestVersionReq{version: version, resp: resp}, resp) + _ = awaitError(context.Background(), c, setEarliestVersionReq{version: version, resp: resp}, resp) } func (c *Coordinator) UpdateLatestVersion(version int64) { resp := make(chan error, 1) - _ = awaitError(c, updateLatestVersionReq{version: version, resp: resp}, resp) + _ = awaitError(context.Background(), c, updateLatestVersionReq{version: version, resp: resp}, resp) } // CacheRotateInterval returns the rotation boundary used by the cached receipt -// store. Reads c.config.MaxBlocksPerFile directly without going through the -// request channel; this is safe because the value is set at construction and -// only mutated by SetMaxBlocksPerFile, which is test-only and must not race -// with reads. +// store. Backed by an atomic mirror of config.MaxBlocksPerFile so external +// readers don't race with handleSetMaxBlocksPerFile in the run goroutine. 
func (c *Coordinator) CacheRotateInterval() uint64 { - return c.config.MaxBlocksPerFile + return c.cacheRotateInterval.Load() } func (c *Coordinator) Flush() error { resp := make(chan error, 1) - return awaitError(c, flushReq{resp: resp}, resp) + return awaitError(context.Background(), c, flushReq{resp: resp}, resp) } // Close performs a graceful shutdown. Subsequent callers receive the same @@ -285,7 +291,7 @@ func (c *Coordinator) Flush() error { func (c *Coordinator) Close() error { c.closeOnce.Do(func() { resp := make(chan error, 1) - c.closeErr = awaitError(c, closeReq{resp: resp}, resp) + c.closeErr = awaitError(context.Background(), c, closeReq{resp: resp}, resp) close(c.done) }) return c.closeErr @@ -294,24 +300,24 @@ func (c *Coordinator) Close() error { func (c *Coordinator) SimulateCrash() { c.closeOnce.Do(func() { resp := make(chan struct{}, 1) - _, _ = sendAndAwaitResponse(c, simulateCrashReq{resp: resp}, resp) + _, _ = sendAndAwaitResponse(context.Background(), c, simulateCrashReq{resp: resp}, resp) close(c.done) }) } func (c *Coordinator) SetBlockFlushInterval(interval uint64) { resp := make(chan error, 1) - _ = awaitError(c, setBlockFlushIntervalReq{interval: interval, resp: resp}, resp) + _ = awaitError(context.Background(), c, setBlockFlushIntervalReq{interval: interval, resp: resp}, resp) } func (c *Coordinator) SetMaxBlocksPerFile(n uint64) { resp := make(chan error, 1) - _ = awaitError(c, setMaxBlocksPerFileReq{maxBlocksPerFile: n, resp: resp}, resp) + _ = awaitError(context.Background(), c, setMaxBlocksPerFileReq{maxBlocksPerFile: n, resp: resp}, resp) } func (c *Coordinator) SetFaultHooks(hooks *parquet.FaultHooks) { resp := make(chan error, 1) - _ = awaitError(c, setFaultHooksReq{hooks: hooks, resp: resp}, resp) + _ = awaitError(context.Background(), c, setFaultHooksReq{hooks: hooks, resp: resp}, resp) } // WarmupRecords returns and clears the warmup receipt records recovered @@ -332,13 +338,20 @@ func (c *Coordinator) ReplayedBlocks() 
[]ReplayedBlock { return blocks } -func sendAndAwaitResponse[T any](c *Coordinator, req coordRequest, resp <-chan T) (T, error) { +// sendAndAwaitResponse enqueues req on the requests channel and waits for the +// reply. If ctx is cancelled at either stage the call returns ctx.Err() so +// callers don't sit blocked behind an in-flight handler. Non-cancellable +// callers (writes, lifecycle ops) pass context.Background(); the read APIs +// pass the caller's ctx so eth_getLogs / GetReceiptByTxHash respect timeouts. +func sendAndAwaitResponse[T any](ctx context.Context, c *Coordinator, req coordRequest, resp <-chan T) (T, error) { var zero T select { case c.requests <- req: case <-c.done: return zero, ErrStoreClosed + case <-ctx.Done(): + return zero, ctx.Err() } select { @@ -346,11 +359,13 @@ func sendAndAwaitResponse[T any](c *Coordinator, req coordRequest, resp <-chan T return r, nil case <-c.done: return zero, ErrStoreClosed + case <-ctx.Done(): + return zero, ctx.Err() } } -func awaitError(c *Coordinator, req coordRequest, resp <-chan error) error { - err, waitErr := sendAndAwaitResponse(c, req, resp) +func awaitError(ctx context.Context, c *Coordinator, req coordRequest, resp <-chan error) error { + err, waitErr := sendAndAwaitResponse(ctx, c, req, resp) if waitErr != nil { return waitErr } diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go index 12b7cc7931..5b037f8cf7 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/dispatch_test.go @@ -30,6 +30,7 @@ func TestSetMaxBlocksPerFileUpdatesReaderState(t *testing.T) { require.NoError(t, <-resp) require.Equal(t, uint64(3), coord.config.MaxBlocksPerFile) + require.Equal(t, uint64(3), coord.cacheRotateInterval.Load()) require.Equal(t, uint64(3), reader.maxBlocksPerFile) } diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go 
b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go index 381d6f690b..1e35d012ec 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/handlers.go @@ -112,9 +112,12 @@ func (c *Coordinator) handleSetBlockFlushInterval(req setBlockFlushIntervalReq) } // handleSetMaxBlocksPerFile updates the rotation interval and propagates it -// to the reader so log-file pruning by block range stays consistent. +// to the reader so log-file pruning by block range stays consistent. Mirrors +// the new value into cacheRotateInterval so external readers see it without +// going through the request channel. func (c *Coordinator) handleSetMaxBlocksPerFile(req setMaxBlocksPerFileReq) { c.config.MaxBlocksPerFile = req.maxBlocksPerFile + c.cacheRotateInterval.Store(req.maxBlocksPerFile) if c.reader != nil { c.reader.setMaxBlocksPerFile(req.maxBlocksPerFile) } diff --git a/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go b/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go index 7838994f18..b2a23a4e13 100644 --- a/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go +++ b/sei-db/ledger_db/receipt/parquet_v2_receipt_store.go @@ -3,6 +3,7 @@ package receipt import ( "context" "fmt" + "sync" "github.com/ethereum/go-ethereum/common" ethtypes "github.com/ethereum/go-ethereum/core/types" @@ -19,6 +20,9 @@ type parquetReceiptStoreV2 struct { storeKey sdk.StoreKey txHashIndex TxHashIndex indexPruner *txHashIndexPruner + + closeOnce sync.Once + closeErr error } func newParquetReceiptStoreV2(cfg dbconfig.ReceiptStoreConfig, storeKey sdk.StoreKey) (ReceiptStore, error) { @@ -330,15 +334,21 @@ func (s *parquetReceiptStoreV2) FilterLogs(ctx sdk.Context, fromBlock, toBlock u return logs, nil } +// Close releases the parquet store, the tx-hash index, and the index pruner. +// Idempotent: indexPruner.Stop closes a channel that would panic on a second +// call, so the entire teardown is gated on closeOnce. 
Repeat callers receive +// the same error as the first. func (s *parquetReceiptStoreV2) Close() error { - if s.indexPruner != nil { - s.indexPruner.Stop() - } - storeErr := s.store.Close() - if s.txHashIndex != nil { - if err := s.txHashIndex.Close(); err != nil && storeErr == nil { - storeErr = err + s.closeOnce.Do(func() { + if s.indexPruner != nil { + s.indexPruner.Stop() } - } - return storeErr + s.closeErr = s.store.Close() + if s.txHashIndex != nil { + if err := s.txHashIndex.Close(); err != nil && s.closeErr == nil { + s.closeErr = err + } + } + }) + return s.closeErr } From 0e63a08a604bae0e1e3f1df6a31880d3cd7821b3 Mon Sep 17 00:00:00 2001 From: Jeremy Wei Date: Wed, 6 May 2026 00:00:01 -0400 Subject: [PATCH 27/27] close parquet writers when New errors after replay opened them If WAL replay lazily opens the receipt/log writers via initWriters and then a later replay step fails, New returns without starting the run goroutine, so the writers and their *os.File handles were leaking until process exit. Adds a deferred cleanupWriters guard mirroring the existing reader/WAL cleanup pattern, plus a regression test that injects a converter failure and verifies the on-disk parquet files are re-readable after the failed New call. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../parquet_v2/coordinator/coordinator.go | 7 ++++ .../parquet_v2/coordinator/wal_test.go | 36 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go index ad2dd2c42b..2b45e809b7 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/coordinator.go @@ -122,6 +122,12 @@ func New(cfg parquet.StoreConfig) (*Coordinator, error) { latestVersion: 0, earliestVersion: 0, } + cleanupWriters := true + defer func() { + if cleanupWriters { + _ = c.closeWriters() + } + }() c.cacheRotateInterval.Store(storeCfg.MaxBlocksPerFile) receiptFiles := make([]string, 0, len(closedFiles)) @@ -153,6 +159,7 @@ func New(cfg parquet.StoreConfig) (*Coordinator, error) { go c.run() cleanupReader = false cleanupWAL = false + cleanupWriters = false return c, nil } diff --git a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal_test.go b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal_test.go index 8b51530f6b..50e9ced8fb 100644 --- a/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal_test.go +++ b/sei-db/ledger_db/receipt/parquet_v2/coordinator/wal_test.go @@ -1,6 +1,8 @@ package coordinator import ( + "context" + "errors" "math/big" "path/filepath" "testing" @@ -72,3 +74,37 @@ func TestReplayWALRotatesBoundaryWithoutClearingWAL(t *testing.T) { require.Len(t, coord.tempWriteCache, 1) require.Contains(t, coord.tempWriteCache, common.BigToHash(new(big.Int).SetUint64(4))) } + +func TestNewClosesReplayWritersOnReplayError(t *testing.T) { + dir := t.TempDir() + wal, err := parquet.NewWAL(filepath.Join(dir, "parquet-wal")) + require.NoError(t, err) + require.NoError(t, wal.Write(parquet.WALEntry{BlockNumber: 1, Receipts: [][]byte{{1}}})) + require.NoError(t, wal.Write(parquet.WALEntry{BlockNumber: 2, Receipts: [][]byte{{2}}})) + 
require.NoError(t, wal.Close()) + + calls := 0 + _, err = New(parquet.StoreConfig{ + DBDirectory: dir, + MaxBlocksPerFile: 4, + WALConverter: func(blockNumber uint64, receiptBytes []byte, logStartIndex uint) (parquet.ReplayReceipt, error) { + calls++ + if calls == 2 { + return parquet.ReplayReceipt{}, errors.New("injected replay failure") + } + return replayConverterForTest(blockNumber, receiptBytes, logStartIndex) + }, + }) + require.ErrorContains(t, err, "injected replay failure") + require.Equal(t, 2, calls) + + reader, err := NewReaderWithMaxBlocksPerFile(dir, 4) + require.NoError(t, err) + defer func() { require.NoError(t, reader.Close()) }() + + ctx := context.Background() + _, err = reader.QueryReceiptByTxHash(ctx, []string{filepath.Join(dir, "receipts_0.parquet")}, common.BigToHash(new(big.Int).SetUint64(1))) + require.NoError(t, err) + _, err = reader.QueryLogs(ctx, []string{filepath.Join(dir, "logs_0.parquet")}, parquet.LogFilter{}) + require.NoError(t, err) +}