diff --git a/app/app.go b/app/app.go index 247d008a4f..dd064167a5 100644 --- a/app/app.go +++ b/app/app.go @@ -705,6 +705,13 @@ func New( if err != nil { panic(fmt.Sprintf("error reading EVM config due to %s", err)) } + if app.evmRPCConfig.TraceBakeEnabled { + traceCache, tcErr := evmkeeper.NewTraceCache(homePath) + if tcErr != nil { + panic(fmt.Sprintf("failed to open trace cache: %s", tcErr)) + } + app.EvmKeeper.SetTraceCache(traceCache) + } app.adminConfig, err = admin.ReadConfig(appOpts) if err != nil { panic(fmt.Sprintf("error reading admin config due to %s", err)) @@ -1063,6 +1070,14 @@ func (app *App) HandlePreCommit(ctx sdk.Context) error { func (app *App) HandleClose() error { var errs []error + // Close trace cache so its WAL is flushed; baker writes use NoSync. + if tc := app.EvmKeeper.TraceCache(); tc != nil { + if err := tc.Close(); err != nil { + logger.Error("failed to close trace cache", "err", err) + errs = append(errs, fmt.Errorf("failed to close trace cache: %w", err)) + } + } + // Close receipt store if app.receiptStore != nil { if err := app.receiptStore.Close(); err != nil { diff --git a/evmrpc/config/config.go b/evmrpc/config/config.go index 59cf3ba709..f328b505a1 100644 --- a/evmrpc/config/config.go +++ b/evmrpc/config/config.go @@ -141,6 +141,32 @@ type Config struct { // EnabledLegacySeiApis lists which gated sei_* and sei2_* JSON-RPC methods are allowed on the EVM HTTP endpoint. // Set in app.toml [evm] as enabled_legacy_sei_apis (see ReadConfig and ConfigTemplate defaults). EnabledLegacySeiApis []string `mapstructure:"enabled_legacy_sei_apis"` + + // TraceBakeEnabled, when true, runs a background worker that re-executes + // each committed block with the configured tracers and stores the + // results to /data/trace_cache. debug_traceTransaction serves + // from cache on hit. Default false; flip on for RPC nodes only. 
+ TraceBakeEnabled bool `mapstructure:"trace_bake_enabled"` + + // TraceBakeWorkers is the number of re-execution goroutines (default 1). + TraceBakeWorkers int `mapstructure:"trace_bake_workers"` + + // TraceBakeQueueSize bounds the in-flight height queue (default 4096). + TraceBakeQueueSize int `mapstructure:"trace_bake_queue_size"` + + // TraceBakeTracers is the list of tracer names to bake per block. + // Default ["callTracer"]. Only the standard named tracers are eligible. + TraceBakeTracers []string `mapstructure:"trace_bake_tracers"` + + // TraceBakeWindowBlocks bounds the rolling cache window: blocks older + // than (latest - this) are pruned. 0 disables pruning. + TraceBakeWindowBlocks int64 `mapstructure:"trace_bake_window_blocks"` + + // TraceBakeBlockResults additionally writes the assembled per-block + // JSON to the cache so debug_traceBlockBy* hits at one PK seek + // instead of N. ~2x storage for callers that primarily trace + // per-block; per-tx hits are unaffected. Default false. 
+ TraceBakeBlockResults bool `mapstructure:"trace_bake_block_results"` } var DefaultConfig = Config{ @@ -178,6 +204,12 @@ var DefaultConfig = Config{ "sei_getEVMAddress", "sei_getCosmosTx", }, + TraceBakeEnabled: false, + TraceBakeWorkers: 1, + TraceBakeQueueSize: 4096, + TraceBakeTracers: []string{"callTracer"}, + TraceBakeWindowBlocks: 0, + TraceBakeBlockResults: false, } const ( @@ -211,6 +243,12 @@ const ( flagWorkerPoolSize = "evm.worker_pool_size" flagWorkerQueueSize = "evm.worker_queue_size" flagEVMLegacySeiApis = "evm.enabled_legacy_sei_apis" + flagTraceBakeEnabled = "evm.trace_bake_enabled" + flagTraceBakeWorkers = "evm.trace_bake_workers" + flagTraceBakeQueueSize = "evm.trace_bake_queue_size" + flagTraceBakeTracers = "evm.trace_bake_tracers" + flagTraceBakeWindowBlocks = "evm.trace_bake_window_blocks" + flagTraceBakeBlockResults = "evm.trace_bake_block_results" ) func ReadConfig(opts servertypes.AppOptions) (Config, error) { @@ -366,6 +404,36 @@ func ReadConfig(opts servertypes.AppOptions) (Config, error) { return cfg, err } } + if v := opts.Get(flagTraceBakeEnabled); v != nil { + if cfg.TraceBakeEnabled, err = cast.ToBoolE(v); err != nil { + return cfg, err + } + } + if v := opts.Get(flagTraceBakeWorkers); v != nil { + if cfg.TraceBakeWorkers, err = cast.ToIntE(v); err != nil { + return cfg, err + } + } + if v := opts.Get(flagTraceBakeQueueSize); v != nil { + if cfg.TraceBakeQueueSize, err = cast.ToIntE(v); err != nil { + return cfg, err + } + } + if v := opts.Get(flagTraceBakeTracers); v != nil { + if cfg.TraceBakeTracers, err = cast.ToStringSliceE(v); err != nil { + return cfg, err + } + } + if v := opts.Get(flagTraceBakeWindowBlocks); v != nil { + if cfg.TraceBakeWindowBlocks, err = cast.ToInt64E(v); err != nil { + return cfg, err + } + } + if v := opts.Get(flagTraceBakeBlockResults); v != nil { + if cfg.TraceBakeBlockResults, err = cast.ToBoolE(v); err != nil { + return cfg, err + } + } return cfg, nil } @@ -521,4 +589,30 @@ worker_pool_size = {{ 
.EVM.WorkerPoolSize }} # WorkerQueueSize defines the size of the task queue in the worker pool. # Default: 1000 tasks. Set to 0 to use the default. worker_queue_size = {{ .EVM.WorkerQueueSize }} + +# TraceBakeEnabled, when true, runs a background worker that re-executes +# each committed block with the configured tracers and stores the result +# to /data/trace_cache. debug_traceTransaction with a bakeable +# tracer config (callTracer / prestateTracer / flatCallTracer) returns +# from cache on hit. Recommended for RPC nodes only; default false. +trace_bake_enabled = {{ .EVM.TraceBakeEnabled }} + +# Number of re-execution worker goroutines (default 1). +trace_bake_workers = {{ .EVM.TraceBakeWorkers }} + +# Bounded in-flight height queue. Drops on full so consensus never blocks. +trace_bake_queue_size = {{ .EVM.TraceBakeQueueSize }} + +# Which tracers to bake per block; only standard named tracers are eligible. +trace_bake_tracers = [{{- range $i, $t := .EVM.TraceBakeTracers }}{{- if $i }}, {{ end }}"{{ $t }}"{{- end }}] + +# Rolling cache window: prune blocks older than (latest - this). +# 0 disables pruning (cache grows forever). +trace_bake_window_blocks = {{ .EVM.TraceBakeWindowBlocks }} + +# Additionally cache the assembled per-block trace result so +# debug_traceBlockBy* hits at one PK seek instead of N. ~2x storage +# in exchange for ~3x faster block-level trace; per-tx hits unaffected. +# Recommended when block-tracing is the dominant workload. 
+trace_bake_block_results = {{ .EVM.TraceBakeBlockResults }} ` diff --git a/evmrpc/server.go b/evmrpc/server.go index 91ad926ace..28e5fd97dd 100644 --- a/evmrpc/server.go +++ b/evmrpc/server.go @@ -91,6 +91,16 @@ func NewEVMHTTPServer( ctx := ctxProvider(LatestCtxHeight) txAPI := NewTransactionAPI(tmClient, k, ctxProvider, txConfigProvider, homeDir, ConnectionTypeHTTP, watermarks, globalBlockCache, cacheCreationMutex) debugAPI := NewDebugAPI(tmClient, k, beginBlockKeepers, ctxProvider, txConfigProvider, simulateConfig, app, antehandler, ConnectionTypeHTTP, config, globalBlockCache, cacheCreationMutex, watermarks) + if config.TraceBakeEnabled { + StartTraceBakerForDebugAPI(debugAPI, TraceBakerConfig{ + Workers: config.TraceBakeWorkers, + QueueSize: config.TraceBakeQueueSize, + Tracers: config.TraceBakeTracers, + WindowBlocks: config.TraceBakeWindowBlocks, + CacheBlockResults: config.TraceBakeBlockResults, + TipFn: func() int64 { return ctxProvider(LatestCtxHeight).BlockHeight() }, + }) + } if isPanicOrSyntheticTxFunc == nil { isPanicOrSyntheticTxFunc = func(ctx context.Context, hash common.Hash) (bool, error) { return debugAPI.isPanicOrSyntheticTx(ctx, hash) diff --git a/evmrpc/trace_baker.go b/evmrpc/trace_baker.go new file mode 100644 index 0000000000..eb01f879af --- /dev/null +++ b/evmrpc/trace_baker.go @@ -0,0 +1,322 @@ +package evmrpc + +import ( + "context" + "encoding/json" + "sync" + "sync/atomic" + "time" + + gethtracers "github.com/ethereum/go-ethereum/eth/tracers" + "github.com/ethereum/go-ethereum/rpc" + "github.com/sei-protocol/seilog" + + "github.com/sei-protocol/sei-chain/x/evm/keeper" +) + +var bakerLogger = seilog.NewLogger("evmrpc", "trace-baker") + +// blockTracer is the subset of *gethtracers.API the baker uses; the +// indirection lets tests drive the worker without standing up a real EVM. 
+type blockTracer interface { + TraceBlockByNumber(ctx context.Context, number rpc.BlockNumber, config *gethtracers.TraceConfig) ([]*gethtracers.TxTraceResult, error) +} + +// TraceBaker re-runs newly committed blocks through the tracer in worker +// goroutines off the consensus path and stores the JSON output into a +// TraceCache. debug_trace* RPCs hit the cache first; on miss they fall +// through to today's on-demand re-execution. Consensus latency is +// unaffected because Enqueue is a non-blocking channel send and all +// re-execution happens on baker goroutines. +type TraceBaker struct { + tracersAPI blockTracer + cache *keeper.TraceCache + tracers []string + bakeTimeout time.Duration + tipFn func() int64 + windowBlocks int64 + pruneInterval time.Duration + cacheBlockResults bool + + queue chan int64 + workers int + + closeOnce sync.Once + done chan struct{} + wg sync.WaitGroup + + dropped uint64 // atomic + baked uint64 // atomic + failed uint64 // atomic +} + +// TraceBakerConfig holds tunable knobs for the baker. +type TraceBakerConfig struct { + // Workers is the number of re-execution goroutines. Default 1. + Workers int + // QueueSize bounds in-flight heights. Default 4096. Drops on full. + QueueSize int + // Tracers names the tracers to bake per block. Default ["callTracer"]. + Tracers []string + // BakeTimeout caps re-execution per block per tracer. Default 60s. + BakeTimeout time.Duration + // TipFn returns the current chain tip; used by catch-up and prune. + // Optional — when nil, both features are skipped. + TipFn func() int64 + // WindowBlocks bounds catch-up backfill and the rolling prune window. + // 0 disables prune; catch-up still runs from last_baked+1 to tip. + WindowBlocks int64 + // PruneInterval is the tick for the prune goroutine. Default 1m. + PruneInterval time.Duration + // CacheBlockResults additionally writes the assembled per-block JSON + // to the "tb/" keyspace so debug_traceBlockBy* hits at one seek + // instead of N. 
Per-tx "ts/" rows are written either way; this just + // adds a denormalized block row alongside. + CacheBlockResults bool +} + +// StartTraceBakerForDebugAPI wires a TraceBaker against the given DebugAPI's +// tracer surface, registers it on the keeper's TraceCache so EndBlock-driven +// Enqueue calls reach it, and starts the workers. Returns nil if the keeper +// has no TraceCache (the feature is off). +func StartTraceBakerForDebugAPI(api *DebugAPI, cfg TraceBakerConfig) *TraceBaker { + if api == nil { + return nil + } + cache := api.keeper.TraceCache() + if cache == nil { + return nil + } + b := NewTraceBaker(api.tracersAPI, cache, cfg) + cache.SetTraceEnqueuer(b) + b.Start() + return b +} + +// NewTraceBaker constructs a baker. Call Start to launch workers. +func NewTraceBaker(api *gethtracers.API, cache *keeper.TraceCache, cfg TraceBakerConfig) *TraceBaker { + if cfg.Workers <= 0 { + cfg.Workers = 1 + } + if cfg.QueueSize <= 0 { + cfg.QueueSize = 4096 + } + if len(cfg.Tracers) == 0 { + cfg.Tracers = []string{"callTracer"} + } + if cfg.BakeTimeout <= 0 { + cfg.BakeTimeout = 60 * time.Second + } + if cfg.PruneInterval <= 0 { + cfg.PruneInterval = time.Minute + } + return &TraceBaker{ + tracersAPI: api, + cache: cache, + tracers: append([]string(nil), cfg.Tracers...), + bakeTimeout: cfg.BakeTimeout, + tipFn: cfg.TipFn, + windowBlocks: cfg.WindowBlocks, + pruneInterval: cfg.PruneInterval, + cacheBlockResults: cfg.CacheBlockResults, + queue: make(chan int64, cfg.QueueSize), + workers: cfg.Workers, + done: make(chan struct{}), + } +} + +// Start launches the worker goroutines plus, when TipFn is set, a one-shot +// catch-up sweep (from last_baked+1 up to current tip, bounded by +// WindowBlocks) and a periodic prune ticker (when WindowBlocks > 0). 
+func (b *TraceBaker) Start() { + bakerLogger.Info("trace baker starting", + "workers", b.workers, "queue_size", cap(b.queue), + "tracers", b.tracers, "window_blocks", b.windowBlocks) + for i := 0; i < b.workers; i++ { + b.wg.Add(1) + go b.workerLoop() + } + if b.tipFn != nil { + b.wg.Add(1) + go b.catchUpLoop() + if b.windowBlocks > 0 { + b.wg.Add(1) + go b.pruneLoop() + } + } +} + +// Stop signals workers to drain and exit; blocks until they do. +func (b *TraceBaker) Stop() { + b.closeOnce.Do(func() { + close(b.done) + close(b.queue) + }) + b.wg.Wait() +} + +// Enqueue forwards a height to the worker queue. Non-blocking by design: +// when the queue is full the height is dropped and the corresponding block +// falls through to on-demand re-execution at debug_trace time. Consensus +// latency is unaffected. +func (b *TraceBaker) Enqueue(height int64) { + if b == nil { + return + } + select { + case b.queue <- height: + default: + d := atomic.AddUint64(&b.dropped, 1) + // Log sparsely so a stuck baker doesn't flood the journal. + if d == 1 || d%256 == 0 { + bakerLogger.Info("trace baker queue full; dropping height", + "height", height, "dropped_total", d) + } + } +} + +// DroppedCount returns the cumulative dropped-enqueue count since startup. +func (b *TraceBaker) DroppedCount() uint64 { return atomic.LoadUint64(&b.dropped) } + +// BakedCount returns the cumulative successful (block, tracer) bake count. +func (b *TraceBaker) BakedCount() uint64 { return atomic.LoadUint64(&b.baked) } + +// FailedCount returns the cumulative failed (block, tracer) bake count. 
+func (b *TraceBaker) FailedCount() uint64 { return atomic.LoadUint64(&b.failed) } + +func (b *TraceBaker) workerLoop() { + defer b.wg.Done() + for { + select { + case <-b.done: + return + case h, ok := <-b.queue: + if !ok { + return + } + b.bakeBlock(h) + } + } +} + +func (b *TraceBaker) bakeBlock(height int64) { + defer func() { + if r := recover(); r != nil { + bakerLogger.Error("trace baker panic", "height", height, "panic", r) + } + }() + for _, name := range b.tracers { + b.bakeBlockOneTracer(height, name) + } +} + +func (b *TraceBaker) bakeBlockOneTracer(height int64, tracer string) { + ctx, cancel := context.WithTimeout(context.Background(), b.bakeTimeout) + defer cancel() + + tracerName := tracer + cfg := &gethtracers.TraceConfig{Tracer: &tracerName} + results, err := b.tracersAPI.TraceBlockByNumber(ctx, rpc.BlockNumber(height), cfg) + if err != nil { + atomic.AddUint64(&b.failed, 1) + bakerLogger.Debug("trace baker block trace failed", + "height", height, "tracer", tracer, "err", err) + return + } + for _, r := range results { + if r == nil || r.Result == nil { + continue + } + bz, err := encodeTraceResult(r.Result) + if err != nil { + bakerLogger.Debug("trace baker encode failed", + "height", height, "tracer", tracer, "tx", r.TxHash.Hex(), "err", err) + continue + } + if err := b.cache.Put(height, tracer, r.TxHash, bz); err != nil { + bakerLogger.Debug("trace baker cache put failed", + "height", height, "tracer", tracer, "tx", r.TxHash.Hex(), "err", err) + continue + } + } + // Skip empty blocks: per-tx assembly returns [] for them at zero cache + // cost, and json.Marshal(nil) would produce "null" which is a format + // mismatch with the live path's []. 
+ if b.cacheBlockResults && len(results) > 0 { + blockBz, err := json.Marshal(results) + if err != nil { + bakerLogger.Debug("trace baker block encode failed", + "height", height, "tracer", tracer, "err", err) + } else if err := b.cache.PutBlock(height, tracer, blockBz); err != nil { + bakerLogger.Debug("trace baker block put failed", + "height", height, "tracer", tracer, "err", err) + } + } + atomic.AddUint64(&b.baked, 1) + if err := b.cache.SetLastBakedHeight(height); err != nil { + bakerLogger.Debug("trace baker last_baked update failed", + "height", height, "tracer", tracer, "err", err) + } +} + +// catchUpLoop bakes any blocks committed since the last successful run. +// Bounded by WindowBlocks so a long-stopped node doesn't try to bake from +// genesis. Exits as soon as it reaches the current tip. +func (b *TraceBaker) catchUpLoop() { + defer b.wg.Done() + last, err := b.cache.LastBakedHeight() + if err != nil || last <= 0 { + return + } + tip := b.tipFn() + if tip <= last { + return + } + from := last + 1 + if b.windowBlocks > 0 && from < tip-b.windowBlocks+1 { + from = tip - b.windowBlocks + 1 + } + bakerLogger.Info("trace baker catch-up", "from", from, "to", tip) + for h := from; h <= tip; h++ { + select { + case <-b.done: + return + default: + } + b.bakeBlock(h) + } +} + +// pruneLoop ticks every PruneInterval and deletes cache rows older than +// (tip - WindowBlocks). One DeleteRange per tick — cheap on pebble. +func (b *TraceBaker) pruneLoop() { + defer b.wg.Done() + ticker := time.NewTicker(b.pruneInterval) + defer ticker.Stop() + for { + select { + case <-b.done: + return + case <-ticker.C: + tip := b.tipFn() + cutoff := tip - b.windowBlocks + if cutoff <= 0 { + continue + } + if err := b.cache.Prune(cutoff); err != nil { + bakerLogger.Debug("trace baker prune failed", "cutoff", cutoff, "err", err) + } + } + } +} + +// encodeTraceResult turns a tracer result (either json.RawMessage already, +// or any json-marshalable value) into bytes for the cache. 
The geth call +// tracer returns json.RawMessage directly; struct/native tracers return +// typed structs. +func encodeTraceResult(v interface{}) (json.RawMessage, error) { + if raw, ok := v.(json.RawMessage); ok { + return raw, nil + } + return json.Marshal(v) +} diff --git a/evmrpc/trace_baker_test.go b/evmrpc/trace_baker_test.go new file mode 100644 index 0000000000..635894044e --- /dev/null +++ b/evmrpc/trace_baker_test.go @@ -0,0 +1,427 @@ +package evmrpc + +import ( + "context" + "encoding/json" + "errors" + "math/big" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + gethtracers "github.com/ethereum/go-ethereum/eth/tracers" + "github.com/ethereum/go-ethereum/rpc" + "github.com/stretchr/testify/require" + + "github.com/sei-protocol/sei-chain/x/evm/keeper" +) + +// fakeTracerAPI drives the baker with controllable per-call results. +type fakeTracerAPI struct { + mu sync.Mutex + calls int32 + // keyed by height + results map[int64][]*gethtracers.TxTraceResult + errs map[int64]error + // optional: blocks until released, simulates a long bake + gate chan struct{} +} + +func (f *fakeTracerAPI) TraceBlockByNumber(_ context.Context, number rpc.BlockNumber, _ *gethtracers.TraceConfig) ([]*gethtracers.TxTraceResult, error) { + atomic.AddInt32(&f.calls, 1) + f.mu.Lock() + defer f.mu.Unlock() + if f.gate != nil { + <-f.gate + } + if err, ok := f.errs[number.Int64()]; ok { + return nil, err + } + return f.results[number.Int64()], nil +} + +func waitForCount(t *testing.T, fn func() uint64, want uint64) { + t.Helper() + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if fn() >= want { + return + } + time.Sleep(5 * time.Millisecond) + } + t.Fatalf("timed out waiting for count >= %d (got %d)", want, fn()) +} + +func TestTraceBakerBakesAndCaches(t *testing.T) { + cache, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer cache.Close() + + tx1 := common.HexToHash("0x11") + tx2 := 
common.HexToHash("0x22") + api := &fakeTracerAPI{ + results: map[int64][]*gethtracers.TxTraceResult{ + 42: { + {TxHash: tx1, Result: json.RawMessage(`{"calls":[1]}`)}, + {TxHash: tx2, Result: json.RawMessage(`{"calls":[2]}`)}, + }, + }, + } + + b := NewTraceBaker(nil, cache, TraceBakerConfig{Workers: 1, QueueSize: 8}) + b.tracersAPI = api + b.Start() + defer b.Stop() + + b.Enqueue(42) + waitForCount(t, b.BakedCount, 1) + + v, ok, err := cache.Get(42, "callTracer", tx1) + require.NoError(t, err) + require.True(t, ok) + require.JSONEq(t, `{"calls":[1]}`, string(v)) + + v, ok, err = cache.Get(42, "callTracer", tx2) + require.NoError(t, err) + require.True(t, ok) + require.JSONEq(t, `{"calls":[2]}`, string(v)) +} + +func TestTraceBakerEnqueueIsNonBlocking(t *testing.T) { + // QueueSize=1 + a single worker held on the gate. The first Enqueue + // fills the queue; the second drops without blocking. Consensus + // latency must never depend on baker progress. + cache, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer cache.Close() + + gate := make(chan struct{}) + api := &fakeTracerAPI{gate: gate, results: map[int64][]*gethtracers.TxTraceResult{}} + b := NewTraceBaker(nil, cache, TraceBakerConfig{Workers: 1, QueueSize: 1}) + b.tracersAPI = api + b.Start() + defer func() { + close(gate) + b.Stop() + }() + + b.Enqueue(1) // worker picks it up, blocks on gate + // give the worker a moment to dequeue + time.Sleep(20 * time.Millisecond) + b.Enqueue(2) // sits in the queue + for i := 0; i < 100; i++ { // any number > buffer must drop + b.Enqueue(int64(i + 3)) + } + require.Greater(t, b.DroppedCount(), uint64(0), + "queue full must drop subsequent Enqueue calls instead of blocking") +} + +func TestTraceBakerErrorBecomesFailedCount(t *testing.T) { + cache, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer cache.Close() + + api := &fakeTracerAPI{ + errs: map[int64]error{99: errors.New("boom")}, + } + b := NewTraceBaker(nil, cache, 
TraceBakerConfig{Workers: 1, QueueSize: 8}) + b.tracersAPI = api + b.Start() + defer b.Stop() + + b.Enqueue(99) + waitForCount(t, b.FailedCount, 1) + require.Equal(t, uint64(0), b.BakedCount(), "errors should not count as baked") +} + +func TestTraceBakerSkipsNilOrErroredTxResults(t *testing.T) { + // Tracer per-tx errors come back as TxTraceResult{Error:..., Result:nil}. + // The baker must skip those without crashing or caching empty values. + cache, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer cache.Close() + + tx := common.HexToHash("0xab") + api := &fakeTracerAPI{ + results: map[int64][]*gethtracers.TxTraceResult{ + 7: { + nil, + {TxHash: common.HexToHash("0xff"), Result: nil, Error: "trace failed"}, + {TxHash: tx, Result: json.RawMessage(`{"ok":1}`)}, + }, + }, + } + b := NewTraceBaker(nil, cache, TraceBakerConfig{Workers: 1, QueueSize: 8}) + b.tracersAPI = api + b.Start() + defer b.Stop() + + b.Enqueue(7) + waitForCount(t, b.BakedCount, 1) + + v, ok, err := cache.Get(7, "callTracer", tx) + require.NoError(t, err) + require.True(t, ok) + require.JSONEq(t, `{"ok":1}`, string(v)) + + _, ok, _ = cache.Get(7, "callTracer", common.HexToHash("0xff")) + require.False(t, ok, "errored tx should not be cached") +} + +func TestTraceBakerMultipleTracers(t *testing.T) { + cache, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer cache.Close() + + tx := common.HexToHash("0x77") + api := &fakeTracerAPI{ + results: map[int64][]*gethtracers.TxTraceResult{ + 3: {{TxHash: tx, Result: json.RawMessage(`{"v":1}`)}}, + }, + } + b := NewTraceBaker(nil, cache, TraceBakerConfig{ + Workers: 1, + QueueSize: 8, + Tracers: []string{"callTracer", "prestateTracer"}, + }) + b.tracersAPI = api + b.Start() + defer b.Stop() + + b.Enqueue(3) + waitForCount(t, b.BakedCount, 2) + + for _, name := range []string{"callTracer", "prestateTracer"} { + v, ok, err := cache.Get(3, name, tx) + require.NoError(t, err) + require.True(t, ok, "tracer %s 
should be cached", name) + require.JSONEq(t, `{"v":1}`, string(v)) + } +} + +func TestTraceBakerLastBakedHeightAdvances(t *testing.T) { + cache, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer cache.Close() + + api := &fakeTracerAPI{ + results: map[int64][]*gethtracers.TxTraceResult{ + 3: {{TxHash: common.HexToHash("0x1"), Result: json.RawMessage(`{}`)}}, + 5: {{TxHash: common.HexToHash("0x2"), Result: json.RawMessage(`{}`)}}, + 7: {{TxHash: common.HexToHash("0x3"), Result: json.RawMessage(`{}`)}}, + }, + } + b := NewTraceBaker(nil, cache, TraceBakerConfig{Workers: 1, QueueSize: 8}) + b.tracersAPI = api + b.Start() + defer b.Stop() + + for _, h := range []int64{3, 5, 7} { + b.Enqueue(h) + } + waitForCount(t, b.BakedCount, 3) + + got, err := cache.LastBakedHeight() + require.NoError(t, err) + require.Equal(t, int64(7), got, "last_baked_height must advance to the highest baked height") +} + +func TestTraceBakerCatchUpFromLastBaked(t *testing.T) { + // Persist last_baked=5; tip=8; baker should bake heights 6, 7, 8. 
+ cache, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer cache.Close() + require.NoError(t, cache.SetLastBakedHeight(5)) + + api := &fakeTracerAPI{ + results: map[int64][]*gethtracers.TxTraceResult{ + 6: {{TxHash: common.HexToHash("0x6"), Result: json.RawMessage(`{}`)}}, + 7: {{TxHash: common.HexToHash("0x7"), Result: json.RawMessage(`{}`)}}, + 8: {{TxHash: common.HexToHash("0x8"), Result: json.RawMessage(`{}`)}}, + }, + } + b := NewTraceBaker(nil, cache, TraceBakerConfig{ + Workers: 1, + QueueSize: 8, + TipFn: func() int64 { return 8 }, + }) + b.tracersAPI = api + b.Start() + defer b.Stop() + + waitForCount(t, b.BakedCount, 3) + got, err := cache.LastBakedHeight() + require.NoError(t, err) + require.Equal(t, int64(8), got) +} + +func TestTraceBakerCatchUpBoundedByWindow(t *testing.T) { + // last_baked=5, tip=100, window=10 — catch-up must start from tip-window+1 + // (=91), not from 6, so a long-stopped node doesn't burn forever. + cache, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer cache.Close() + require.NoError(t, cache.SetLastBakedHeight(5)) + + results := map[int64][]*gethtracers.TxTraceResult{} + for h := int64(1); h <= 100; h++ { + results[h] = []*gethtracers.TxTraceResult{ + {TxHash: common.BigToHash(big.NewInt(h)), Result: json.RawMessage(`{}`)}, + } + } + api := &fakeTracerAPI{results: results} + b := NewTraceBaker(nil, cache, TraceBakerConfig{ + Workers: 1, + QueueSize: 8, + WindowBlocks: 10, + TipFn: func() int64 { return 100 }, + }) + b.tracersAPI = api + b.Start() + defer b.Stop() + + // Window=10, tip=100 → catch-up bakes 91..100 (10 blocks). 
+ waitForCount(t, b.BakedCount, 10) + require.Less(t, atomic.LoadInt32(&api.calls), int32(20), + "window-bounded catch-up must not bake the whole 1..100 range") +} + +func TestTraceBakerPruneLoopRemovesOldRows(t *testing.T) { + cache, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer cache.Close() + + for h := int64(1); h <= 5; h++ { + require.NoError(t, cache.Put(h, "callTracer", common.HexToHash("0xab"), json.RawMessage(`"x"`))) + } + + api := &fakeTracerAPI{results: map[int64][]*gethtracers.TxTraceResult{}} + b := NewTraceBaker(nil, cache, TraceBakerConfig{ + Workers: 1, + QueueSize: 1, + WindowBlocks: 2, + TipFn: func() int64 { return 5 }, + PruneInterval: 25 * time.Millisecond, + }) + b.tracersAPI = api + b.Start() + defer b.Stop() + + // Wait for prune to run at least once. Tip=5, window=2 → cutoff=3 → rows + // for heights 1 and 2 should be deleted; 3, 4, 5 must remain. + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + _, ok1, _ := cache.Get(1, "callTracer", common.HexToHash("0xab")) + _, ok2, _ := cache.Get(2, "callTracer", common.HexToHash("0xab")) + if !ok1 && !ok2 { + break + } + time.Sleep(10 * time.Millisecond) + } + for _, h := range []int64{1, 2} { + _, ok, err := cache.Get(h, "callTracer", common.HexToHash("0xab")) + require.NoError(t, err) + require.False(t, ok, "height %d should be pruned", h) + } + for _, h := range []int64{3, 4, 5} { + _, ok, err := cache.Get(h, "callTracer", common.HexToHash("0xab")) + require.NoError(t, err) + require.True(t, ok, "height %d should remain", h) + } +} + +func TestTraceBakerWritesBlockResultWhenEnabled(t *testing.T) { + cache, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer cache.Close() + + tx1 := common.HexToHash("0x11") + tx2 := common.HexToHash("0x22") + api := &fakeTracerAPI{ + results: map[int64][]*gethtracers.TxTraceResult{ + 42: { + {TxHash: tx1, Result: json.RawMessage(`{"a":1}`)}, + {TxHash: tx2, Result: 
json.RawMessage(`{"a":2}`)}, + }, + }, + } + b := NewTraceBaker(nil, cache, TraceBakerConfig{ + Workers: 1, + QueueSize: 8, + CacheBlockResults: true, + }) + b.tracersAPI = api + b.Start() + defer b.Stop() + + b.Enqueue(42) + waitForCount(t, b.BakedCount, 1) + + // Per-tx rows still written (foundation; per-tx reads must keep working). + for _, tx := range []common.Hash{tx1, tx2} { + _, ok, err := cache.Get(42, "callTracer", tx) + require.NoError(t, err) + require.True(t, ok, "per-tx row for %s missing", tx.Hex()) + } + + // Block row written and contains both tx results in order. + bz, ok, err := cache.GetBlock(42, "callTracer") + require.NoError(t, err) + require.True(t, ok, "block row missing — CacheBlockResults didn't take effect") + require.Contains(t, string(bz), `"txHash":"0x0000000000000000000000000000000000000000000000000000000000000011"`) + require.Contains(t, string(bz), `"txHash":"0x0000000000000000000000000000000000000000000000000000000000000022"`) +} + +func TestTraceBakerSkipsBlockResultByDefault(t *testing.T) { + cache, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer cache.Close() + + tx := common.HexToHash("0xab") + api := &fakeTracerAPI{ + results: map[int64][]*gethtracers.TxTraceResult{ + 3: {{TxHash: tx, Result: json.RawMessage(`{}`)}}, + }, + } + b := NewTraceBaker(nil, cache, TraceBakerConfig{Workers: 1, QueueSize: 8}) + b.tracersAPI = api + b.Start() + defer b.Stop() + + b.Enqueue(3) + waitForCount(t, b.BakedCount, 1) + + // Per-tx row written... + _, ok, _ := cache.Get(3, "callTracer", tx) + require.True(t, ok) + // ...but the block row is NOT, since CacheBlockResults defaulted false. 
+ _, ok, _ = cache.GetBlock(3, "callTracer") + require.False(t, ok, "block row must not be written when CacheBlockResults is off") +} + +func TestTraceBakerStopDrainsAndCleansUp(t *testing.T) { + cache, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer cache.Close() + + api := &fakeTracerAPI{results: map[int64][]*gethtracers.TxTraceResult{}} + b := NewTraceBaker(nil, cache, TraceBakerConfig{Workers: 2, QueueSize: 4}) + b.tracersAPI = api + b.Start() + for i := int64(0); i < 4; i++ { + b.Enqueue(i) + } + // Stop must return after the workers drain — no goroutine leak. + done := make(chan struct{}) + go func() { + b.Stop() + close(done) + }() + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("baker.Stop() did not return") + } +} diff --git a/evmrpc/trace_cache_reader_test.go b/evmrpc/trace_cache_reader_test.go new file mode 100644 index 0000000000..225e9c737f --- /dev/null +++ b/evmrpc/trace_cache_reader_test.go @@ -0,0 +1,126 @@ +package evmrpc + +import ( + "encoding/json" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/eth/tracers" + "github.com/stretchr/testify/require" + + "github.com/sei-protocol/sei-chain/x/evm/keeper" +) + +func TestBakeableTracerName(t *testing.T) { + str := func(s string) *string { return &s } + cases := []struct { + name string + cfg *tracers.TraceConfig + want string + }{ + {"nil config (struct logger) — not bakeable", nil, ""}, + {"empty config (struct logger) — not bakeable", &tracers.TraceConfig{}, ""}, + {"callTracer plain — bakeable", &tracers.TraceConfig{Tracer: str("callTracer")}, "callTracer"}, + {"prestateTracer plain — bakeable", &tracers.TraceConfig{Tracer: str("prestateTracer")}, "prestateTracer"}, + {"flatCallTracer plain — bakeable", &tracers.TraceConfig{Tracer: str("flatCallTracer")}, "flatCallTracer"}, + { + // TracerConfig isn't part of the cache key, so any custom config + // makes the call un-bakeable — defensive against false 
hits. + "callTracer with TracerConfig — not bakeable", + &tracers.TraceConfig{Tracer: str("callTracer"), TracerConfig: json.RawMessage(`{"withLog":true}`)}, + "", + }, + {"unknown named tracer — not bakeable", &tracers.TraceConfig{Tracer: str("noopTracer")}, ""}, + {"raw JS tracer — not bakeable", &tracers.TraceConfig{Tracer: str("function() { ... }")}, ""}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.want, bakeableTracerName(tc.cfg)) + }) + } +} + +func TestBlockTraceCacheGet(t *testing.T) { + c, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer c.Close() + + tx1 := common.HexToHash("0x11") + tx2 := common.HexToHash("0x22") + tx3 := common.HexToHash("0x33") + str := func(s string) *string { return &s } + cfg := &tracers.TraceConfig{Tracer: str("callTracer")} + + require.NoError(t, c.Put(5, "callTracer", tx1, json.RawMessage(`{"a":1}`))) + require.NoError(t, c.Put(5, "callTracer", tx2, json.RawMessage(`{"a":2}`))) + + t.Run("all txs cached -> returns assembled list", func(t *testing.T) { + got, ok := blockTraceCacheGet(c, 5, []common.Hash{tx1, tx2}, cfg) + require.True(t, ok) + require.Len(t, got, 2) + require.Equal(t, tx1, got[0].TxHash) + require.Equal(t, tx2, got[1].TxHash) + require.JSONEq(t, `{"a":1}`, string(got[0].Result.(json.RawMessage))) + require.JSONEq(t, `{"a":2}`, string(got[1].Result.(json.RawMessage))) + }) + + t.Run("any miss -> falls through", func(t *testing.T) { + got, ok := blockTraceCacheGet(c, 5, []common.Hash{tx1, tx2, tx3}, cfg) + require.False(t, ok, "tx3 missing — must report miss so caller falls back to live trace") + require.Nil(t, got) + }) + + t.Run("nil cache -> miss", func(t *testing.T) { + got, ok := blockTraceCacheGet(nil, 5, []common.Hash{tx1}, cfg) + require.False(t, ok) + require.Nil(t, got) + }) + + t.Run("unbakeable tracer config -> miss without touching cache", func(t *testing.T) { + // Default config (struct logger) is unbakeable; even with rows 
present + // for the same hash, the helper must not return them. + got, ok := blockTraceCacheGet(c, 5, []common.Hash{tx1}, nil) + require.False(t, ok) + require.Nil(t, got) + }) + + t.Run("empty block -> empty hit", func(t *testing.T) { + got, ok := blockTraceCacheGet(c, 5, []common.Hash{}, cfg) + require.True(t, ok) + require.Empty(t, got) + }) +} + +func TestTryBlockResultCache(t *testing.T) { + c, err := keeper.NewTraceCache(t.TempDir()) + require.NoError(t, err) + defer c.Close() + + str := func(s string) *string { return &s } + cfg := &tracers.TraceConfig{Tracer: str("callTracer")} + require.NoError(t, c.PutBlock(7, "callTracer", json.RawMessage(`[{"x":1}]`))) + + t.Run("hit returns the raw block JSON", func(t *testing.T) { + got, ok := tryBlockResultCache(c, 7, cfg) + require.True(t, ok) + require.JSONEq(t, `[{"x":1}]`, string(got.(json.RawMessage))) + }) + + t.Run("miss falls through (caller will try per-tx assembly)", func(t *testing.T) { + got, ok := tryBlockResultCache(c, 8, cfg) + require.False(t, ok) + require.Nil(t, got) + }) + + t.Run("unbakeable config -> miss without touching cache", func(t *testing.T) { + got, ok := tryBlockResultCache(c, 7, nil) + require.False(t, ok) + require.Nil(t, got) + }) + + t.Run("nil cache -> miss", func(t *testing.T) { + got, ok := tryBlockResultCache(nil, 7, cfg) + require.False(t, ok) + require.Nil(t, got) + }) +} diff --git a/evmrpc/tracers.go b/evmrpc/tracers.go index 6c2176ec76..872afadfa7 100644 --- a/evmrpc/tracers.go +++ b/evmrpc/tracers.go @@ -11,6 +11,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/hexutil" + gethtypes "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/eth/tracers" _ "github.com/ethereum/go-ethereum/eth/tracers/js" // run init()s to register JS tracers _ "github.com/ethereum/go-ethereum/eth/tracers/native" // run init()s to register native tracers @@ -30,6 +31,13 @@ import ( const ( IsPanicCacheSize = 5000 IsPanicCacheTTL 
= 1 * time.Minute + + // Bakeable tracer names — used both as cache keys and as the trace + // config Tracer value. Kept here so the tracer name appears in one + // place rather than being repeated as a string literal. + callTracerName = "callTracer" + prestateTracerName = "prestateTracer" + flatCallTracerName = "flatCallTracer" ) var errTraceConcurrencyLimit = errors.New("trace request rejected due to concurrency limit: server busy") @@ -181,6 +189,10 @@ func (api *DebugAPI) TraceTransaction(ctx context.Context, hash common.Hash, con startTime := time.Now() defer recordMetricsWithError("debug_traceTransaction", api.connectionType, startTime, returnErr) + if cached, ok := api.tryTraceCache(hash, config); ok { + return cached, nil + } + ctx, done, err := api.prepareTraceContext(ctx) if err != nil { return nil, err @@ -190,6 +202,134 @@ func (api *DebugAPI) TraceTransaction(ctx context.Context, hash common.Hash, con return api.tracersAPI.TraceTransaction(ctx, hash, config) } +// tryTraceCache returns the cached trace JSON for hash + config when the +// baker has already produced one. Misses (no cache, unbakeable tracer +// config, missing receipt, or absent row) fall through silently to the +// caller's existing path. 
+func (api *DebugAPI) tryTraceCache(hash common.Hash, config *tracers.TraceConfig) (interface{}, bool) {
+	tc := api.keeper.TraceCache()
+	if tc == nil {
+		return nil, false
+	}
+	tracer := bakeableTracerName(config)
+	if tracer == "" {
+		// Struct logger, custom JS tracer, or per-call tracer config: the
+		// baker never produced this shape, so go straight to live trace.
+		return nil, false
+	}
+	// Resolve tx -> block height via the receipt at the latest context.
+	receipt, err := api.keeper.GetReceipt(api.ctxProvider(LatestCtxHeight), hash)
+	if err != nil || receipt == nil {
+		return nil, false
+	}
+	raw, found, err := tc.Get(int64(receipt.BlockNumber), tracer, hash) //nolint:gosec
+	if err != nil || !found {
+		return nil, false
+	}
+	logger.Debug("trace cache hit", "tracer", tracer, "tx", hash.Hex(), "block", receipt.BlockNumber)
+	return raw, true
+}
+
+// blockTraceCacheGet assembles a block's trace from the per-tx cache rows.
+// It reports a hit only when every hash in txHashes is present; any miss
+// (nil cache, unbakeable config, absent row, read error) returns (nil, false)
+// so the caller falls back to live re-execution.
+func blockTraceCacheGet(cache *keeper.TraceCache, height int64, txHashes []common.Hash, config *tracers.TraceConfig) ([]*tracers.TxTraceResult, bool) {
+	if cache == nil {
+		return nil, false
+	}
+	tracer := bakeableTracerName(config)
+	if tracer == "" {
+		return nil, false
+	}
+	results := make([]*tracers.TxTraceResult, 0, len(txHashes))
+	for _, txHash := range txHashes {
+		raw, found, err := cache.Get(height, tracer, txHash)
+		if err != nil || !found {
+			return nil, false
+		}
+		results = append(results, &tracers.TxTraceResult{
+			TxHash: txHash,
+			Result: raw,
+		})
+	}
+	return results, true
+}
+
+// txHashesOf returns the hash of each transaction in txs, preserving order.
+func txHashesOf(txs gethtypes.Transactions) []common.Hash {
+	hashes := make([]common.Hash, len(txs))
+	for i := range txs {
+		hashes[i] = txs[i].Hash()
+	}
+	return hashes
+}
+
+// tryBlockResultCache returns the denormalized per-block trace JSON when the
+// baker has it. Single PK seek; preferred fast path when block_results are
+// being written. Returns (nil, false) on miss so the caller can fall back to
+// the per-tx assembly path.
+func tryBlockResultCache(cache *keeper.TraceCache, height int64, config *tracers.TraceConfig) (interface{}, bool) { + if cache == nil { + return nil, false + } + name := bakeableTracerName(config) + if name == "" { + return nil, false + } + bz, ok, err := cache.GetBlock(height, name) + if err != nil || !ok { + return nil, false + } + return bz, true +} + +func (api *DebugAPI) tryBlockTraceCacheByNumber(ctx context.Context, number rpc.BlockNumber, config *tracers.TraceConfig) (interface{}, bool) { + if api.keeper.TraceCache() == nil || bakeableTracerName(config) == "" { + return nil, false + } + block, _, err := api.backend.BlockByNumber(ctx, number) + if err != nil || block == nil { + return nil, false + } + height := int64(block.NumberU64()) //nolint:gosec + if v, ok := tryBlockResultCache(api.keeper.TraceCache(), height, config); ok { + return v, true + } + return blockTraceCacheGet(api.keeper.TraceCache(), height, txHashesOf(block.Transactions()), config) +} + +func (api *DebugAPI) tryBlockTraceCacheByHash(ctx context.Context, hash common.Hash, config *tracers.TraceConfig) (interface{}, bool) { + if api.keeper.TraceCache() == nil || bakeableTracerName(config) == "" { + return nil, false + } + block, _, err := api.backend.BlockByHash(ctx, hash) + if err != nil || block == nil { + return nil, false + } + height := int64(block.NumberU64()) //nolint:gosec + if v, ok := tryBlockResultCache(api.keeper.TraceCache(), height, config); ok { + return v, true + } + return blockTraceCacheGet(api.keeper.TraceCache(), height, txHashesOf(block.Transactions()), config) +} + +// bakeableTracerName returns the tracer name iff the config is one the baker +// produces (no per-call TracerConfig) and therefore safe to serve from cache. +// Empty string means "fall through to live re-execution". +func bakeableTracerName(config *tracers.TraceConfig) string { + // Default config (no Tracer name) means struct logger, which we don't bake. 
+ if config == nil || config.Tracer == nil { + return "" + } + // Per-tracer config (e.g. prestateTracer with diffMode) isn't part of + // the cache key, so we can't safely serve those from cache. + if len(config.TracerConfig) > 0 { + return "" + } + switch *config.Tracer { + case callTracerName, prestateTracerName, flatCallTracerName: + return *config.Tracer + default: + return "" + } +} + func (api *DebugAPI) AsRawJSON(result interface{}) ([]byte, bool) { switch v := result.(type) { case json.RawMessage: @@ -222,6 +362,12 @@ func (api *SeiDebugAPI) TraceBlockByNumberExcludeTraceFail(ctx context.Context, return nil, fmt.Errorf("block number %d is beyond max lookback of %d", number.Int64(), api.maxBlockLookback) } + if cached, ok := api.tryBlockTraceCacheByNumber(ctx, number, config); ok { + // Cached results are never errored (the baker skips errored traces), + // so the ExcludeTraceFail filter is a no-op here. + return cached, nil + } + if api.shouldUseProfiledBlockTrace(config) { result, returnErr = api.profiledTraceBlockByNumber(ctx, number, config) } else { @@ -254,6 +400,10 @@ func (api *SeiDebugAPI) TraceBlockByHashExcludeTraceFail(ctx context.Context, ha } defer done() + if cached, ok := api.tryBlockTraceCacheByHash(ctx, hash, config); ok { + return cached, nil + } + if api.shouldUseProfiledBlockTrace(config) { result, returnErr = api.profiledTraceBlockByHash(ctx, hash, config) } else { @@ -296,10 +446,10 @@ func (api *DebugAPI) isPanicOrSyntheticTx(ctx context.Context, hash common.Hash) } } - callTracer := "callTracer" + tracerName := callTracerName // This internal trace call is not directly acquiring the DebugAPI's semaphore. 
tracersResult, err := api.tracersAPI.TraceBlockByNumber(ctx, rpc.BlockNumber(height), &tracers.TraceConfig{ //nolint:gosec - Tracer: &callTracer, + Tracer: &tracerName, }) if err != nil { return false, err @@ -344,6 +494,10 @@ func (api *DebugAPI) TraceBlockByNumber(ctx context.Context, number rpc.BlockNum return nil, fmt.Errorf("block number %d is beyond max lookback of %d", number.Int64(), api.maxBlockLookback) } + if cached, ok := api.tryBlockTraceCacheByNumber(ctx, number, config); ok { + return cached, nil + } + if api.shouldUseProfiledBlockTrace(config) { result, returnErr = api.profiledTraceBlockByNumber(ctx, number, config) } else { @@ -362,6 +516,10 @@ func (api *DebugAPI) TraceBlockByHash(ctx context.Context, hash common.Hash, con } defer done() + if cached, ok := api.tryBlockTraceCacheByHash(ctx, hash, config); ok { + return cached, nil + } + if api.shouldUseProfiledBlockTrace(config) { result, returnErr = api.profiledTraceBlockByHash(ctx, hash, config) } else { diff --git a/x/evm/keeper/abci.go b/x/evm/keeper/abci.go index d1880e8e7b..e555f364da 100644 --- a/x/evm/keeper/abci.go +++ b/x/evm/keeper/abci.go @@ -60,6 +60,14 @@ func (k *Keeper) BeginBlock(ctx sdk.Context) { func (k *Keeper) EndBlock(ctx sdk.Context, height int64, blockGasUsed int64) { defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyEndBlocker) + // Forward the just-finalized PREVIOUS height to the trace baker. By + // the time EndBlock(N) fires, N itself isn't yet "safe latest" for + // trace queries (the indexer needs one more tick), so we always bake + // height-1 — guaranteed available. Skipped during tracing (re-entry + // guard) and on the genesis tick where height-1 wouldn't exist. 
+ if !ctx.IsTracing() && height > 1 { + k.traceCache.Enqueue(height - 1) + } // TODO: remove after all TxHashes have been removed k.RemoveFirstNTxHashes(ctx, DefaultTxHashesToRemove) diff --git a/x/evm/keeper/keeper.go b/x/evm/keeper/keeper.go index f9cc564187..e9724dabf0 100644 --- a/x/evm/keeper/keeper.go +++ b/x/evm/keeper/keeper.go @@ -93,6 +93,11 @@ type Keeper struct { customPrecompiles map[common.Address]putils.VersionedPrecompiles latestCustomPrecompiles map[common.Address]vm.PrecompiledContract latestUpgrade string + + // traceCache, when non-nil, provides cached debug_trace results and + // forwards committed-block heights to the registered baker. nil-safe: + // EndBlock and reader paths skip when unset. + traceCache *TraceCache } type AddressNoncePair struct { @@ -157,6 +162,12 @@ func NewKeeper( return k } +// SetTraceCache wires a trace cache onto the keeper. Pass nil to disable. +func (k *Keeper) SetTraceCache(c *TraceCache) { k.traceCache = c } + +// TraceCache returns the keeper's trace cache (may be nil). +func (k *Keeper) TraceCache() *TraceCache { return k.traceCache } + func (k *Keeper) SetCustomPrecompiles(cp map[common.Address]putils.VersionedPrecompiles, latestUpgrade string) { k.customPrecompiles = cp k.latestUpgrade = latestUpgrade diff --git a/x/evm/keeper/trace_cache.go b/x/evm/keeper/trace_cache.go new file mode 100644 index 0000000000..82bcc63286 --- /dev/null +++ b/x/evm/keeper/trace_cache.go @@ -0,0 +1,238 @@ +package keeper + +import ( + "encoding/binary" + "encoding/json" + "errors" + "fmt" + "path/filepath" + "sync" + + "github.com/cockroachdb/pebble/v2" + "github.com/ethereum/go-ethereum/common" +) + +// TraceCache stores pre-computed debug_trace results in a dedicated pebble db +// so writes don't share LSM with the chain state. Key shape: +// +// "ts/" || height(BE,8) || tracerLen(1) || tracer || txHash(32) +// +// Tx hashes are globally unique on this chain, so (height, tracer, txHash) is +// sufficient. 
height is leading so a single range delete prunes a window. +type TraceCache struct { + db *pebble.DB + + enqMu sync.Mutex + enqueuer TraceEnqueuer +} + +const ( + traceCachePrefix = "ts/" // per-tx: ts/// + traceCacheBlockPrefix = "tb/" // per-block: tb// + traceCacheLastBakedKy = "meta/last_baked_height" +) + +// NewTraceCache opens (or creates) the trace cache pebble db at +// /data/trace_cache. +func NewTraceCache(homeDir string) (*TraceCache, error) { + dir := filepath.Join(homeDir, "data", "trace_cache") + db, err := pebble.Open(dir, &pebble.Options{}) + if err != nil { + return nil, fmt.Errorf("open trace cache: %w", err) + } + return &TraceCache{db: db}, nil +} + +func (c *TraceCache) Close() error { + if c == nil || c.db == nil { + return nil + } + return c.db.Close() +} + +func traceCacheKey(height int64, tracer string, txHash common.Hash) []byte { + if len(tracer) > 255 { + tracer = tracer[:255] + } + out := make([]byte, 0, len(traceCachePrefix)+8+1+len(tracer)+32) + out = append(out, traceCachePrefix...) + var hb [8]byte + binary.BigEndian.PutUint64(hb[:], uint64(height)) //nolint:gosec // block heights are non-negative + out = append(out, hb[:]...) + out = append(out, byte(len(tracer))) + out = append(out, tracer...) + out = append(out, txHash[:]...) + return out +} + +// Put stores a trace result. Safe to call on a nil receiver (no-op). +func (c *TraceCache) Put(height int64, tracer string, txHash common.Hash, value json.RawMessage) error { + if c == nil || c.db == nil { + return nil + } + return c.db.Set(traceCacheKey(height, tracer, txHash), value, pebble.NoSync) +} + +// traceCacheBlockKey builds the per-block key. Same height/tracer encoding +// as the per-tx key, just a different prefix and no txHash suffix. +func traceCacheBlockKey(height int64, tracer string) []byte { + if len(tracer) > 255 { + tracer = tracer[:255] + } + out := make([]byte, 0, len(traceCacheBlockPrefix)+8+1+len(tracer)) + out = append(out, traceCacheBlockPrefix...) 
+ var hb [8]byte + binary.BigEndian.PutUint64(hb[:], uint64(height)) //nolint:gosec // block heights are non-negative + out = append(out, hb[:]...) + out = append(out, byte(len(tracer))) + out = append(out, tracer...) + return out +} + +// PutBlock stores the assembled per-block trace result (JSON-marshaled +// []*TxTraceResult) so block-level reads are a single PK seek instead of N +// per-tx seeks. Safe on a nil receiver. +func (c *TraceCache) PutBlock(height int64, tracer string, value json.RawMessage) error { + if c == nil || c.db == nil { + return nil + } + return c.db.Set(traceCacheBlockKey(height, tracer), value, pebble.NoSync) +} + +// GetBlock returns the cached per-block result, or (nil, false, nil) on miss. +// Safe on a nil receiver. +func (c *TraceCache) GetBlock(height int64, tracer string) (json.RawMessage, bool, error) { + if c == nil || c.db == nil { + return nil, false, nil + } + val, closer, err := c.db.Get(traceCacheBlockKey(height, tracer)) + if err != nil { + if errors.Is(err, pebble.ErrNotFound) { + return nil, false, nil + } + return nil, false, fmt.Errorf("trace cache get block: %w", err) + } + out := make(json.RawMessage, len(val)) + copy(out, val) + _ = closer.Close() + return out, true, nil +} + +// Get returns the cached trace, or (nil, false, nil) on miss. Safe on a nil +// receiver (returns miss). +func (c *TraceCache) Get(height int64, tracer string, txHash common.Hash) (json.RawMessage, bool, error) { + if c == nil || c.db == nil { + return nil, false, nil + } + val, closer, err := c.db.Get(traceCacheKey(height, tracer, txHash)) + if err != nil { + if errors.Is(err, pebble.ErrNotFound) { + return nil, false, nil + } + return nil, false, fmt.Errorf("trace cache get: %w", err) + } + out := make(json.RawMessage, len(val)) + copy(out, val) + _ = closer.Close() + return out, true, nil +} + +// SetLastBakedHeight records the highest block height the baker has fully +// processed. 
Only writes when h is strictly greater than the stored value +// (atomic max under a small lock) so out-of-order workers can't roll it +// back. Safe on a nil receiver. +func (c *TraceCache) SetLastBakedHeight(h int64) error { + if c == nil || c.db == nil { + return nil + } + c.enqMu.Lock() + defer c.enqMu.Unlock() + cur, err := c.lastBakedHeightUnlocked() + if err != nil { + return err + } + if h <= cur { + return nil + } + var b [8]byte + binary.BigEndian.PutUint64(b[:], uint64(h)) //nolint:gosec + return c.db.Set([]byte(traceCacheLastBakedKy), b[:], pebble.NoSync) +} + +// LastBakedHeight returns the highest block height the baker has recorded as +// fully processed, or 0 if unset. Safe on a nil receiver. +func (c *TraceCache) LastBakedHeight() (int64, error) { + if c == nil || c.db == nil { + return 0, nil + } + c.enqMu.Lock() + defer c.enqMu.Unlock() + return c.lastBakedHeightUnlocked() +} + +func (c *TraceCache) lastBakedHeightUnlocked() (int64, error) { + val, closer, err := c.db.Get([]byte(traceCacheLastBakedKy)) + if err != nil { + if errors.Is(err, pebble.ErrNotFound) { + return 0, nil + } + return 0, fmt.Errorf("read last_baked_height: %w", err) + } + defer func() { _ = closer.Close() }() + if len(val) != 8 { + return 0, fmt.Errorf("trace cache: invalid last_baked_height length %d", len(val)) + } + return int64(binary.BigEndian.Uint64(val)), nil //nolint:gosec +} + +// Prune deletes per-tx and per-block cache entries with height strictly less +// than belowHeight. Two pebble range deletes — one per prefix — both bounded +// work regardless of how many rows are below. 
+func (c *TraceCache) Prune(belowHeight int64) error { + if c == nil || c.db == nil || belowHeight <= 0 { + return nil + } + var lo, hi [8]byte + binary.BigEndian.PutUint64(lo[:], 0) + binary.BigEndian.PutUint64(hi[:], uint64(belowHeight)) //nolint:gosec // block heights are non-negative + for _, prefix := range []string{traceCachePrefix, traceCacheBlockPrefix} { + start := append([]byte(prefix), lo[:]...) + end := append([]byte(prefix), hi[:]...) + if err := c.db.DeleteRange(start, end, pebble.NoSync); err != nil { + return err + } + } + return nil +} + +// TraceEnqueuer is implemented by the trace baker; the keeper holds a +// reference (via SetTraceEnqueuer) and forwards block heights to it from +// EndBlock so the baker can re-execute off the consensus path. +type TraceEnqueuer interface { + Enqueue(height int64) +} + +// SetTraceEnqueuer wires a TraceEnqueuer onto the cache so the keeper has a +// single field that owns both. Safe to call multiple times; nil disables. +func (c *TraceCache) SetTraceEnqueuer(e TraceEnqueuer) { + if c == nil { + return + } + c.enqMu.Lock() + defer c.enqMu.Unlock() + c.enqueuer = e +} + +// Enqueue forwards a height to the registered enqueuer if any. Non-blocking +// by contract of the enqueuer; safe on a nil cache. 
+func (c *TraceCache) Enqueue(height int64) {
+	if c == nil {
+		return
+	}
+	// Snapshot the sink under the lock, invoke it outside the lock so a
+	// slow consumer cannot block SetTraceEnqueuer.
+	c.enqMu.Lock()
+	sink := c.enqueuer
+	c.enqMu.Unlock()
+	if sink == nil {
+		return
+	}
+	sink.Enqueue(height)
+}
diff --git a/x/evm/keeper/trace_cache_test.go b/x/evm/keeper/trace_cache_test.go
new file mode 100644
index 0000000000..753249c0f4
+package keeper
+
+import (
+	"encoding/json"
+	"sync/atomic"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/stretchr/testify/require"
+)
+
+// newTestCache opens a fresh cache under a per-test temp dir and closes it
+// when the test finishes.
+func newTestCache(t *testing.T) *TraceCache {
+	t.Helper()
+	c, err := NewTraceCache(t.TempDir())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = c.Close() })
+	return c
+}
+
+func TestTraceCachePutGet(t *testing.T) {
+	tc := newTestCache(t)
+
+	txHash := common.HexToHash("0x02")
+	payload := json.RawMessage(`{"calls":[]}`)
+	require.NoError(t, tc.Put(100, "callTracer", txHash, payload))
+
+	got, found, err := tc.Get(100, "callTracer", txHash)
+	require.NoError(t, err)
+	require.True(t, found)
+	require.JSONEq(t, string(payload), string(got))
+}
+
+func TestTraceCacheMiss(t *testing.T) {
+	tc := newTestCache(t)
+
+	_, found, err := tc.Get(0, "callTracer", common.Hash{})
+	require.NoError(t, err)
+	require.False(t, found)
+}
+
+func TestTraceCacheKeyDistinctness(t *testing.T) {
+	// The same txHash under different (height, tracer) coordinates must land
+	// in distinct rows — the key encodes every dimension without collisions.
+	tc := newTestCache(t)
+
+	txHash := common.HexToHash("0xbb")
+	rows := []struct {
+		height int64
+		tracer string
+		value  string
+	}{
+		{1, "callTracer", `{"a":1}`},
+		{1, "prestateTracer", `{"a":2}`},
+		{2, "callTracer", `{"a":3}`},
+	}
+	for _, r := range rows {
+		require.NoError(t, tc.Put(r.height, r.tracer, txHash, json.RawMessage(r.value)))
+	}
+	for _, r := range rows {
+		got, found, err := tc.Get(r.height, r.tracer, txHash)
+		require.NoError(t, err)
+		require.True(t, found)
+		require.JSONEq(t, r.value, string(got))
+	}
+}
+
+func TestTraceCachePruneByHeight(t *testing.T) {
+	tc := newTestCache(t)
+
+	txHash := common.HexToHash("0x02")
+	for h := int64(1); h <= 5; h++ {
+		require.NoError(t, tc.Put(h, "callTracer", txHash, json.RawMessage(`"x"`)))
+	}
+
+	require.NoError(t, tc.Prune(3))
+
+	for h := int64(1); h <= 5; h++ {
+		_, found, err := tc.Get(h, "callTracer", txHash)
+		require.NoError(t, err)
+		if h < 3 {
+			require.False(t, found, "height %d should be pruned", h)
+		} else {
+			require.True(t, found, "height %d should remain", h)
+		}
+	}
+}
+
+func TestTraceCacheNilSafe(t *testing.T) {
+	// Every method must no-op on a nil receiver so the keeper can hold a
+	// single *TraceCache field that stays nil when the feature is disabled.
+	var tc *TraceCache
+
+	require.NoError(t, tc.Close())
+	require.NoError(t, tc.Put(1, "x", common.Hash{}, json.RawMessage(`null`)))
+	_, found, err := tc.Get(1, "x", common.Hash{})
+	require.NoError(t, err)
+	require.False(t, found)
+	require.NoError(t, tc.Prune(100))
+
+	tc.SetTraceEnqueuer(nil)
+	tc.Enqueue(42) // must not panic
+}
+
+type recordingEnqueuer struct{ heights atomic.Value }
+
+func (r *recordingEnqueuer) Enqueue(h int64) {
+	prev, _ := r.heights.Load().([]int64)
+	next := make([]int64, 0, len(prev)+1)
+	next = append(next, prev...)
+	next = append(next, h)
+	r.heights.Store(next)
+}
+
+func TestTraceCachePutGetBlock(t *testing.T) {
+	tc := newTestCache(t)
+
+	payload := json.RawMessage(`[{"txHash":"0x1","result":{}}]`)
+	require.NoError(t, tc.PutBlock(42, "callTracer", payload))
+
+	got, found, err := tc.GetBlock(42, "callTracer")
+	require.NoError(t, err)
+	require.True(t, found)
+	require.JSONEq(t, string(payload), string(got))
+}
+
+func TestTraceCacheBlockKeyDistinctFromTxKey(t *testing.T) {
+	// Per-tx and per-block rows live under different prefixes; a read
+	// through one keyspace must never observe a row written via the other.
+	tc := newTestCache(t)
+
+	require.NoError(t, tc.Put(1, "callTracer", common.HexToHash("0x1"), json.RawMessage(`{"a":1}`)))
+	require.NoError(t, tc.PutBlock(1, "callTracer", json.RawMessage(`[{"a":2}]`)))
+
+	txRow, found, _ := tc.Get(1, "callTracer", common.HexToHash("0x1"))
+	require.True(t, found)
+	require.JSONEq(t, `{"a":1}`, string(txRow))
+
+	blockRow, found, _ := tc.GetBlock(1, "callTracer")
+	require.True(t, found)
+	require.JSONEq(t, `[{"a":2}]`, string(blockRow))
+}
+
+func TestTraceCachePruneCoversBothKeyspaces(t *testing.T) {
+	// Prune must sweep BOTH keyspaces below the cutoff; if either prefix
+	// were skipped, stale rows would survive there.
+	tc := newTestCache(t)
+
+	txHash := common.HexToHash("0xab")
+	for h := int64(1); h <= 5; h++ {
+		require.NoError(t, tc.Put(h, "callTracer", txHash, json.RawMessage(`"x"`)))
+		require.NoError(t, tc.PutBlock(h, "callTracer", json.RawMessage(`[]`)))
+	}
+
+	require.NoError(t, tc.Prune(3))
+
+	for h := int64(1); h <= 5; h++ {
+		_, foundTx, _ := tc.Get(h, "callTracer", txHash)
+		_, foundBlock, _ := tc.GetBlock(h, "callTracer")
+		if h < 3 {
+			require.False(t, foundTx, "tx row at height %d should be pruned", h)
+			require.False(t, foundBlock, "block row at height %d should be pruned", h)
+		} else {
+			require.True(t, foundTx, "tx row at height %d should remain", h)
+			require.True(t, foundBlock, "block row at height %d should remain", h)
+		}
+	}
+}
+
+func TestTraceCacheBlockNilSafe(t *testing.T) {
+	var tc *TraceCache
+	require.NoError(t, tc.PutBlock(1, "x", json.RawMessage(`null`)))
+	_, found, err := tc.GetBlock(1, "x")
+	require.NoError(t, err)
+	require.False(t, found)
+}
+
+func TestTraceCacheLastBakedHeight(t *testing.T) {
+	tc := newTestCache(t)
+
+	readHeight := func() int64 {
+		h, err := tc.LastBakedHeight()
+		require.NoError(t, err)
+		return h
+	}
+
+	// Unset -> zero.
+	require.Equal(t, int64(0), readHeight())
+
+	// Round-trip.
+	require.NoError(t, tc.SetLastBakedHeight(42))
+	require.Equal(t, int64(42), readHeight())
+
+	// Atomic-max semantics: a lower write is ignored so out-of-order
+	// workers can't roll the watermark backwards...
+	require.NoError(t, tc.SetLastBakedHeight(10))
+	require.Equal(t, int64(42), readHeight())
+
+	// ...while a higher write advances it.
+	require.NoError(t, tc.SetLastBakedHeight(100))
+	require.Equal(t, int64(100), readHeight())
+}
+
+func TestTraceCachePruneSparesMetaKey(t *testing.T) {
+	// The watermark lives under "meta/", outside both pruned key ranges,
+	// so even an aggressive Prune must leave it intact.
+	tc := newTestCache(t)
+
+	require.NoError(t, tc.Put(1, "callTracer", common.HexToHash("0x1"), nil))
+	require.NoError(t, tc.SetLastBakedHeight(10))
+
+	require.NoError(t, tc.Prune(1_000_000))
+
+	got, err := tc.LastBakedHeight()
+	require.NoError(t, err)
+	require.Equal(t, int64(10), got, "meta/last_baked_height must survive Prune")
+}
+
+func TestTraceCacheLastBakedNilSafe(t *testing.T) {
+	var tc *TraceCache
+	require.NoError(t, tc.SetLastBakedHeight(5))
+	got, err := tc.LastBakedHeight()
+	require.NoError(t, err)
+	require.Equal(t, int64(0), got)
+}
+
+func TestTraceCacheEnqueueForwarding(t *testing.T) {
+	tc := newTestCache(t)
+
+	rec := &recordingEnqueuer{}
+	tc.SetTraceEnqueuer(rec)
+	tc.Enqueue(7)
+	tc.Enqueue(8)
+
+	seen, _ := rec.heights.Load().([]int64)
+	require.Equal(t, []int64{7, 8}, seen)
+
+	// Clearing the enqueuer must stop forwarding.
+	tc.SetTraceEnqueuer(nil)
+	tc.Enqueue(9)
+	seen, _ = rec.heights.Load().([]int64)
+	require.Equal(t, []int64{7, 8}, seen)
+}