Skip to content
Merged
Show file tree
Hide file tree
Changes from 43 commits
Commits
Show all changes
102 commits
Select commit Hold shift + click to select a range
7c5e216
Created a cache for flatKV.
Mar 5, 2026
4a404ee
checkpoint
Mar 5, 2026
d36e825
incremental progress
Mar 5, 2026
2ccbe62
address feedback
Mar 5, 2026
f412e85
more fixes
Mar 5, 2026
e310037
bugfix
Mar 5, 2026
cf1071c
wire in cache
Mar 5, 2026
11232ff
Merge branch 'main' into cjl/flatkv-cache
Mar 6, 2026
a8c1c75
incremental improvements
Mar 6, 2026
221d114
checkin
Mar 6, 2026
8eca079
Moved where the cache sits
Mar 6, 2026
267feae
bugfix
Mar 6, 2026
50b0be6
Batch update the cache
Mar 6, 2026
2ca00d6
Add batch read to cache
Mar 6, 2026
8f8534a
Add batch get to db interface
Mar 6, 2026
23c0277
integrate batch reads
Mar 6, 2026
02d3ca1
wire in cache
Mar 6, 2026
7ee1b08
Introduce work pool, size caches differently
Mar 6, 2026
20c70c3
bugfix
Mar 6, 2026
b714789
Add unit constants
Mar 9, 2026
cc9d41d
refactor threading utils
Mar 9, 2026
53b2bd8
cleanup
Mar 9, 2026
c10e0cd
Cleanup, fix race condition
Mar 9, 2026
04f40fa
cleanup
Mar 9, 2026
b4e4d2c
cleanup
Mar 9, 2026
e53fefa
use pool
Mar 9, 2026
438fc8d
fix ctx lifecycle
Mar 9, 2026
19a8a19
Merge branch 'main' into cjl/flatkv-cache
Mar 9, 2026
23440f6
rename package
Mar 9, 2026
4ecc8fd
Clean up string copies
Mar 9, 2026
7a315c6
simplify gc
Mar 9, 2026
a3f3907
better error handling
Mar 9, 2026
f255b87
use config to configure cache params
Mar 9, 2026
cf0a73d
Allow flatkv config to be set in tests
Mar 9, 2026
0b34737
tweak config
Mar 10, 2026
452aa4d
incremental progress
Mar 10, 2026
1c804a8
move data dir into config
Mar 10, 2026
663b2ea
fix config file
Mar 10, 2026
bb530b5
cleanup
Mar 10, 2026
04daf75
move pebble metrics to proper location
Mar 10, 2026
354818e
clean up metrics
Mar 10, 2026
b1574ac
updated dashboard
Mar 10, 2026
07e071c
fix histograms
Mar 10, 2026
d090796
threading tests
Mar 10, 2026
dfd92c1
test lru queue
Mar 10, 2026
f751a9b
unit tests for shard
Mar 10, 2026
7b5538e
cache tests
Mar 10, 2026
dc8d0c9
moar unit tests
Mar 10, 2026
e9cc9ca
cleanup
Mar 10, 2026
c7a418c
Merge branch 'main' into cjl/flatkv-cache
Mar 10, 2026
087fd0f
Merge branch 'main' into cjl/flatkv-cache
Mar 10, 2026
eb9bc51
Merge branch 'main' into cjl/flatkv-cache
Mar 11, 2026
cea0ebb
unit test fixes
Mar 11, 2026
e58bec2
fix hash bug
Mar 11, 2026
c3f34b1
fixed path bug
Mar 11, 2026
111459f
Helper files for flatKV cache
Mar 11, 2026
d40395f
add missing struct
Mar 11, 2026
c8e85d2
Merge branch 'main' into cjl/cache-auxilery
Mar 12, 2026
ed7e4b6
made suggested changes
Mar 12, 2026
5c46647
fix tests
Mar 12, 2026
be0d4f5
Merge branch 'main' into cjl/flatkv-cache
Mar 12, 2026
9ff2199
Merge branch 'cjl/cache-auxilery' into cjl/flatkv-cache
Mar 12, 2026
bb2fe7e
Made suggested change to cache structure
Mar 13, 2026
f4b8326
rename cache -> dbcache to avoid gitignore
Mar 13, 2026
4b2247b
Helper files for the flatKV cache implementation
Mar 13, 2026
36d7328
bugfix
Mar 13, 2026
d759a9b
Merge branch 'cjl/cache-auxilery-2' into cjl/flatkv-cache
Mar 16, 2026
4ba242b
fix merge problems
Mar 16, 2026
e19a998
refactor API
Mar 16, 2026
94ae673
made suggested changes
Mar 16, 2026
ed10a26
made suggested changes
Mar 16, 2026
81dfd46
fix bug
Mar 16, 2026
480839d
Merge branch 'main' into cjl/flatkv-cache
Mar 16, 2026
7835683
Implement a standard cache.
Mar 16, 2026
950197c
cleanup
Mar 16, 2026
cff96ab
Merge branch 'main' into cjl/cache-impl
Mar 17, 2026
003fcc9
made suggested changes
Mar 17, 2026
a208a1b
made suggested change
Mar 17, 2026
157a600
made suggested changes
Mar 17, 2026
b41639f
fix unit test
Mar 17, 2026
fe31475
fix unit test
Mar 17, 2026
0702197
Merge branch 'cjl/cache-impl' into cjl/flatkv-cache
Mar 17, 2026
6d435f5
Merge branch 'main' into cjl/flatkv-cache
Mar 20, 2026
64f8530
fixed merge bugs
Mar 20, 2026
d9c5fc1
fix teardown race
Mar 20, 2026
14593ec
Add logging metric, clean up log files before/after run
Mar 20, 2026
2d88076
fix unit test
Mar 20, 2026
ccad074
fix unit tests
Mar 20, 2026
38ffd35
fix unit test
Mar 20, 2026
f143d30
made suggested changes
Mar 25, 2026
a18fd93
config changes
Mar 25, 2026
34e711d
made suggested changes
Mar 25, 2026
b596f89
Merge branch 'main' into cjl/flatkv-cache
Mar 25, 2026
33378ce
bugfix
Mar 27, 2026
a402238
don't ignore errors from batch get
Mar 30, 2026
b18bc4e
Merge branch 'main' into cjl/flatkv-cache
Mar 30, 2026
ee30ca9
made suggested changes
Mar 31, 2026
c8fd5ec
Merge branch 'main' into cjl/flatkv-cache
Mar 31, 2026
8094375
Merge branch 'main' into cjl/flatkv-cache
Mar 31, 2026
faf4871
make suggested change to pool
Apr 1, 2026
9c8454f
Merge branch 'main' into cjl/flatkv-cache
Apr 1, 2026
872b0a6
fix merge problem
Apr 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,114 changes: 945 additions & 169 deletions docker/monitornode/dashboards/cryptosim-dashboard.json

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion sei-cosmos/storev2/rootmulti/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,10 @@ func NewStore(
limiter = rate.NewLimiter(rate.Limit(scConfig.HistoricalProofRateLimit), burst)
}
ctx := context.Background()
scStore := composite.NewCompositeCommitStore(ctx, scDir, logger, scConfig)
scStore, err := composite.NewCompositeCommitStore(ctx, scDir, logger, scConfig)
if err != nil {
panic(err)
}
store := &Store{
logger: logger,
scStore: scStore,
Expand Down
23 changes: 23 additions & 0 deletions sei-db/common/metrics/buckets.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package metrics

// Shared histogram bucket boundaries, intended for reuse across the codebase.
// The OTel default buckets are too coarse to yield meaningful percentile
// queries in Grafana, so we define finer-grained ones here.

// LatencyBuckets spans 10μs through 5 minutes: fine enough for fast key
// lookups, wide enough for slow compactions/flushes, so individual metrics
// do not need their own tuning.
var LatencyBuckets = []float64{
	// 10μs–500μs
	0.00001, 0.000025, 0.00005, 0.0001, 0.00025, 0.0005,
	// 1ms–50ms
	0.001, 0.0025, 0.005, 0.01, 0.025, 0.05,
	// 100ms–5min
	0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 300,
}

// ByteSizeBuckets spans 256B through 1GB for data-size histograms.
var ByteSizeBuckets = []float64{
	// 256B–256KB
	256, 1024, 4096, 16384, 65536, 262144,
	// 1MB–1GB
	1 << 20, 4 << 20, 16 << 20, 64 << 20, 256 << 20, 1 << 30,
}

// CountBuckets spans 1 through 1M for per-operation step/iteration counts.
var CountBuckets = []float64{
	1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 100000, 1000000,
}
1 change: 1 addition & 0 deletions sei-db/common/metrics/phase_timer.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ func NewPhaseTimerFactory(meter metric.Meter, timerName string) *PhaseTimerFacto
timerName+"_phase_latency_seconds",
metric.WithDescription("Latency per phase (seconds); use for p99, p95, etc."),
metric.WithUnit("s"),
metric.WithExplicitBucketBoundaries(LatencyBuckets...),
)
return &PhaseTimerFactory{
phaseDurationTotal: phaseDurationTotal,
Expand Down
19 changes: 19 additions & 0 deletions sei-db/common/threading/adhoc_pool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package threading

import "context"

var _ Pool = (*adHocPool)(nil)

// adHocPool is a Pool that runs each task in a new goroutine.
// Intended for use in unit tests or where performance is not important.
type adHocPool struct{}

// NewAdHocPool creates a Pool that runs each submitted task in a one-off goroutine.
func NewAdHocPool() Pool {
return &adHocPool{}
}

func (p *adHocPool) Submit(_ context.Context, task func()) error {
go task()
Comment thread Dismissed
return nil
}
32 changes: 32 additions & 0 deletions sei-db/common/threading/chan_utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package threading

import (
"context"
"fmt"
)

// TODO unit test before merge

// Push to a channel, returning an error if the context is cancelled before the value is pushed.
func InterruptiblePush[T any](ctx context.Context, ch chan T, value T) error {
select {
case <-ctx.Done():
return fmt.Errorf("context cancelled: %w", ctx.Err())
case ch <- value:
return nil
}
}

// Pull from a channel, returning an error if the context is cancelled before the value is pulled.
func InterruptiblePull[T any](ctx context.Context, ch <-chan T) (T, error) {
var zero T
select {
case <-ctx.Done():
return zero, fmt.Errorf("context cancelled: %w", ctx.Err())
case value, ok := <-ch:
if !ok {
return zero, fmt.Errorf("channel closed")
}
return value, nil
}
}
74 changes: 74 additions & 0 deletions sei-db/common/threading/elastic_pool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package threading

import (
"context"
"fmt"
)

var _ Pool = (*elasticPool)(nil)

// elasticPool is a pool that guarantees every submitted task begins executing
// immediately without waiting for other tasks to finish first. It maintains a
// set of warm workers for goroutine reuse, and spawns temporary goroutines when
// all warm workers are busy.
//
// This is useful when tasks submitted to the pool may depend on other tasks in
// the same pool. For example, if task A is submitted and then submits task B,
// and A waits for B to complete, a fixed-size pool may deadlock when all
// workers are occupied, since task B can never be scheduled. An
// elastic pool avoids this by ensuring B starts immediately in a temporary
// goroutine if all workers are busy.
type elasticPool struct {
workQueue chan func()
}

// NewElasticPool creates a pool with the given number of warm workers. Submitted
// tasks are handed off to an idle warm worker if one is available, otherwise a
// temporary goroutine is spawned. Tasks are never queued behind other tasks.
func NewElasticPool(
ctx context.Context,
name string,
warmWorkers int,
) Pool {
workQueue := make(chan func())
ep := &elasticPool{
workQueue: workQueue,
}

for i := 0; i < warmWorkers; i++ {
go ep.worker()

Check notice

Code scanning / CodeQL

Spawning a Go routine Note

Spawning a Go routine may be a possible source of non-determinism
}

go func() {
<-ctx.Done()
close(workQueue)
}()

Check notice

Code scanning / CodeQL

Spawning a Go routine Note

Spawning a Go routine may be a possible source of non-determinism

return ep
}

func (ep *elasticPool) Submit(ctx context.Context, task func()) (err error) {
defer func() {
if recover() != nil {
err = fmt.Errorf("elastic pool is shut down")
}
}()

select {
case <-ctx.Done():
return ctx.Err()
case ep.workQueue <- task:
return nil
default:
// We hit this case when all workers are busy. Under standard operation, this should
// be fairly rare, but it's not catastrophic if it happens.
go task()

Check notice

Code scanning / CodeQL

Spawning a Go routine Note

Spawning a Go routine may be a possible source of non-determinism
return nil
}
}

// worker executes queued tasks until the work queue is closed.
func (ep *elasticPool) worker() {
	for {
		task, ok := <-ep.workQueue
		if !ok {
			return
		}
		task()
	}
}
72 changes: 72 additions & 0 deletions sei-db/common/threading/fixed_pool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package threading

import (
"context"
"fmt"
)

var _ Pool = (*fixedPool)(nil)

// fixedPool is a pool of workers that can be used to execute tasks concurrently.
// More efficient than spawning large numbers of short lived goroutines.
type fixedPool struct {
	// workQueue carries submitted tasks to the worker goroutines. It is
	// buffered (size chosen in NewFixedPool) and closed to signal shutdown.
	workQueue chan func()
}

// TODO add metrics!
// TODO unit test before merging!

// Create a new work pool.
func NewFixedPool(
// The work pool shuts down when the context is done.
ctx context.Context,
// The name of the work pool. Used for metrics.
name string,
// The number of workers to create.
workers int,
// The size of the work queue. Once full, Submit will block until a slot is available.
queueSize int,
) Pool {

workQueue := make(chan func(), queueSize)
fp := &fixedPool{
workQueue: workQueue,
}

for i := 0; i < workers; i++ {
go fp.worker()

Check notice

Code scanning / CodeQL

Spawning a Go routine Note

Spawning a Go routine may be a possible source of non-determinism
}

// Shutdown the work pool when the context is done.
go func() {
<-ctx.Done()
close(workQueue)

// Handle any remaining tasks in the queue to avoid caller deadlock.
for task := range workQueue {
task()
}
}()

Check notice

Code scanning / CodeQL

Spawning a Go routine Note

Spawning a Go routine may be a possible source of non-determinism

return fp
}

// Submit enqueues task, blocking while the queue is full. It returns the
// context's error if ctx finishes first, or an error if the pool has shut
// down.
func (fp *fixedPool) Submit(ctx context.Context, task func()) (err error) {
	// A send on the closed queue panics after shutdown; surface that panic
	// as an error instead.
	defer func() {
		if recover() != nil {
			err = fmt.Errorf("fixed pool is shut down")
		}
	}()

	select {
	case fp.workQueue <- task:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

// worker executes queued tasks until the work queue is closed.
func (fp *fixedPool) worker() {
	for {
		task, ok := <-fp.workQueue
		if !ok {
			return
		}
		task()
	}
}
9 changes: 9 additions & 0 deletions sei-db/common/threading/pool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package threading

import "context"

// Pool is a pool of workers that can be used to execute tasks concurrently.
type Pool interface {
	// Submit submits a task to the pool. Implementations may run the task
	// asynchronously; a non-nil error means the task was not accepted
	// (e.g. the context finished first, or the pool has shut down).
	Submit(ctx context.Context, task func()) error
}
20 changes: 20 additions & 0 deletions sei-db/common/unit/data_units.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package unit

// Binary (1024-based) data-size constants, in bytes.
//
// All of these are untyped constants. KB through EB fit in an int64, but
// ZB (2^70) and YB (2^80) overflow every built-in integer type, so they can
// only be used in floating-point or compile-time constant contexts.
const (
	// KB is the number of bytes in a kilobyte.
	KB = 1 << 10
	// MB is the number of bytes in a megabyte.
	MB = 1 << 20
	// GB is the number of bytes in a gigabyte.
	GB = 1 << 30
	// TB is the number of bytes in a terabyte.
	TB = 1 << 40
	// PB is the number of bytes in a petabyte.
	PB = 1 << 50
	// EB is the number of bytes in an exabyte.
	EB = 1 << 60
	// ZB is the number of bytes in a zettabyte.
	ZB = 1 << 70
	// YB is the number of bytes in a yottabyte.
	YB = 1 << 80
)
2 changes: 1 addition & 1 deletion sei-db/config/sc_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ type StateCommitConfig struct {
MemIAVLConfig memiavl.Config

// FlatKVConfig is the configuration for the FlatKV (EVM) backend
FlatKVConfig flatkv.Config
FlatKVConfig *flatkv.Config
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to make this a pointer?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted back to a direct reference.

No need to make this a pointer, except that I prefer pointer types for any struct that has more than one or two fields. It's unlikely that this is going to be a major performance factor in this case, so I don't mind using a direct reference if that's your preferred pattern.


// Max concurrent historical proof queries (RPC /store path).
HistoricalProofMaxInFlight int `mapstructure:"historical-proof-max-inflight"`
Expand Down
37 changes: 30 additions & 7 deletions sei-db/db_engine/pebbledb/batch.go
Original file line number Diff line number Diff line change
@@ -1,39 +1,61 @@
package pebbledb

import (
"fmt"

"github.com/cockroachdb/pebble/v2"
"github.com/sei-protocol/sei-chain/sei-db/db_engine/pebbledb/pebblecache"
"github.com/sei-protocol/sei-chain/sei-db/db_engine/types"
)

// pebbleBatch wraps a Pebble batch for atomic writes.
// Important: Callers must call Close() after Commit() to release batch resources,
// even if Commit() succeeds. Failure to Close() will leak memory.
type pebbleBatch struct {
	b     *pebble.Batch
	cache pebblecache.Cache

	// pendingCacheUpdates records every Set/Delete staged in this batch so
	// the cache can be refreshed after a successful commit.
	pendingCacheUpdates []pebblecache.CacheUpdate
}

var _ types.Batch = (*pebbleBatch)(nil)

func newPebbleBatch(db *pebble.DB) *pebbleBatch {
return &pebbleBatch{b: db.NewBatch()}
func newPebbleBatch(db *pebble.DB, cache pebblecache.Cache) *pebbleBatch {
return &pebbleBatch{b: db.NewBatch(), cache: cache}
}

func (p *pebbleDB) NewBatch() types.Batch {
return newPebbleBatch(p.db)
return newPebbleBatch(p.db, p.cache)
}

// Set stages a write of key/value in the batch and records it for the cache
// update that happens on Commit.
// NOTE(review): key and value are retained (not copied) in
// pendingCacheUpdates until Commit — confirm callers do not mutate these
// slices after Set returns.
func (pb *pebbleBatch) Set(key, value []byte) error {
	// Durability options are applied on Commit.
	pb.pendingCacheUpdates = append(pb.pendingCacheUpdates, pebblecache.CacheUpdate{
		Key:   key,
		Value: value,
	})
	return pb.b.Set(key, value, nil)
}

// Delete stages a deletion of key in the batch and records it for the cache
// update that happens on Commit.
// NOTE(review): key is retained (not copied) in pendingCacheUpdates until
// Commit — confirm callers do not mutate it after Delete returns.
func (pb *pebbleBatch) Delete(key []byte) error {
	// Durability options are applied on Commit.
	pb.pendingCacheUpdates = append(pb.pendingCacheUpdates, pebblecache.CacheUpdate{
		Key:      key,
		IsDelete: true,
	})
	return pb.b.Delete(key, nil)
}

// Commit applies the batch to the database and then pushes the staged
// updates into the cache. The cache is only touched after the database
// commit succeeds, so it never reflects writes that were not applied.
func (pb *pebbleBatch) Commit(opts types.WriteOptions) error {
	if err := pb.b.Commit(toPebbleWriteOpts(opts)); err != nil {
		return fmt.Errorf("failed to commit batch: %w", err)
	}
	if err := pb.cache.BatchSet(pb.pendingCacheUpdates); err != nil {
		return fmt.Errorf("failed to set cache: %w", err)
	}
	pb.pendingCacheUpdates = nil
	return nil
}

func (pb *pebbleBatch) Len() int {
Expand All @@ -42,6 +64,7 @@ func (pb *pebbleBatch) Len() int {

// Reset clears the underlying Pebble batch and drops any staged cache
// updates so the batch can be reused.
func (pb *pebbleBatch) Reset() {
	pb.b.Reset()
	pb.pendingCacheUpdates = nil
}

func (pb *pebbleBatch) Close() error {
Expand Down
Loading
Loading