5 changes: 4 additions & 1 deletion go.mod
@@ -33,6 +33,7 @@ require (
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/hashicorp/raft v1.7.3
github.com/hashicorp/raft-boltdb/v2 v2.3.1
github.com/hashicorp/raft-mdb v0.0.0-20260220095904-29a69d9a0225
github.com/holiman/uint256 v1.3.2
github.com/ipfs/go-datastore v0.6.0
github.com/ipfs/go-ds-leveldb v0.5.0
@@ -74,10 +75,12 @@ require (
)

require (
github.com/armon/gomdb v0.0.0-20180202201627-75f545a47e89 // indirect
github.com/fatih/color v1.18.0 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/grafana/pyroscope-go v1.2.7 // indirect
github.com/grafana/pyroscope-go/godeltaprof v0.1.9 // indirect
github.com/hashicorp/go-msgpack v0.5.5 // indirect
golang.org/x/telemetry v0.0.0-20251008203120-078029d740a8 // indirect
)

@@ -155,7 +158,7 @@ require (
github.com/graph-gophers/graphql-go v1.3.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-bexpr v0.1.11 // indirect
github.com/hashicorp/go-hclog v1.6.2 // indirect
github.com/hashicorp/go-hclog v1.6.2
github.com/hashicorp/go-immutable-radix v1.0.0 // indirect
github.com/hashicorp/go-metrics v0.5.4 // indirect
github.com/hashicorp/go-msgpack/v2 v2.1.2 // indirect
158 changes: 158 additions & 0 deletions go.sum

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions op-conductor/README.md
@@ -20,6 +20,16 @@ The design will provide the below guarantees:

For configuration and runbook, please refer to [RUNBOOK.md](./RUNBOOK.md)

For latency and benchmarking notes around large unsafe payload commits, see [PERFORMANCE.md](./PERFORMANCE.md).

The raft storage backend is configurable with `OP_CONDUCTOR_RAFT_BACKEND`. `bbolt` remains the default for rollout safety, and `mdb` enables the faster LMDB-backed log/stable store. The default raft retention settings are tuned for conductor's latest-head-only FSM:

- `OP_CONDUCTOR_RAFT_SNAPSHOT_INTERVAL=1s`
- `OP_CONDUCTOR_RAFT_SNAPSHOT_THRESHOLD=48`
- `OP_CONDUCTOR_RAFT_TRAILING_LOGS=32`

If you switch backends, use a fresh per-node storage root such as `/data/raft-mdb/<server-id>` and roll one node at a time.
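Putting the settings above together, a node opting into the mdb backend might be configured like this (the storage path and server id are illustrative, not project defaults):

```shell
# Illustrative per-node environment for the mdb backend.
export OP_CONDUCTOR_RAFT_BACKEND=mdb
# Fresh per-node storage root; do not reuse the old bbolt directory.
export OP_CONDUCTOR_RAFT_STORAGE_DIR=/data/raft-mdb/sequencer-1
# LMDB map size in bytes; 1073741824 matches the documented 1 GiB default.
export OP_CONDUCTOR_RAFT_MDB_MAX_SIZE=1073741824
```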

## Design

### Architecture
14 changes: 14 additions & 0 deletions op-conductor/RUNBOOK.md
@@ -17,6 +17,8 @@ OP_NODE_CONDUCTOR_RPC=<conductor-rpc-endpoint> # for example http://conductor:85
# prefix for the server id, used to identify the server in the raft cluster
RAFT_SERVER_ID_PREFIX=<prefix-for-server-id> # for example, sequencer-1, sequencer-2, etc
OP_CONDUCTOR_RAFT_STORAGE_DIR=<raft-storage-dir>
OP_CONDUCTOR_RAFT_BACKEND=bbolt|mdb # defaults to bbolt; use mdb for the faster LMDB-backed store
OP_CONDUCTOR_RAFT_MDB_MAX_SIZE=<bytes> # only used with raft backend mdb, defaults to 1 GiB
OP_CONDUCTOR_RPC_ADDR=<rpc-address> # for example, 0.0.0.0
OP_CONDUCTOR_RPC_PORT=<rpc-port> # for example, 8545
OP_CONDUCTOR_METRICS_ENABLED=true/false
@@ -31,8 +33,14 @@ OP_CONDUCTOR_HEALTHCHECK_INTERVAL=<healthcheck-interval> # in seconds
OP_CONDUCTOR_HEALTHCHECK_UNSAFE_INTERVAL=<unsafe-interval> # Interval allowed between unsafe head and now measured in seconds
OP_CONDUCTOR_HEALTHCHECK_MIN_PEER_COUNT=<min-peer-count> # minimum number of peers required to be considered healthy
OP_CONDUCTOR_RAFT_BOOTSTRAP=true/false # set to true if you want to bootstrap the raft cluster
# conductor-specific raft retention defaults:
# OP_CONDUCTOR_RAFT_SNAPSHOT_INTERVAL=1s
# OP_CONDUCTOR_RAFT_SNAPSHOT_THRESHOLD=48
# OP_CONDUCTOR_RAFT_TRAILING_LOGS=32
```

When switching storage backends, do not reuse the old per-node raft directory in place. Use a fresh storage root such as `/data/raft-mdb` and roll one node at a time.

### How to bootstrap a sequencer cluster from scratch

In normal situations, you probably already have a running sequencer and want to turn it into an HA cluster. In this situation, you need to:
@@ -66,6 +74,12 @@ For every redeploy, depending on your underlying infrastructure, you need to mak
2. make sure the sequencer is caught up with the rest of the nodes (this step isn't strictly necessary since conductor could handle it, but from an HA perspective it makes no sense for a sequencer that is lagging behind to join the cluster and potentially become the leader)
3. resume conductor after it's caught up with the rest of the nodes so that conductor can start managing the sequencer

If you are changing `OP_CONDUCTOR_RAFT_BACKEND`, treat the rollout as a node replacement:

1. use a fresh local raft storage directory for the new backend
2. roll followers first, then the leader
3. never switch multiple nodes at once
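The node-replacement steps above can be sketched for a single follower; the storage root and the stop/start mechanism are hypothetical and depend on your deployment:

```shell
# Hypothetical backend switch for one follower; paths are examples only.
SERVER_ID=sequencer-1

# 1. stop conductor on this node (e.g. via your service manager)
# 2. point it at a fresh storage root for the new backend:
export OP_CONDUCTOR_RAFT_BACKEND=mdb
export OP_CONDUCTOR_RAFT_STORAGE_DIR=/data/raft-mdb/$SERVER_ID  # never the old bbolt dir
# 3. start conductor, confirm this node rejoined as a healthy follower,
#    then move to the next node; roll the leader last.
```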

### Disaster recovery

Whenever there is a disaster situation in which you see no route to having 2 healthy conductors in the cluster communicating with each other, you need to manually intervene to resume sequencing. The steps are as follows:
19 changes: 19 additions & 0 deletions op-conductor/conductor/config.go
@@ -9,6 +9,7 @@ import (
"github.com/pkg/errors"
"github.com/urfave/cli/v2"

"github.com/ethereum-optimism/optimism/op-conductor/consensus"
"github.com/ethereum-optimism/optimism/op-conductor/flags"
oplog "github.com/ethereum-optimism/optimism/op-service/log"
opmetrics "github.com/ethereum-optimism/optimism/op-service/metrics"
@@ -36,6 +37,12 @@ type Config struct {
// RaftStorageDir is the directory to store raft data.
RaftStorageDir string

// RaftBackend selects the local raft storage backend.
RaftBackend string

// RaftMDBMaxSize is the LMDB map size used by the mdb raft backend.
RaftMDBMaxSize uint64

// RaftBootstrap is true if this node should bootstrap a new raft cluster.
RaftBootstrap bool

@@ -117,6 +124,16 @@ func (c *Config) Check() error {
if c.RaftStorageDir == "" {
return fmt.Errorf("missing raft storage directory")
}
raftBackend := c.RaftBackend
if raftBackend == "" {
raftBackend = consensus.DefaultRaftBackend
}
if !consensus.ValidRaftBackend(raftBackend) {
return fmt.Errorf("invalid raft backend %q", c.RaftBackend)
}
if raftBackend == consensus.RaftBackendMDB && c.RaftMDBMaxSize == 0 {
return fmt.Errorf("invalid raft mdb max size: must be greater than zero")
}
if c.NodeRPC == "" {
return fmt.Errorf("missing node RPC")
}
@@ -167,6 +184,8 @@ func NewConfig(ctx *cli.Context, log log.Logger) (*Config, error) {
RaftBootstrap: ctx.Bool(flags.RaftBootstrap.Name),
RaftServerID: ctx.String(flags.RaftServerID.Name),
RaftStorageDir: ctx.String(flags.RaftStorageDir.Name),
RaftBackend: ctx.String(flags.RaftBackend.Name),
RaftMDBMaxSize: ctx.Uint64(flags.RaftMDBMaxSize.Name),
RaftSnapshotInterval: ctx.Duration(flags.RaftSnapshotInterval.Name),
RaftSnapshotThreshold: ctx.Uint64(flags.RaftSnapshotThreshold.Name),
RaftTrailingLogs: ctx.Uint64(flags.RaftTrailingLogs.Name),
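The `consensus` identifiers that `Check` relies on (`DefaultRaftBackend`, `RaftBackendMDB`, `ValidRaftBackend`, `DefaultRaftMDBMaxSize`) are not shown in this diff. A minimal sketch of what they might look like, with values assumed from the documented `bbolt|mdb` flag and the 1 GiB runbook default:

```go
package main

// Sketch of the raft-backend identifiers from op-conductor/consensus.
// Only the names appear in the diff; the values are assumptions.
const (
	RaftBackendBBolt = "bbolt"
	RaftBackendMDB   = "mdb"

	DefaultRaftBackend = RaftBackendBBolt
)

// DefaultRaftMDBMaxSize is the assumed default LMDB map size (1 GiB).
const DefaultRaftMDBMaxSize uint64 = 1 << 30

// ValidRaftBackend reports whether name selects a supported backend.
func ValidRaftBackend(name string) bool {
	switch name {
	case RaftBackendBBolt, RaftBackendMDB:
		return true
	}
	return false
}

func main() {
	// Mirror of the Check flow: empty means "use the default", then validate.
	backend := ""
	if backend == "" {
		backend = DefaultRaftBackend
	}
	if !ValidRaftBackend(backend) {
		panic("default backend must validate")
	}
}
```

Note that `Check` validates the defaulted value but reports the original `c.RaftBackend` in the error, so an explicit bad value like `"badger"` is what the operator sees.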
69 changes: 69 additions & 0 deletions op-conductor/conductor/config_test.go
@@ -1,9 +1,17 @@
package conductor

import (
"flag"
"testing"
"time"

"github.com/ethereum/go-ethereum/log"
"github.com/stretchr/testify/require"
"github.com/urfave/cli/v2"

"github.com/ethereum-optimism/optimism/op-conductor/consensus"
"github.com/ethereum-optimism/optimism/op-conductor/flags"
"github.com/ethereum-optimism/optimism/op-service/testlog"
)

func TestConfigCheckRollupBoostAndNextMutuallyExclusive(t *testing.T) {
@@ -23,3 +31,64 @@ func TestConfigCheckRollupBoostAndNextMutuallyExclusive(t *testing.T) {
require.Error(t, err)
require.Contains(t, err.Error(), "only one of rollup-boost or rollup-boost next healthchecks can be enabled")
}

func TestConfigCheckRejectsInvalidRaftBackend(t *testing.T) {
cfg := validConfig()
cfg.RaftBackend = "badger"

err := cfg.Check()
require.Error(t, err)
require.Contains(t, err.Error(), "invalid raft backend")
}

func TestConfigCheckRequiresMDBMaxSize(t *testing.T) {
cfg := validConfig()
cfg.RaftBackend = consensus.RaftBackendMDB
cfg.RaftMDBMaxSize = 0

err := cfg.Check()
require.Error(t, err)
require.Contains(t, err.Error(), "invalid raft mdb max size")
}

func TestNewConfigAppliesRaftDefaults(t *testing.T) {
flagSet := flag.NewFlagSet("op-conductor", flag.ContinueOnError)
for _, f := range flags.Flags {
require.NoError(t, f.Apply(flagSet))
}

require.NoError(t, flagSet.Set(flags.ConsensusAddr.Name, "127.0.0.1"))
require.NoError(t, flagSet.Set(flags.ConsensusPort.Name, "50050"))
require.NoError(t, flagSet.Set(flags.RaftServerID.Name, "server-1"))
require.NoError(t, flagSet.Set(flags.RaftStorageDir.Name, "/tmp/op-conductor"))
require.NoError(t, flagSet.Set(flags.NodeRPC.Name, "http://node.example"))
require.NoError(t, flagSet.Set(flags.ExecutionRPC.Name, "http://exec.example"))
require.NoError(t, flagSet.Set(flags.HealthCheckInterval.Name, "10"))
require.NoError(t, flagSet.Set(flags.HealthCheckUnsafeInterval.Name, "12"))
require.NoError(t, flagSet.Set(flags.HealthCheckMinPeerCount.Name, "1"))

ctx := cli.NewContext(cli.NewApp(), flagSet, nil)
cfg, err := NewConfig(ctx, testlog.Logger(t, log.LevelWarn))
require.NoError(t, err)
require.Equal(t, consensus.DefaultRaftBackend, cfg.RaftBackend)
require.EqualValues(t, consensus.DefaultRaftMDBMaxSize, cfg.RaftMDBMaxSize)
require.Equal(t, time.Second, cfg.RaftSnapshotInterval)
require.Equal(t, uint64(48), cfg.RaftSnapshotThreshold)
require.Equal(t, uint64(32), cfg.RaftTrailingLogs)
}

func validConfig() *Config {
return &Config{
ConsensusAddr: "127.0.0.1",
ConsensusPort: 9000,
RaftServerID: "server-1",
RaftStorageDir: "/tmp/op-conductor",
RaftBackend: consensus.DefaultRaftBackend,
RaftMDBMaxSize: consensus.DefaultRaftMDBMaxSize,
NodeRPC: "http://node.example",
ExecutionRPC: "http://exec.example",
RaftSnapshotInterval: time.Second,
RaftSnapshotThreshold: 48,
RaftTrailingLogs: 32,
}
}
3 changes: 3 additions & 0 deletions op-conductor/conductor/service.go
@@ -184,12 +184,15 @@ func (c *OpConductor) initConsensus(ctx context.Context) error {
ListenAddr: c.cfg.ConsensusAddr,
ListenPort: c.cfg.ConsensusPort,
StorageDir: c.cfg.RaftStorageDir,
Backend: c.cfg.RaftBackend,
MDBMaxSize: c.cfg.RaftMDBMaxSize,
Bootstrap: c.cfg.RaftBootstrap,
SnapshotInterval: c.cfg.RaftSnapshotInterval,
SnapshotThreshold: c.cfg.RaftSnapshotThreshold,
TrailingLogs: c.cfg.RaftTrailingLogs,
HeartbeatTimeout: c.cfg.RaftHeartbeatTimeout,
LeaderLeaseTimeout: c.cfg.RaftLeaderLeaseTimeout,
Metrics: c.metrics,
}
cons, err := consensus.NewRaftConsensus(c.log, raftConsensusConfig)
if err != nil {
10 changes: 10 additions & 0 deletions op-conductor/consensus/iface.go
@@ -4,6 +4,16 @@ import (
"github.com/ethereum-optimism/optimism/op-service/eth"
)

// ConsensusMetrics defines metrics for consensus commit operations.
// This is intentionally minimal so that the consensus layer does not
// depend on the full metrics.Metricer interface.
type ConsensusMetrics interface {
RecordCommitDuration(marshalSec, raftApplySec float64)
RecordCommitPayloadSize(payloadBytes float64)
RecordFSMApplyDuration(seconds float64)
RecordLogStoreDuration(seconds float64)
}

// ServerSuffrage determines whether a Server in a Configuration gets a vote.
type ServerSuffrage int

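Because `ConsensusMetrics` is intentionally small, a no-op implementation is trivial, e.g. for tests or when metrics are disabled. The stand-in below is illustrative and not part of the diff; the interface is restated so the sketch compiles on its own:

```go
package main

// ConsensusMetrics mirrors the interface added in op-conductor/consensus/iface.go
// so this sketch is self-contained.
type ConsensusMetrics interface {
	RecordCommitDuration(marshalSec, raftApplySec float64)
	RecordCommitPayloadSize(payloadBytes float64)
	RecordFSMApplyDuration(seconds float64)
	RecordLogStoreDuration(seconds float64)
}

// noopConsensusMetrics discards every observation; a hypothetical helper,
// useful where no Metricer is wired in.
type noopConsensusMetrics struct{}

func (noopConsensusMetrics) RecordCommitDuration(marshalSec, raftApplySec float64) {}
func (noopConsensusMetrics) RecordCommitPayloadSize(payloadBytes float64)          {}
func (noopConsensusMetrics) RecordFSMApplyDuration(seconds float64)                {}
func (noopConsensusMetrics) RecordLogStoreDuration(seconds float64)                {}

func main() {
	// The empty struct satisfies the interface with zero overhead.
	var m ConsensusMetrics = noopConsensusMetrics{}
	m.RecordCommitDuration(0.001, 0.002)
	m.RecordCommitPayloadSize(4096)
}
```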