From 246e17d776e6b53d73c714dab1781d9a440a57d1 Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 19:54:24 +0100
Subject: [PATCH 01/18] test(fiber-bench): single-sequencer ev-node bench
 against a remote Fibre network
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds tools/celestia-node-fiber/cmd/fiber-bench, a self-contained binary that
spins up an ev-node aggregator wired to a Fibre network with the bridge node
bypassed, then pumps load into the in-mem mempool to measure throughput end-to-
end. Built specifically to flush out the ev-node-vs-Fibre regression where the
combined stack hits ~1k tps despite Fibre alone delivering ~1.3 GiB/s.

Stripped to keep the measurement clean:
- solo sequencer (no based / no forced inclusion)
- aggregator-only (no syncer, no P2P)
- in-mem core.Executor with constant state root (no state-machine cost)
- bridge-bypass cnfiber.Adapter (Upload via consensus gRPC + FSPs only)
- direct InjectTx (no HTTP overhead)

Includes:
- keyring management (test backend, test-only convenience for the bench account)
- Fibre escrow deposit/query helpers so the bench is self-contained
- per-Upload latency instrumentation (p50/p99/mean/max) so we can split
  Fibre-side latency from ev-node submitter serialization
- live periodic stats (tps + MB/s for inj/exec/da_settled streams) and a
  baseline summary at end of run

Build with -tags fibre — without it the celestia-app x/fibre messages aren't
registered in the codec and async pay-for-fibre settlement fails with "unable
to resolve type URL /celestia.fibre.v1.MsgPayForFibre".
---
 .../cmd/fiber-bench/README.md                 | 171 ++++++++
 .../cmd/fiber-bench/escrow.go                 | 165 ++++++++
 .../cmd/fiber-bench/executor.go               | 168 ++++++++
 .../cmd/fiber-bench/fibre.go                  | 139 +++++++
 .../cmd/fiber-bench/instrumented.go           | 138 +++++++
 .../cmd/fiber-bench/keys.go                   | 153 +++++++
 .../cmd/fiber-bench/loader.go                 |  82 ++++
 .../cmd/fiber-bench/main.go                   |  44 ++
 .../cmd/fiber-bench/run-bench.sh              |  74 ++++
 .../cmd/fiber-bench/run.go                    | 349 ++++++++++++++++
 .../cmd/fiber-bench/stats.go                  | 380 ++++++++++++++++++
 .../cmd/fiber-bench/util.go                   |  26 ++
 .../cmd/fiber-bench/yield.go                  |   8 +
 13 files changed, 1897 insertions(+)
 create mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/README.md
 create mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/escrow.go
 create mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/executor.go
 create mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/fibre.go
 create mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/instrumented.go
 create mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/keys.go
 create mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/loader.go
 create mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/main.go
 create mode 100755 tools/celestia-node-fiber/cmd/fiber-bench/run-bench.sh
 create mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/run.go
 create mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/stats.go
 create mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/util.go
 create mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/yield.go

diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/README.md b/tools/celestia-node-fiber/cmd/fiber-bench/README.md
new file mode 100644
index 0000000000..463014e80d
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/README.md
@@ -0,0 +1,171 @@
+# fiber-bench
+
+Single-sequencer ev-node throughput bench against a remote Fibre network.
+
+## What it is
+
+A self-contained binary that spins up an ev-node aggregator wired to a
+remote Fibre network (no bridge node, no P2P, no syncer, no
+state-machine cost) and pumps transactions into its mempool as fast as
+the configured backpressure allows.
+
+The intent is a fail-fast baseline so we can isolate ev-node's
+batching + DA-submit pipeline from everything else when chasing the
+1k tps regression.
+
+### What's stripped out (and why)
+
+| Stripped       | Why                                                        |
+|----------------|------------------------------------------------------------|
+| Bridge node    | Upload only needs consensus gRPC + FSPs.                   |
+| Syncer         | Aggregator-only single-node setup.                         |
+| P2P outbound   | ev-node already disables it when `da.fiber.enabled=true`.  |
+| Forced incl.   | Solo sequencer.                                            |
+| Real state machine | Constant state root — measure ev-node, not state cost. |
+| HTTP tx ingress | Direct `InjectTx`. Removes HTTP from the hot path.        |
+
+## Layout
+
+```
+tools/celestia-node-fiber/cmd/fiber-bench/
+  main.go        cobra root
+  keys.go        cosmos keyring management (test backend)
+  escrow.go      Fibre escrow deposit/query
+  run.go         the bench
+  executor.go    in-mem core.Executor with constant state root
+  loader.go      internal tx pump
+  stats.go       periodic stats line + final baseline summary
+  fibre.go       bridge-bypass cnfiber.Adapter constructor
+  run-bench.sh   convenience wrapper
+```
+
+## Quick start
+
+```sh
+cd tools/celestia-node-fiber
+
+# 1. Build — the `fibre` build tag is REQUIRED so celestia-app's
+# x/fibre messages (MsgPayForFibre, MsgDepositToEscrow) are registered
+# in the codec. Without it the async PFF settlement fails with
+# "unable to resolve type URL /celestia.fibre.v1.MsgPayForFibre".
+go build -tags fibre -o bin/fiber-bench ./cmd/fiber-bench/
+
+# 2. Create the bench key (cosmos keyring, test backend = unencrypted on disk)
+./bin/fiber-bench keys add bench
+#   prints: address: celestia1...
+#           mnemonic: ...
+
+# 3. Top up the printed address with utia on the chain (out of band).
+
+# 4. Deposit into the Fibre escrow
+./bin/fiber-bench escrow deposit \
+    --consensus-grpc 139.59.229.101:9091 \
+    --key-name bench \
+    --amount 50000000        # 50 TIA
+
+# 5. Sanity check
+./bin/fiber-bench escrow query \
+    --consensus-grpc 139.59.229.101:9091 \
+    --key-name bench
+
+# 6. Run the bench
+./bin/fiber-bench run \
+    --consensus-grpc 139.59.229.101:9091 \
+    --chain-id <chain-id-the-server-actually-reports> \
+    --key-name bench \
+    --duration 2m \
+    --workers 32 \
+    --tx-size 200 \
+    --block-time 1s \
+    --batching-strategy immediate
+```
+
+Or use the convenience wrapper:
+
+```sh
+CONSENSUS_GRPC=139.59.229.101:9091 \
+CHAIN_ID=talis-slab-diag \
+    ./cmd/fiber-bench/run-bench.sh 2m 32
+```
+
+## What the run prints
+
+A header, then one line per `--stats-interval` (default 1s):
+
+```
+elapsed   injected     inj/s     exec/s    blocks/s  committed_h  txs/blk blob_bytes pending   drops
+------------------------------------------------------------------------------------------------------
+1s        1452609      1452212   0         0.00      0            0.0     0         0         293116
+2s        1544094      91444     0         0.00      0            0.0     0         0         1007270
+```
+
+Columns:
+
+- `injected` — total txs the load generator has called `InjectTx` for
+- `inj/s` — injection rate over the last interval
+- `exec/s` — txs included in produced blocks (rate)
+- `blocks/s` — block production rate
+- `committed_h` — last block height confirmed by DA (0 until first
+  Upload settles)
+- `txs/blk` — running average over all blocks
+- `blob_bytes` — last block's data size in bytes
+- `pending` — `evnode_da_submitter_pending_blobs` gauge
+- `drops` — txs the load generator could not enqueue because the
+  in-mem mempool channel was full (this is the backpressure signal)
+
+At the end:
+
+```
+============================================================
+                BASELINE SUMMARY
+============================================================
+Duration:               2m0s
+Injected:               XXX (avg N tx/s, peak N tx/s)
+Dropped (mempool full): XXX
+Mempool high-water:     XXX
+Blocks produced:        XXX (committed_h=YYY)
+Txs executed:           XXX (avg N tx/s, peak N tx/s, T tx/blk)
+============================================================
+```
+
+## Knobs worth flipping while debugging
+
+| Flag                    | Default      | Why                                               |
+|-------------------------|--------------|---------------------------------------------------|
+| `--block-time`          | `1s`         | Drop to e.g. `100ms` to expose per-block overhead |
+| `--batching-strategy`   | `immediate`  | Try `time` / `size` / `adaptive`                  |
+| `--reaper-interval`     | `100ms`      | How often the mempool drain runs                  |
+| `--max-pending`         | `0`          | Cap pending DA blobs to test backpressure         |
+| `--workers`             | `32`         | Tx-injection concurrency                          |
+| `--tx-size`             | `200`        | Bytes per tx (matches user-reported regression)   |
+| `--mempool-size`        | `1_000_000`  | Bench's bounded backpressure boundary             |
+| `--keep-home`           | `false`      | Resume from prior state (defaults to wipe)        |
+| `--log-level`           | `info`       | `debug` to see ev-node block production logs      |
+
+## ev-node Prometheus
+
+When `--prometheus=true` (default), ev-node exposes metrics at
+`http://127.0.0.1:26660/metrics`. The bench scrapes a handful of them
+for its stats line, but you can hit the endpoint directly for the full
+picture: `evnode_block_production_duration_seconds`,
+`evnode_da_submitter_failures_total`, etc.
+
+## Operational notes
+
+- **Test-backend keyring**: keys live unencrypted on disk under
+  `~/.fiber-bench/keyring`. Fine for a bench account funded with a
+  small amount of utia. Don't use for anything else.
+- **The bench wipes its ev-node home (`~/.fiber-bench/node`) on every
+  run** unless `--keep-home` is passed. Block-signing key, store, and
+  any in-flight pending blocks all reset. The cosmos keyring is
+  separate and is preserved.
+- **Bridge bypass**: the bench builds the `cnfiber.Adapter` via
+  `cnfiber.FromModules` with a stub Blob module that errors on every
+  call. The aggregator-only setup never invokes Listen/Subscribe, so
+  this is safe; if the assumption breaks, you'll see a clear
+  `fiber-bench: blob module not supported` error rather than a nil
+  panic.
+- **Chain ID** is what the consensus node reports; the bench logs it
+  on startup. Pass the same value via `--chain-id` for config
+  validation; mismatch is logged but tx submission proceeds against
+  the chain's actual ID.
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/escrow.go b/tools/celestia-node-fiber/cmd/fiber-bench/escrow.go
new file mode 100644
index 0000000000..89e55e25c9
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/escrow.go
@@ -0,0 +1,165 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	sdkmath "cosmossdk.io/math"
+	sdk "github.com/cosmos/cosmos-sdk/types"
+	"github.com/spf13/cobra"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+
+	"github.com/celestiaorg/celestia-app/v9/app"
+	"github.com/celestiaorg/celestia-app/v9/app/encoding"
+	"github.com/celestiaorg/celestia-app/v9/pkg/appconsts"
+	"github.com/celestiaorg/celestia-app/v9/pkg/user"
+	fibretypes "github.com/celestiaorg/celestia-app/v9/x/fibre/types"
+)
+
+// escrowCmd groups Fibre-escrow operations. Uploads consume utia from
+// the signer's escrow account; without a funded escrow, every Upload on
+// the bench will fail at the chain.
+func escrowCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "escrow",
+		Short: "Manage Fibre escrow for the bench account",
+	}
+	cmd.AddCommand(escrowDepositCmd(), escrowQueryCmd())
+	return cmd
+}
+
+func escrowDepositCmd() *cobra.Command {
+	var (
+		consensusGRPC string
+		keyringDir    string
+		keyName       string
+		amountUtia    int64
+		gasLimit      uint64
+		feeUtia       uint64
+	)
+	cmd := &cobra.Command{
+		Use:   "deposit",
+		Short: "Deposit utia into the bench account's Fibre escrow",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			ctx, cancel := context.WithTimeout(cmd.Context(), 60*time.Second)
+			defer cancel()
+
+			kr, err := openKeyring(keyringDir)
+			if err != nil {
+				return fmt.Errorf("open keyring: %w", err)
+			}
+			rec, err := kr.Key(keyName)
+			if err != nil {
+				return fmt.Errorf("key %q not found in %s: %w", keyName, keyringDir, err)
+			}
+			addr, err := rec.GetAddress()
+			if err != nil {
+				return fmt.Errorf("get address: %w", err)
+			}
+
+			conn, err := grpc.NewClient(consensusGRPC, grpc.WithTransportCredentials(insecure.NewCredentials()))
+			if err != nil {
+				return fmt.Errorf("dial grpc: %w", err)
+			}
+			defer conn.Close()
+
+			ecfg := encoding.MakeConfig(app.ModuleEncodingRegisters...)
+			tc, err := user.SetupTxClient(ctx, kr, conn, ecfg, user.WithDefaultAccount(keyName))
+			if err != nil {
+				return fmt.Errorf("setup tx client: %w", err)
+			}
+
+			amount := sdk.NewCoin(appconsts.BondDenom, sdkmath.NewInt(amountUtia))
+			msg := &fibretypes.MsgDepositToEscrow{
+				Signer: addr.String(),
+				Amount: amount,
+			}
+			fmt.Printf("submitting MsgDepositToEscrow: signer=%s amount=%s\n", addr.String(), amount.String())
+			resp, err := tc.SubmitTx(ctx, []sdk.Msg{msg}, user.SetGasLimit(gasLimit), user.SetFee(feeUtia))
+			if err != nil {
+				return fmt.Errorf("submit tx: %w", err)
+			}
+			if resp.Code != 0 {
+				return fmt.Errorf("deposit tx failed: code=%d codespace=%s", resp.Code, resp.Codespace)
+			}
+			fmt.Printf("deposit included: height=%d txhash=%s\n", resp.Height, resp.TxHash)
+
+			// Sanity: read the escrow back so the operator sees the
+			// new balance immediately.
+			qc := fibretypes.NewQueryClient(conn)
+			res, err := qc.EscrowAccount(ctx, &fibretypes.QueryEscrowAccountRequest{Signer: addr.String()})
+			if err != nil {
+				fmt.Printf("(could not query escrow back: %v)\n", err)
+				return nil
+			}
+			if !res.Found {
+				fmt.Println("(escrow not found after deposit — chain may need another block)")
+				return nil
+			}
+			fmt.Printf("escrow balance: %s\n", res.EscrowAccount.Balance.String())
+			return nil
+		},
+	}
+	cmd.Flags().StringVar(&consensusGRPC, "consensus-grpc", "", "celestia-app gRPC address (host:port). Required.")
+	cmd.Flags().StringVar(&keyringDir, "keyring-dir", defaultKeyringDir(), "directory holding the bench keyring")
+	cmd.Flags().StringVar(&keyName, "key-name", "default", "key in the keyring to deposit from")
+	cmd.Flags().Int64Var(&amountUtia, "amount", 50_000_000, "amount in utia to deposit (default 50 TIA)")
+	cmd.Flags().Uint64Var(&gasLimit, "gas-limit", 200_000, "tx gas limit")
+	cmd.Flags().Uint64Var(&feeUtia, "fee", 5_000, "fee in utia")
+	_ = cobra.MarkFlagRequired(cmd.Flags(), "consensus-grpc")
+	return cmd
+}
+
+func escrowQueryCmd() *cobra.Command {
+	var (
+		consensusGRPC string
+		keyringDir    string
+		keyName       string
+	)
+	cmd := &cobra.Command{
+		Use:   "query",
+		Short: "Print the current Fibre escrow balance for the bench account",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			ctx, cancel := context.WithTimeout(cmd.Context(), 30*time.Second)
+			defer cancel()
+
+			kr, err := openKeyring(keyringDir)
+			if err != nil {
+				return err
+			}
+			rec, err := kr.Key(keyName)
+			if err != nil {
+				return fmt.Errorf("key %q not found: %w", keyName, err)
+			}
+			addr, err := rec.GetAddress()
+			if err != nil {
+				return err
+			}
+
+			conn, err := grpc.NewClient(consensusGRPC, grpc.WithTransportCredentials(insecure.NewCredentials()))
+			if err != nil {
+				return err
+			}
+			defer conn.Close()
+
+			qc := fibretypes.NewQueryClient(conn)
+			res, err := qc.EscrowAccount(ctx, &fibretypes.QueryEscrowAccountRequest{Signer: addr.String()})
+			if err != nil {
+				return err
+			}
+			if !res.Found {
+				fmt.Printf("address: %s\nescrow:  not found (deposit first)\n", addr.String())
+				return nil
+			}
+			fmt.Printf("address: %s\nescrow:  %s\n", addr.String(), res.EscrowAccount.Balance.String())
+			return nil
+		},
+	}
+	cmd.Flags().StringVar(&consensusGRPC, "consensus-grpc", "", "celestia-app gRPC address. Required.")
+	cmd.Flags().StringVar(&keyringDir, "keyring-dir", defaultKeyringDir(), "keyring directory")
+	cmd.Flags().StringVar(&keyName, "key-name", "default", "key name")
+	_ = cobra.MarkFlagRequired(cmd.Flags(), "consensus-grpc")
+	return cmd
+}
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/executor.go b/tools/celestia-node-fiber/cmd/fiber-bench/executor.go
new file mode 100644
index 0000000000..1368213fe6
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/executor.go
@@ -0,0 +1,168 @@
+package main
+
+import (
+	"context"
+	"sync/atomic"
+	"time"
+
+	coreexecution "github.com/evstack/ev-node/core/execution"
+)
+
+// inMemExecutor is a minimal core.Executor that:
+//   - accepts injected txs via a buffered channel (the "mempool")
+//   - drains them in GetTxs (non-blocking)
+//   - "executes" by counting (no state machine)
+//   - returns a constant state root, so we don't pay O(N) state-root cost on
+//     every block (which would dominate the measurement and tell us nothing
+//     about ev-node's batching/submitting performance).
+//
+// Use FilterTxs's size cap to enforce the configured per-block byte budget.
+type inMemExecutor struct {
+	txCh chan []byte
+
+	injected         atomic.Uint64
+	dropped          atomic.Uint64
+	blocksProduced   atomic.Uint64
+	totalExecutedTxs atomic.Uint64
+
+	// mempoolHigh tracks the maximum mempool depth observed (snapshot).
+	mempoolHigh atomic.Int64
+
+	// constStateRoot is what every block reports as its post-state. The
+	// measurement target is ev-node, not state computation.
+	constStateRoot []byte
+}
+
+func newInMemExecutor(mempoolSize int) *inMemExecutor {
+	return &inMemExecutor{
+		txCh:           make(chan []byte, mempoolSize),
+		constStateRoot: []byte("fiber-bench-const-state-root"),
+	}
+}
+
+// InjectTx is the bench's "mempool entry". Backpressures via channel
+// capacity: full → drop and increment counter so the operator sees it.
+func (e *inMemExecutor) InjectTx(tx []byte) bool {
+	select {
+	case e.txCh <- tx:
+		e.injected.Add(1)
+		// Loose mempool-depth high-water; not a hot-path concern.
+		if d := int64(len(e.txCh)); d > e.mempoolHigh.Load() {
+			e.mempoolHigh.Store(d)
+		}
+		return true
+	default:
+		e.dropped.Add(1)
+		return false
+	}
+}
+
+func (e *inMemExecutor) MempoolDepth() int { return len(e.txCh) }
+
+func (e *inMemExecutor) Stats() (injected, dropped, blocks, txs uint64, mempoolHigh int64) {
+	return e.injected.Load(),
+		e.dropped.Load(),
+		e.blocksProduced.Load(),
+		e.totalExecutedTxs.Load(),
+		e.mempoolHigh.Load()
+}
+
+// InitChain is called once at genesis.
+func (e *inMemExecutor) InitChain(_ context.Context, _ time.Time, _ uint64, _ string) ([]byte, error) {
+	return e.constStateRoot, nil
+}
+
+// GetTxs drains the mempool channel. Non-blocking — returns whatever is
+// currently buffered. ev-node's reaper polls this on its own cadence.
+func (e *inMemExecutor) GetTxs(ctx context.Context) ([][]byte, error) {
+	select {
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	default:
+	}
+
+	n := len(e.txCh)
+	if n == 0 {
+		return nil, nil
+	}
+	txs := make([][]byte, 0, n)
+	for i := 0; i < n; i++ {
+		select {
+		case tx := <-e.txCh:
+			txs = append(txs, tx)
+		default:
+			return txs, nil
+		}
+	}
+	return txs, nil
+}
+
+// ExecuteTxs is intentionally a no-op state transition: count txs, return
+// a constant root. The whole point of this executor is to take state
+// computation out of the measurement.
+func (e *inMemExecutor) ExecuteTxs(_ context.Context, txs [][]byte, _ uint64, _ time.Time, _ []byte) ([]byte, error) {
+	e.blocksProduced.Add(1)
+	e.totalExecutedTxs.Add(uint64(len(txs)))
+	return e.constStateRoot, nil
+}
+
+func (e *inMemExecutor) SetFinal(_ context.Context, _ uint64) error { return nil }
+func (e *inMemExecutor) Rollback(_ context.Context, _ uint64) error { return nil }
+
+func (e *inMemExecutor) GetExecutionInfo(_ context.Context) (coreexecution.ExecutionInfo, error) {
+	// MaxGas=0 means "no gas-based filter"; the size cap (FilterTxs) is what
+	// bounds per-block bytes.
+	return coreexecution.ExecutionInfo{MaxGas: 0}, nil
+}
+
+// blockOverheadMargin is the safety margin we subtract from ev-node's
+// MaxBytes hint inside FilterTxs. ev-node currently caps raw tx bytes at
+// MaxBlobSize, but the actual DA blob also carries types.Data metadata,
+// signature, and protobuf framing — empirically ~80 KB at 200 B/tx,
+// 26k txs/block. Without this margin, submission of a full block hits
+// "single item exceeds DA blob size limit" and halts the node.
+//
+// This is a workaround for a real ev-node accounting bug
+// (block/internal/executing/executor.go:670 hardcodes MaxBytes =
+// MaxBlobSize without reserving room for metadata/proto). 256 KB is
+// generous and avoids the failure mode reliably.
+const blockOverheadMargin uint64 = 256 * 1024
+
+// FilterTxs enforces the configured per-block byte budget. Mirrors the
+// existing testapp KV executor's behavior: oversized txs are dropped, the
+// rest fill until the budget is hit and overflow is postponed for the
+// next block. We don't validate tx content — txs from the load generator
+// are well-formed by construction.
+func (e *inMemExecutor) FilterTxs(_ context.Context, txs [][]byte, maxBytes, _ uint64, _ bool) ([]coreexecution.FilterStatus, error) {
+	if maxBytes > blockOverheadMargin {
+		maxBytes -= blockOverheadMargin
+	}
+	out := make([]coreexecution.FilterStatus, len(txs))
+	var used uint64
+	limitReached := false
+	for i, tx := range txs {
+		size := uint64(len(tx))
+		if size == 0 {
+			out[i] = coreexecution.FilterRemove
+			continue
+		}
+		if maxBytes > 0 && size > maxBytes {
+			out[i] = coreexecution.FilterRemove
+			continue
+		}
+		if limitReached {
+			out[i] = coreexecution.FilterPostpone
+			continue
+		}
+		if maxBytes > 0 && used+size > maxBytes {
+			limitReached = true
+			out[i] = coreexecution.FilterPostpone
+			continue
+		}
+		used += size
+		out[i] = coreexecution.FilterOK
+	}
+	return out, nil
+}
+
+var _ coreexecution.Executor = (*inMemExecutor)(nil)
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/fibre.go b/tools/celestia-node-fiber/cmd/fiber-bench/fibre.go
new file mode 100644
index 0000000000..3a366d0824
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/fibre.go
@@ -0,0 +1,139 @@
+package main
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"time"
+
+	"github.com/cosmos/cosmos-sdk/crypto/keyring"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+
+	appfibre "github.com/celestiaorg/celestia-app/v9/fibre"
+	libshare "github.com/celestiaorg/go-square/v4/share"
+
+	"github.com/celestiaorg/celestia-node/blob"
+	"github.com/celestiaorg/celestia-node/fibre"
+	blobapi "github.com/celestiaorg/celestia-node/nodebuilder/blob"
+	nodebuilderfibre "github.com/celestiaorg/celestia-node/nodebuilder/fibre"
+	"github.com/celestiaorg/celestia-node/state/txclient"
+
+	"github.com/evstack/ev-node/block"
+	cnfiber "github.com/evstack/ev-node/tools/celestia-node-fiber"
+)
+
+// buildFibreAdapter constructs a celestia-node-fiber Adapter that talks
+// directly to consensus gRPC + FSPs — no bridge node hop. We do this by
+// rebuilding only the submit-side wiring of celestia-node's api/client
+// (which is otherwise eager about dialing BridgeDAAddr in NewReadClient).
+//
+// The returned adapter only supports Upload (and Download via FSPs).
+// Listen would invoke a stub blob.Subscribe that returns an error;
+// ev-node's aggregator-only setup never calls it (no syncer, no based
+// sequencer), so this is fine.
+//
+// The returned closer releases the gRPC connection and stops the
+// underlying app-level fibre client.
+func buildFibreAdapter(
+	ctx context.Context,
+	consensusGRPC string,
+	keyName string,
+	kr keyring.Keyring,
+) (block.FiberClient, func() error, error) {
+	if consensusGRPC == "" {
+		return nil, nil, errors.New("consensus gRPC address is required")
+	}
+	if keyName == "" {
+		return nil, nil, errors.New("key name is required")
+	}
+
+	conn, err := grpc.NewClient(
+		consensusGRPC,
+		grpc.WithTransportCredentials(insecure.NewCredentials()),
+	)
+	if err != nil {
+		return nil, nil, fmt.Errorf("dial consensus grpc %q: %w", consensusGRPC, err)
+	}
+
+	tc, err := txclient.NewTxClient(kr, keyName, conn)
+	if err != nil {
+		_ = conn.Close()
+		return nil, nil, fmt.Errorf("new tx client: %w", err)
+	}
+	if err := tc.Start(ctx); err != nil {
+		_ = conn.Close()
+		return nil, nil, fmt.Errorf("start tx client: %w", err)
+	}
+
+	appCfg := appfibre.DefaultClientConfig()
+	appCfg.DefaultKeyName = keyName
+	appCfg.StateAddress = conn.Target()
+	appClient, err := appfibre.NewClient(kr, appCfg)
+	if err != nil {
+		_ = tc.Stop(ctx)
+		_ = conn.Close()
+		return nil, nil, fmt.Errorf("new app fibre client: %w", err)
+	}
+	if err := appClient.Start(ctx); err != nil {
+		_ = tc.Stop(ctx)
+		_ = conn.Close()
+		return nil, nil, fmt.Errorf("start app fibre client: %w", err)
+	}
+
+	accClient := fibre.NewAccountClient(tc, conn)
+	svc := fibre.NewService(appClient, tc, accClient)
+	module := nodebuilderfibre.NewModule(svc)
+
+	adapter := cnfiber.FromModules(module, noBridgeBlob{}, 0)
+
+	closer := func() error {
+		stopCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+		defer cancel()
+		var errs error
+		if err := appClient.Stop(stopCtx); err != nil {
+			errs = errors.Join(errs, err)
+		}
+		if err := tc.Stop(stopCtx); err != nil {
+			errs = errors.Join(errs, err)
+		}
+		if err := conn.Close(); err != nil {
+			errs = errors.Join(errs, err)
+		}
+		return errs
+	}
+
+	return adapter, closer, nil
+}
+
+// noBridgeBlob errors on every call. The only path that would invoke it
+// is Listen→Subscribe, which our aggregator-only single-sequencer node
+// never reaches. A clear error here surfaces an assumption break instead
+// of a nil panic.
+type noBridgeBlob struct{}
+
+var _ blobapi.Module = noBridgeBlob{}
+
+var errNoBridge = errors.New("fiber-bench: blob module not supported (running without a bridge node)")
+
+func (noBridgeBlob) Submit(context.Context, []*blob.Blob, *blob.SubmitOptions) (uint64, error) {
+	return 0, errNoBridge
+}
+func (noBridgeBlob) Get(context.Context, uint64, libshare.Namespace, blob.Commitment) (*blob.Blob, error) {
+	return nil, errNoBridge
+}
+func (noBridgeBlob) GetAll(context.Context, uint64, []libshare.Namespace) ([]*blob.Blob, error) {
+	return nil, errNoBridge
+}
+func (noBridgeBlob) GetProof(context.Context, uint64, libshare.Namespace, blob.Commitment) (*blob.Proof, error) {
+	return nil, errNoBridge
+}
+func (noBridgeBlob) Included(context.Context, uint64, libshare.Namespace, *blob.Proof, blob.Commitment) (bool, error) {
+	return false, errNoBridge
+}
+func (noBridgeBlob) GetCommitmentProof(context.Context, uint64, libshare.Namespace, []byte) (*blob.CommitmentProof, error) {
+	return nil, errNoBridge
+}
+func (noBridgeBlob) Subscribe(context.Context, libshare.Namespace, uint64) (<-chan *blob.SubscriptionResponse, error) {
+	return nil, errNoBridge
+}
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/instrumented.go b/tools/celestia-node-fiber/cmd/fiber-bench/instrumented.go
new file mode 100644
index 0000000000..3acdfea794
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/instrumented.go
@@ -0,0 +1,138 @@
+package main
+
+import (
+	"context"
+	"sort"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/evstack/ev-node/block"
+)
+
+// instrumentedAdapter wraps a block.FiberClient and records latency
+// per Upload (and per Download) call. The bench's stats printer
+// reads percentiles from here so we can answer "is the bottleneck
+// ev-node's submitter serialization, or actual Fibre Upload time?".
+//
+// We keep the last N samples in a ring buffer rather than an
+// unbounded slice so a long run does not grow memory; N is sized for
+// a 30-minute run at peak block rate.
+type instrumentedAdapter struct {
+	inner block.FiberClient
+
+	uploadCount     atomic.Uint64
+	uploadFailures  atomic.Uint64
+	uploadBytesSent atomic.Uint64
+
+	mu      sync.Mutex
+	samples []time.Duration // ring buffer of recent durations
+	idx     int             // next slot to write
+	full    bool            // ring buffer has wrapped at least once
+}
+
+const uploadSampleCapacity = 4096
+
+func newInstrumentedAdapter(inner block.FiberClient) *instrumentedAdapter {
+	return &instrumentedAdapter{
+		inner:   inner,
+		samples: make([]time.Duration, uploadSampleCapacity),
+	}
+}
+
+func (a *instrumentedAdapter) Upload(ctx context.Context, namespace []byte, data []byte) (block.FiberUploadResult, error) {
+	start := time.Now()
+	res, err := a.inner.Upload(ctx, namespace, data)
+	elapsed := time.Since(start)
+
+	a.uploadCount.Add(1)
+	if err != nil {
+		a.uploadFailures.Add(1)
+	} else {
+		a.uploadBytesSent.Add(uint64(len(data)))
+	}
+
+	a.mu.Lock()
+	a.samples[a.idx] = elapsed
+	a.idx = (a.idx + 1) % len(a.samples)
+	if a.idx == 0 {
+		a.full = true
+	}
+	a.mu.Unlock()
+
+	return res, err
+}
+
+func (a *instrumentedAdapter) Download(ctx context.Context, blobID block.FiberBlobID) ([]byte, error) {
+	return a.inner.Download(ctx, blobID)
+}
+
+func (a *instrumentedAdapter) Listen(ctx context.Context, namespace []byte, fromHeight uint64) (<-chan block.FiberBlobEvent, error) {
+	return a.inner.Listen(ctx, namespace, fromHeight)
+}
+
+// uploadStats returns snapshot p50, p99, mean of recent Upload
+// durations plus cumulative counters. Returns zero durations when
+// no samples have been recorded yet.
+type uploadStats struct {
+	Count    uint64
+	Failures uint64
+	BytesOK  uint64
+	P50      time.Duration
+	P99      time.Duration
+	Mean     time.Duration
+	Max      time.Duration
+}
+
+func (a *instrumentedAdapter) uploadStats() uploadStats {
+	a.mu.Lock()
+	var n int
+	if a.full {
+		n = len(a.samples)
+	} else {
+		n = a.idx
+	}
+	if n == 0 {
+		a.mu.Unlock()
+		return uploadStats{
+			Count:    a.uploadCount.Load(),
+			Failures: a.uploadFailures.Load(),
+			BytesOK:  a.uploadBytesSent.Load(),
+		}
+	}
+	// Copy under lock so we can sort outside it.
+	cp := make([]time.Duration, n)
+	copy(cp, a.samples[:n])
+	a.mu.Unlock()
+
+	sort.Slice(cp, func(i, j int) bool { return cp[i] < cp[j] })
+
+	var sum time.Duration
+	for _, d := range cp {
+		sum += d
+	}
+
+	pct := func(p float64) time.Duration {
+		idx := int(float64(n-1) * p)
+		if idx < 0 {
+			idx = 0
+		}
+		if idx >= n {
+			idx = n - 1
+		}
+		return cp[idx]
+	}
+
+	return uploadStats{
+		Count:    a.uploadCount.Load(),
+		Failures: a.uploadFailures.Load(),
+		BytesOK:  a.uploadBytesSent.Load(),
+		P50:      pct(0.50),
+		P99:      pct(0.99),
+		Mean:     sum / time.Duration(n),
+		Max:      cp[n-1],
+	}
+}
+
+// Compile-time guard: must satisfy the same interface ev-node consumes.
+var _ block.FiberClient = (*instrumentedAdapter)(nil)
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/keys.go b/tools/celestia-node-fiber/cmd/fiber-bench/keys.go
new file mode 100644
index 0000000000..7b1e83ce93
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/keys.go
@@ -0,0 +1,153 @@
+package main
+
+import (
+	"fmt"
+
+	"github.com/cosmos/cosmos-sdk/client"
+	"github.com/cosmos/cosmos-sdk/codec"
+	"github.com/cosmos/cosmos-sdk/codec/types"
+	"github.com/cosmos/cosmos-sdk/crypto/hd"
+	"github.com/cosmos/cosmos-sdk/crypto/keyring"
+	cryptocodec "github.com/cosmos/cosmos-sdk/crypto/codec"
+	sdk "github.com/cosmos/cosmos-sdk/types"
+	"github.com/spf13/cobra"
+)
+
+// openKeyring opens (or creates if missing) a test-backend keyring at
+// keyringDir. The "test" backend is unencrypted on disk — fine for a
+// bench account, not fine for anything mainnet.
+func openKeyring(keyringDir string) (keyring.Keyring, error) {
+	interfaceRegistry := types.NewInterfaceRegistry()
+	cryptocodec.RegisterInterfaces(interfaceRegistry)
+	cdc := codec.NewProtoCodec(interfaceRegistry)
+	return keyring.New(
+		"fiber-bench",
+		keyring.BackendTest,
+		keyringDir,
+		nil,
+		cdc,
+	)
+}
+
+func keysCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "keys",
+		Short: "Manage the cosmos keyring used to sign Fibre payment promises",
+	}
+	cmd.AddCommand(keysAddCmd(), keysShowCmd(), keysListCmd())
+	return cmd
+}
+
+func keysAddCmd() *cobra.Command {
+	var keyringDir string
+	cmd := &cobra.Command{
+		Use:   "add <name>",
+		Short: "Create a new key in the bench keyring (test backend, unencrypted)",
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			name := args[0]
+			kr, err := openKeyring(keyringDir)
+			if err != nil {
+				return fmt.Errorf("open keyring: %w", err)
+			}
+
+			if rec, _ := kr.Key(name); rec != nil {
+				return fmt.Errorf("key %q already exists in keyring %s", name, keyringDir)
+			}
+
+			rec, mnemonic, err := kr.NewMnemonic(
+				name,
+				keyring.English,
+				sdk.FullFundraiserPath,
+				keyring.DefaultBIP39Passphrase,
+				hd.Secp256k1,
+			)
+			if err != nil {
+				return fmt.Errorf("create key: %w", err)
+			}
+
+			addr, err := rec.GetAddress()
+			if err != nil {
+				return fmt.Errorf("get address: %w", err)
+			}
+
+			fmt.Printf("name:    %s\n", name)
+			fmt.Printf("address: %s\n", addr.String())
+			fmt.Printf("keyring: %s (backend=test)\n", keyringDir)
+			fmt.Printf("\nmnemonic (back this up — printed once, never stored elsewhere):\n%s\n", mnemonic)
+			fmt.Printf("\nNext steps:\n")
+			fmt.Printf("  1. Top up the address above with utia on the chain.\n")
+			fmt.Printf("  2. Deposit into the Fibre escrow with celestia-appd or your tooling, e.g.\n")
+			fmt.Printf("     celestia-appd tx fibre deposit-escrow <utia-amount> --from %s --keyring-backend test --keyring-dir %s --chain-id <chain-id> --node tcp://<rpc>\n", name, keyringDir)
+			return nil
+		},
+	}
+	cmd.Flags().StringVar(&keyringDir, "keyring-dir", defaultKeyringDir(), "directory to store keyring files (test backend)")
+	return cmd
+}
+
+func keysShowCmd() *cobra.Command {
+	var keyringDir string
+	cmd := &cobra.Command{
+		Use:   "show <name>",
+		Short: "Print the address of an existing key",
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			kr, err := openKeyring(keyringDir)
+			if err != nil {
+				return fmt.Errorf("open keyring: %w", err)
+			}
+			rec, err := kr.Key(args[0])
+			if err != nil {
+				return fmt.Errorf("get key: %w", err)
+			}
+			addr, err := rec.GetAddress()
+			if err != nil {
+				return fmt.Errorf("get address: %w", err)
+			}
+			fmt.Printf("name:    %s\n", args[0])
+			fmt.Printf("address: %s\n", addr.String())
+			fmt.Printf("keyring: %s (backend=test)\n", keyringDir)
+			return nil
+		},
+	}
+	cmd.Flags().StringVar(&keyringDir, "keyring-dir", defaultKeyringDir(), "directory holding keyring files (test backend)")
+	return cmd
+}
+
+func keysListCmd() *cobra.Command {
+	var keyringDir string
+	cmd := &cobra.Command{
+		Use:   "list",
+		Short: "List all keys in the bench keyring",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			kr, err := openKeyring(keyringDir)
+			if err != nil {
+				return fmt.Errorf("open keyring: %w", err)
+			}
+			records, err := kr.List()
+			if err != nil {
+				return fmt.Errorf("list keys: %w", err)
+			}
+			if len(records) == 0 {
+				fmt.Printf("(empty — keyring at %s)\n", keyringDir)
+				return nil
+			}
+			for _, rec := range records {
+				addr, err := rec.GetAddress()
+				if err != nil {
+					return err
+				}
+				fmt.Printf("%-20s %s\n", rec.Name, addr.String())
+			}
+			return nil
+		},
+	}
+	cmd.Flags().StringVar(&keyringDir, "keyring-dir", defaultKeyringDir(), "directory holding keyring files (test backend)")
+	return cmd
+}
+
+// silenceUnusedClient keeps the SDK client package referenced even if a
+// future refactor stops using it directly — convenient when wiring a
+// proper send/escrow command.
+var _ = client.Context{}
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/loader.go b/tools/celestia-node-fiber/cmd/fiber-bench/loader.go
new file mode 100644
index 0000000000..df03a8ef26
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/loader.go
@@ -0,0 +1,82 @@
+package main
+
+import (
+	"context"
+	"encoding/binary"
+	"sync"
+	"sync/atomic"
+)
+
+// loader pumps fixed-size payloads into the in-mem executor as fast as it
+// can. Backpressure comes from the executor's bounded mempool channel:
+// when full, InjectTx returns false and we count it as dropped.
+//
+// Each payload is `txSize` bytes: a tx-id (uint64) prefix + zero filler.
+// Non-deterministic content isn't important — ev-node hashes them for
+// the seen-tx cache, so any unique-per-tx prefix is enough to avoid
+// dedup hits.
+type loader struct {
+	exec    *inMemExecutor
+	workers int
+	txSize  int
+
+	// counter monotonically increments per generated tx so the
+	// SHA-256-based seen cache never falsely dedups.
+	counter atomic.Uint64
+}
+
+func newLoader(exec *inMemExecutor, workers, txSize int) *loader {
+	if workers < 1 {
+		workers = 1
+	}
+	if txSize < 8 {
+		txSize = 8
+	}
+	return &loader{
+		exec:    exec,
+		workers: workers,
+		txSize:  txSize,
+	}
+}
+
+// run blocks until ctx is done. Each worker spins on InjectTx — when
+// full, it briefly yields. We don't sleep-back-off because the entire
+// point of the bench is to keep the executor's mempool pressed against
+// its bound.
+func (l *loader) run(ctx context.Context) {
+	var wg sync.WaitGroup
+	for i := 0; i < l.workers; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			buf := make([]byte, l.txSize)
+			for {
+				if ctx.Err() != nil {
+					return
+				}
+				id := l.counter.Add(1)
+				binary.BigEndian.PutUint64(buf, id)
+				// Copy on each Inject — the executor's mempool is a
+				// channel of []byte, and the consumer keeps a
+				// reference. Reusing the same buffer would corrupt
+				// in-flight items.
+				tx := make([]byte, l.txSize)
+				copy(tx, buf)
+				if !l.exec.InjectTx(tx) {
+					// Mempool full — yield and retry. ev-node's
+					// reaper will drain it on its scrape interval.
+					select {
+					case <-ctx.Done():
+						return
+					default:
+						// Tight retry: Gosched is enough to let the
+						// reaper goroutine make progress without us
+						// burning a syscall.
+						runtimeYield()
+					}
+				}
+			}
+		}()
+	}
+	wg.Wait()
+}
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/main.go b/tools/celestia-node-fiber/cmd/fiber-bench/main.go
new file mode 100644
index 0000000000..0678cb8e42
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/main.go
@@ -0,0 +1,44 @@
+// Package main is the fiber-bench tool: a single-sequencer ev-node wired
+// to a remote Fibre network for throughput measurement.
+//
+// It deliberately runs in the simplest possible configuration:
+//
+//   - Solo sequencer (no based / no forced inclusion)
+//   - Aggregator-only (no syncer, no P2P)
+//   - In-memory executor with constant state root (no state computation
+//     cost in the measurement)
+//   - Bridge-bypass Fibre adapter (Upload directly via consensus gRPC + FSPs)
+//
+// The intent is a fail-fast baseline so we can isolate ev-node's batching
+// + DA-submit pipeline from everything else.
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/spf13/cobra"
+
+	// Pull celestia-app params for its init() that sets the global SDK
+	// bech32 prefix to "celestia" — must run before any keyring operation
+	// that prints addresses.
+	_ "github.com/celestiaorg/celestia-app/v9/app/params"
+)
+
+func main() {
+	root := &cobra.Command{
+		Use:   "fiber-bench",
+		Short: "Single-sequencer ev-node throughput bench against a remote Fibre network",
+	}
+
+	root.AddCommand(
+		keysCmd(),
+		escrowCmd(),
+		runCmd(),
+	)
+
+	if err := root.Execute(); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+}
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/run-bench.sh b/tools/celestia-node-fiber/cmd/fiber-bench/run-bench.sh
new file mode 100755
index 0000000000..6f8ecb80ef
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/run-bench.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# run-bench.sh — convenience wrapper around `fiber-bench` for the
+# common case: build the binary if missing, ensure a key exists,
+# print the address, then start a run.
+#
+# Usage:
+#   CONSENSUS_GRPC=139.59.229.101:9091 \
+#   CHAIN_ID=talis-evnode \
+#     ./run-bench.sh [duration] [workers]
+#
+# All optional flags pass through via FIBER_BENCH_ARGS.
+set -euo pipefail
+
+cd "$(dirname "$0")/../../.."
+
+CONSENSUS_GRPC="${CONSENSUS_GRPC:-}"
+CHAIN_ID="${CHAIN_ID:-}"
+KEYRING_DIR="${KEYRING_DIR:-$HOME/.fiber-bench/keyring}"
+KEY_NAME="${KEY_NAME:-bench}"
+DURATION="${1:-${DURATION:-2m}}"
+WORKERS="${2:-${WORKERS:-32}}"
+TX_SIZE="${TX_SIZE:-200}"
+BLOCK_TIME="${BLOCK_TIME:-1s}"
+BATCHING="${BATCHING:-immediate}"
+HOME_DIR="${HOME_DIR:-$HOME/.fiber-bench/node}"
+
+if [[ -z "$CONSENSUS_GRPC" || -z "$CHAIN_ID" ]]; then
+    echo "ERROR: CONSENSUS_GRPC and CHAIN_ID must be set" >&2
+    echo "  example: CONSENSUS_GRPC=host:9091 CHAIN_ID=talis-evnode $0" >&2
+    exit 1
+fi
+
+BIN="$(pwd)/bin/fiber-bench"
+mkdir -p "$(dirname "$BIN")"
+
+if [[ ! -x "$BIN" || -n "${REBUILD:-}" ]]; then
+    echo "==> building fiber-bench (-tags fibre)"
+    go build -tags fibre -o "$BIN" ./cmd/fiber-bench/
+fi
+
+# Create the bench key if missing — idempotent: `keys add` errors if the
+# key exists, so we only run it on a fresh keyring.
+if ! "$BIN" keys show "$KEY_NAME" --keyring-dir "$KEYRING_DIR" >/dev/null 2>&1; then
+    echo "==> creating bench key '$KEY_NAME' at $KEYRING_DIR"
+    "$BIN" keys add "$KEY_NAME" --keyring-dir "$KEYRING_DIR"
+    echo
+    echo "Top up the address above and run:"
+    echo "  $BIN escrow deposit --consensus-grpc $CONSENSUS_GRPC \\"
+    echo "      --keyring-dir $KEYRING_DIR --key-name $KEY_NAME --amount 50000000"
+    echo
+    echo "Then re-run this script."
+    exit 0
+fi
+
+echo "==> bench account:"
+"$BIN" keys show "$KEY_NAME" --keyring-dir "$KEYRING_DIR"
+
+echo "==> escrow:"
+"$BIN" escrow query --consensus-grpc "$CONSENSUS_GRPC" \
+    --keyring-dir "$KEYRING_DIR" --key-name "$KEY_NAME" || true
+
+echo "==> starting bench: duration=$DURATION workers=$WORKERS tx_size=$TX_SIZE block_time=$BLOCK_TIME batching=$BATCHING"
+exec "$BIN" run \
+    --consensus-grpc "$CONSENSUS_GRPC" \
+    --chain-id "$CHAIN_ID" \
+    --keyring-dir "$KEYRING_DIR" \
+    --key-name "$KEY_NAME" \
+    --home "$HOME_DIR" \
+    --duration "$DURATION" \
+    --workers "$WORKERS" \
+    --tx-size "$TX_SIZE" \
+    --block-time "$BLOCK_TIME" \
+    --batching-strategy "$BATCHING" \
+    ${FIBER_BENCH_ARGS:-}
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/run.go b/tools/celestia-node-fiber/cmd/fiber-bench/run.go
new file mode 100644
index 0000000000..8b65cee42d
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/run.go
@@ -0,0 +1,349 @@
+package main
+
+import (
+	"context"
+	"crypto/rand"
+	"errors"
+	"fmt"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/ipfs/go-datastore"
+	"github.com/libp2p/go-libp2p/core/crypto"
+	"github.com/rs/zerolog"
+	"github.com/spf13/cobra"
+
+	"github.com/evstack/ev-node/block"
+	evconfig "github.com/evstack/ev-node/pkg/config"
+	"github.com/evstack/ev-node/node"
+	"github.com/evstack/ev-node/pkg/genesis"
+	"github.com/evstack/ev-node/pkg/p2p"
+	"github.com/evstack/ev-node/pkg/p2p/key"
+	"github.com/evstack/ev-node/pkg/sequencers/solo"
+	pkgsigner "github.com/evstack/ev-node/pkg/signer"
+	"github.com/evstack/ev-node/pkg/signer/file"
+	"github.com/evstack/ev-node/pkg/store"
+)
+
+type runFlags struct {
+	// Fibre
+	consensusGRPC string
+	chainID       string
+	keyringDir    string
+	keyName       string
+	headerNS      string
+	dataNS        string
+
+	// ev-node tuning
+	blockTime        time.Duration
+	daBlockTime      time.Duration
+	batchingStrategy string
+	scrapeInterval   time.Duration
+	maxPending       uint64
+	signerPassphrase string
+
+	// Bench
+	homeDir       string
+	keepHome      bool
+	duration      time.Duration
+	workers       int
+	txSize        int
+	mempoolSize   int
+	statsInterval time.Duration
+
+	// Observability
+	prometheus     bool
+	prometheusAddr string
+	logLevel       string
+}
+
+func runCmd() *cobra.Command {
+	f := runFlags{}
+	cmd := &cobra.Command{
+		Use:   "run",
+		Short: "Run the bench: start a single-sequencer ev-node against a Fibre network and pump load",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return runBench(cmd.Context(), f)
+		},
+	}
+
+	flags := cmd.Flags()
+
+	flags.StringVar(&f.consensusGRPC, "consensus-grpc", "", "celestia-app gRPC address (host:port). Required.")
+	flags.StringVar(&f.chainID, "chain-id", "", "celestia-app consensus chain ID. Required.")
+	flags.StringVar(&f.keyringDir, "keyring-dir", defaultKeyringDir(), "directory holding the bench cosmos keyring (test backend)")
+	flags.StringVar(&f.keyName, "key-name", "default", "name of the key in the keyring used to sign Fibre payment promises")
+	flags.StringVar(&f.headerNS, "header-namespace", "fb-bench-h", "namespace string for ev-node block headers (10 bytes after hashing)")
+	flags.StringVar(&f.dataNS, "data-namespace", "fb-bench-d", "namespace string for ev-node block data")
+
+	flags.DurationVar(&f.blockTime, "block-time", time.Second, "ev-node block production interval")
+	flags.DurationVar(&f.daBlockTime, "da-block-time", time.Second, "DA layer block time hint (controls submitter cadence)")
+	flags.StringVar(&f.batchingStrategy, "batching-strategy", "immediate", "ev-node DA batching strategy: immediate|size|time|adaptive")
+	flags.DurationVar(&f.scrapeInterval, "reaper-interval", 100*time.Millisecond, "how often the reaper drains the mempool")
+	flags.Uint64Var(&f.maxPending, "max-pending", 0, "max pending headers/data before block production pauses (0 = unlimited)")
+	flags.StringVar(&f.signerPassphrase, "signer-passphrase", "fiber-bench-passphrase", "passphrase for the ev-node file signer (block-signing key, NOT the cosmos one)")
+
+	flags.StringVar(&f.homeDir, "home", defaultNodeHome(), "ev-node home directory (signer, store)")
+	flags.BoolVar(&f.keepHome, "keep-home", false, "do not wipe the ev-node home before starting (resumes prior state)")
+	flags.DurationVar(&f.duration, "duration", 60*time.Second, "how long to run the bench before stopping (0 = until SIGINT)")
+	flags.IntVar(&f.workers, "workers", 32, "number of concurrent tx-injection goroutines")
+	flags.IntVar(&f.txSize, "tx-size", 200, "size of each generated tx in bytes")
+	flags.IntVar(&f.mempoolSize, "mempool-size", 1_000_000, "size of the in-mem executor's mempool channel (backpressure boundary)")
+	flags.DurationVar(&f.statsInterval, "stats-interval", time.Second, "how often to print a stats line")
+
+	flags.BoolVar(&f.prometheus, "prometheus", true, "enable ev-node's Prometheus metrics endpoint")
+	flags.StringVar(&f.prometheusAddr, "prometheus-addr", "127.0.0.1:26660", "address for the ev-node Prometheus endpoint")
+	flags.StringVar(&f.logLevel, "log-level", "info", "ev-node log level (debug|info|warn|error)")
+
+	_ = cobra.MarkFlagRequired(flags, "consensus-grpc")
+	_ = cobra.MarkFlagRequired(flags, "chain-id")
+
+	return cmd
+}
+
+func runBench(parentCtx context.Context, f runFlags) error {
+	// Single root context for everything; SIGINT cancels.
+	ctx, cancel := signal.NotifyContext(parentCtx, os.Interrupt, syscall.SIGTERM)
+	defer cancel()
+
+	logger := setupLogger(f.logLevel)
+
+	if !f.keepHome {
+		_ = os.RemoveAll(f.homeDir)
+	}
+	if err := os.MkdirAll(f.homeDir, 0o755); err != nil {
+		return fmt.Errorf("create home %s: %w", f.homeDir, err)
+	}
+
+	// 1) Open the cosmos keyring (must already contain --key-name; we don't
+	// auto-create here so that operator-funded keys aren't accidentally
+	// regenerated when bench runs are re-launched).
+	kr, err := openKeyring(f.keyringDir)
+	if err != nil {
+		return fmt.Errorf("open keyring at %s: %w", f.keyringDir, err)
+	}
+	rec, err := kr.Key(f.keyName)
+	if err != nil {
+		return fmt.Errorf("key %q not found in keyring %s — run `fiber-bench keys add %s` first: %w",
+			f.keyName, f.keyringDir, f.keyName, err)
+	}
+	addr, err := rec.GetAddress()
+	if err != nil {
+		return fmt.Errorf("derive key address: %w", err)
+	}
+	logger.Info().Str("address", addr.String()).Str("key", f.keyName).Msg("loaded fibre signing key")
+
+	// 2) Build the bridge-bypass Fibre adapter.
+	logger.Info().Str("grpc", f.consensusGRPC).Msg("dialing consensus gRPC")
+	innerFiberClient, fiberClose, err := buildFibreAdapter(ctx, f.consensusGRPC, f.keyName, kr)
+	if err != nil {
+		return fmt.Errorf("build fibre adapter: %w", err)
+	}
+	defer func() {
+		if err := fiberClose(); err != nil {
+			logger.Warn().Err(err).Msg("fibre adapter close")
+		}
+	}()
+	// Wrap in a latency-recording proxy so the stats printer can show
+	// per-Upload p50/p99 — without this we can't tell whether the
+	// production-vs-DA-settlement gap comes from ev-node's submitter
+	// serialization (one header + one data Upload in flight at a time)
+	// or from actual Fibre Upload latency.
+	fiberClient := newInstrumentedAdapter(innerFiberClient)
+
+	// 3) Build the ev-node file signer (separate key — block signing, not
+	// fibre payments). Created in the home dir if missing.
+	signerDir := filepath.Join(f.homeDir, "signer")
+	if err := os.MkdirAll(signerDir, 0o750); err != nil {
+		return fmt.Errorf("create signer dir: %w", err)
+	}
+	signerFile := filepath.Join(signerDir, "signer.json")
+	var signer pkgsigner.Signer
+	if _, err := os.Stat(signerFile); os.IsNotExist(err) {
+		s, err := file.CreateFileSystemSigner(signerDir, []byte(f.signerPassphrase))
+		if err != nil {
+			return fmt.Errorf("create file signer: %w", err)
+		}
+		signer = s
+	} else {
+		s, err := file.LoadFileSystemSigner(signerDir, []byte(f.signerPassphrase))
+		if err != nil {
+			return fmt.Errorf("load file signer: %w", err)
+		}
+		signer = s
+	}
+	signerAddr, err := signer.GetAddress()
+	if err != nil {
+		return fmt.Errorf("signer address: %w", err)
+	}
+
+	// 4) Genesis. Single proposer = our signer.
+	gen := genesis.NewGenesis(f.chainID, 1, time.Now().UTC(), signerAddr)
+	if err := gen.Validate(); err != nil {
+		return fmt.Errorf("invalid genesis: %w", err)
+	}
+
+	// 5) ev-node config. P2P listen on a random port; ev-node disables p2p
+	// outbound when fiber is enabled, but the libp2p host is still
+	// constructed, so we still need a port.
+	cfg := evconfig.DefaultConfig()
+	cfg.RootDir = f.homeDir
+	cfg.DBPath = "data"
+	cfg.Node.Aggregator = true
+	cfg.Node.BlockTime = evconfig.DurationWrapper{Duration: f.blockTime}
+	cfg.Node.LazyMode = false
+	cfg.Node.MaxPendingHeadersAndData = f.maxPending
+	cfg.Node.ScrapeInterval = evconfig.DurationWrapper{Duration: f.scrapeInterval}
+
+	cfg.DA.BlockTime = evconfig.DurationWrapper{Duration: f.daBlockTime}
+	cfg.DA.Namespace = f.headerNS
+	cfg.DA.DataNamespace = f.dataNS
+	cfg.DA.BatchingStrategy = f.batchingStrategy
+	cfg.DA.RequestTimeout = evconfig.DurationWrapper{Duration: 60 * time.Second}
+	cfg.DA.Fiber.Enabled = true
+	cfg.DA.Fiber.ConsensusAddress = f.consensusGRPC
+	cfg.DA.Fiber.ConsensusChainID = f.chainID
+	// BridgeAddress is required by config validation when fiber enabled,
+	// but we never use it. Set a syntactically-valid placeholder.
+	cfg.DA.Fiber.BridgeAddress = "ws://127.0.0.1:0"
+	cfg.DA.Fiber.KeyName = f.keyName
+
+	cfg.P2P.ListenAddress = "/ip4/127.0.0.1/tcp/0"
+	cfg.P2P.DisableConnectionGater = true
+
+	cfg.Instrumentation.Prometheus = f.prometheus
+	cfg.Instrumentation.PrometheusListenAddr = f.prometheusAddr
+	cfg.Instrumentation.Pprof = false
+
+	cfg.RPC.Address = "127.0.0.1:0"
+	cfg.Log.Level = f.logLevel
+	cfg.Signer.SignerType = "file"
+	cfg.Signer.SignerPath = signerDir
+
+	// Validate fiber config the way ev-node would.
+	if err := cfg.DA.Fiber.Validate(); err != nil {
+		return fmt.Errorf("fiber config: %w", err)
+	}
+
+	// 6) Datastore for ev-node's internal state.
+	ds, err := store.NewDefaultKVStore(f.homeDir, cfg.DBPath, "fiber-bench")
+	if err != nil {
+		return fmt.Errorf("open datastore: %w", err)
+	}
+
+	// 7) Executor + sequencer.
+	exec := newInMemExecutor(f.mempoolSize)
+	seq := solo.NewSoloSequencer(logger, []byte(gen.ChainID), exec)
+
+	// 8) DA client wraps our adapter as the FullDAClient ev-node expects.
+	daClient := block.NewFiberDAClient(fiberClient, cfg, logger, gen.DAStartHeight)
+
+	// 9) p2p client (required by NewNode signature; outbound is disabled
+	// internally when fiber is enabled).
+	nodePrivKey, _, err := crypto.GenerateEd25519Key(rand.Reader)
+	if err != nil {
+		return fmt.Errorf("generate node key: %w", err)
+	}
+	nodeKey := &key.NodeKey{PrivKey: nodePrivKey}
+	p2pClient, err := p2p.NewClient(cfg.P2P, nodeKey.PrivKey, datastore.NewMapDatastore(), gen.ChainID, logger, nil)
+	if err != nil {
+		return fmt.Errorf("create p2p client: %w", err)
+	}
+
+	// 10) Build the node.
+	rollnode, err := node.NewNode(
+		cfg,
+		exec,
+		seq,
+		daClient,
+		signer,
+		p2pClient,
+		gen,
+		ds,
+		node.DefaultMetricsProvider(cfg.Instrumentation),
+		logger,
+		node.NodeOptions{},
+	)
+	if err != nil {
+		return fmt.Errorf("create node: %w", err)
+	}
+
+	// 11) Start the node.
+	nodeErrCh := make(chan error, 1)
+	var nodeWg sync.WaitGroup
+	nodeWg.Add(1)
+	go func() {
+		defer nodeWg.Done()
+		defer func() {
+			if r := recover(); r != nil {
+				nodeErrCh <- fmt.Errorf("node panicked: %v", r)
+			}
+		}()
+		nodeErrCh <- rollnode.Run(ctx)
+	}()
+
+	// 12) Start the load generator.
+	loaderWg := sync.WaitGroup{}
+	loaderWg.Add(1)
+	go func() {
+		defer loaderWg.Done()
+		newLoader(exec, f.workers, f.txSize).run(ctx)
+	}()
+
+	// 13) Stats printer + duration timer.
+	logger.Info().
+		Dur("duration", f.duration).
+		Int("workers", f.workers).
+		Int("tx_size", f.txSize).
+		Int("mempool", f.mempoolSize).
+		Dur("block_time", f.blockTime).
+		Str("batching", f.batchingStrategy).
+		Msg("bench started")
+
+	printer := newStatsPrinter(exec, f.prometheusAddr, f.txSize, fiberClient)
+	printer.start(ctx, f.statsInterval)
+
+	if f.duration > 0 {
+		select {
+		case <-time.After(f.duration):
+			logger.Info().Msg("duration elapsed, stopping")
+		case err := <-nodeErrCh:
+			if err != nil && !errors.Is(err, context.Canceled) {
+				logger.Error().Err(err).Msg("node exited unexpectedly")
+				cancel()
+				return err
+			}
+		case <-ctx.Done():
+		}
+	} else {
+		select {
+		case err := <-nodeErrCh:
+			if err != nil && !errors.Is(err, context.Canceled) {
+				logger.Error().Err(err).Msg("node exited unexpectedly")
+				cancel()
+				return err
+			}
+		case <-ctx.Done():
+		}
+	}
+
+	cancel()
+	loaderWg.Wait()
+	nodeWg.Wait()
+	printer.printFinalSummary()
+	return nil
+}
+
+func setupLogger(level string) zerolog.Logger {
+	lvl, err := zerolog.ParseLevel(level)
+	if err != nil {
+		lvl = zerolog.InfoLevel
+	}
+	zerolog.SetGlobalLevel(lvl)
+	return zerolog.New(zerolog.ConsoleWriter{Out: os.Stderr}).
+		With().Timestamp().Str("component", "fiber-bench").Logger()
+}
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/stats.go b/tools/celestia-node-fiber/cmd/fiber-bench/stats.go
new file mode 100644
index 0000000000..f45fb91c06
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/stats.go
@@ -0,0 +1,380 @@
+package main
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+)
+
+// statsPrinter periodically prints a one-line summary combining counters
+// from the in-mem executor and selected Prometheus metrics scraped from
+// ev-node's instrumentation endpoint.
+//
+// Why scrape Prometheus instead of reaching into ev-node? Because the
+// metrics ev-node already exports give us the answers we want
+// (committed height, txs-per-block, pending blobs, block-production
+// duration histogram) and scraping is zero source diff. It also makes
+// the same numbers available to a real Prometheus once we move past the
+// fail-fast baseline.
+type statsPrinter struct {
+	exec       *inMemExecutor
+	promURL    string
+	httpClient *http.Client
+	txSize     int
+	adapter    *instrumentedAdapter
+
+	mu         sync.Mutex
+	startedAt  time.Time
+	lastTick   time.Time
+	lastInject uint64
+	lastTxs    float64
+	lastBlocks float64
+	lastDaInc  float64
+	peakInjRPS float64
+	peakTxRPS  float64
+	peakDaRPS  float64
+
+	// lastSnapshot caches the last successful Prometheus scrape so
+	// the final summary still has values after the node has shut
+	// down (its /metrics endpoint goes away with it).
+	lastSnapshot map[string]float64
+}
+
+func newStatsPrinter(exec *inMemExecutor, promListenAddr string, txSize int, adapter *instrumentedAdapter) *statsPrinter {
+	url := ""
+	if promListenAddr != "" {
+		// PrometheusListenAddr can be ":26660" or "127.0.0.1:26660";
+		// normalize to a fetchable URL.
+		host := promListenAddr
+		if strings.HasPrefix(host, ":") {
+			host = "127.0.0.1" + host
+		}
+		url = "http://" + host + "/metrics"
+	}
+	return &statsPrinter{
+		exec:       exec,
+		promURL:    url,
+		httpClient: &http.Client{Timeout: 500 * time.Millisecond},
+		txSize:     txSize,
+		adapter:    adapter,
+	}
+}
+
+// start prints a header then ticks every interval until ctx is done.
+func (p *statsPrinter) start(ctx context.Context, interval time.Duration) {
+	if interval <= 0 {
+		interval = time.Second
+	}
+	now := time.Now()
+	p.mu.Lock()
+	p.startedAt = now
+	p.lastTick = now
+	p.mu.Unlock()
+
+	fmt.Println()
+	// Each rate column shows "<tx/s> / <MB/s>" so tps and bandwidth
+	// land side by side without doubling the column count. The blob
+	// size at the latest block stays as an absolute (blob_KB) since
+	// it's a level, not a rate.
+	fmt.Printf("%-9s %-15s %-15s %-15s %-7s %-9s %-7s %-8s %-7s %-10s %s\n",
+		"elapsed", "inj tps/MBs", "exec tps/MBs", "da tps/MBs",
+		"prod_h", "da_inc_h", "txs/blk", "blob_KB", "pending", "drops", "upload latency")
+	fmt.Println(strings.Repeat("-", 140))
+
+	go func() {
+		ticker := time.NewTicker(interval)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-ticker.C:
+				p.tick()
+			}
+		}
+	}()
+}
+
+func (p *statsPrinter) tick() {
+	now := time.Now()
+
+	injected, dropped, blocks, txs, _ := p.exec.Stats()
+	mempool := p.exec.MempoolDepth()
+
+	prom := p.scrapePrometheus()
+	if len(prom) > 0 {
+		p.mu.Lock()
+		p.lastSnapshot = prom
+		p.mu.Unlock()
+	}
+	// ev-node prefixes its metrics with the namespace from the metrics
+	// provider — for the aggregator path this is "evnode_sequencer".
+	producedHeight := prom["evnode_sequencer_height"]
+	daInclusionHeight := prom["evnode_sequencer_da_inclusion_height"]
+	totalTxs := prom["evnode_sequencer_total_txs"]
+	if totalTxs == 0 {
+		totalTxs = float64(txs)
+	}
+	blockBytes := prom["evnode_sequencer_block_size_bytes"]
+	pending := prom["evnode_sequencer_da_submitter_pending_blobs"]
+	blocksGauge := float64(blocks)
+	if producedHeight > blocksGauge {
+		blocksGauge = producedHeight
+	}
+	txsPerBlock := txsPerBlockMetric(blocksGauge, totalTxs)
+
+	p.mu.Lock()
+	dt := now.Sub(p.lastTick).Seconds()
+	if dt < 0.001 {
+		p.mu.Unlock()
+		return
+	}
+	injRPS := float64(injected-p.lastInject) / dt
+	txRPS := (totalTxs - p.lastTxs) / dt
+	daSettledRPS := (daInclusionHeight - p.lastDaInc) * txsPerBlock / dt
+	if injRPS > p.peakInjRPS {
+		p.peakInjRPS = injRPS
+	}
+	if txRPS > p.peakTxRPS {
+		p.peakTxRPS = txRPS
+	}
+	if daSettledRPS > p.peakDaRPS {
+		p.peakDaRPS = daSettledRPS
+	}
+	elapsed := now.Sub(p.startedAt).Truncate(time.Millisecond)
+	p.lastTick = now
+	p.lastInject = injected
+	p.lastTxs = totalTxs
+	p.lastBlocks = blocksGauge
+	p.lastDaInc = daInclusionHeight
+	p.mu.Unlock()
+
+	txSizeBytes := float64(p.txSize)
+
+	upStats := p.adapter.uploadStats()
+
+	fmt.Printf("%-9s %-15s %-15s %-15s %-7.0f %-9.0f %-7.0f %-8.0f %-7.0f %-10d %s\n",
+		elapsed.String(),
+		formatRate(injRPS, txSizeBytes),
+		formatRate(txRPS, txSizeBytes),
+		formatRate(daSettledRPS, txSizeBytes),
+		producedHeight, daInclusionHeight, txsPerBlock, blockBytes/1024, pending, dropped,
+		formatUploadLatency(upStats),
+	)
+
+	_ = mempool // currently we report drops, not depth — the mempool is large enough that depth isn't the meaningful signal
+}
+
+// formatUploadLatency renders Upload latency stats as a compact suffix
+// for the live table. Returns "-" if no samples yet.
+func formatUploadLatency(s uploadStats) string {
+	if s.Count == 0 {
+		return "upload[-]"
+	}
+	failPart := ""
+	if s.Failures > 0 {
+		failPart = fmt.Sprintf(",fails=%d", s.Failures)
+	}
+	return fmt.Sprintf("upload[n=%d p50=%v p99=%v%s]",
+		s.Count, s.P50.Truncate(time.Millisecond), s.P99.Truncate(time.Millisecond), failPart)
+}
+
+// formatRate renders "<tps> / <MB/s>" compactly, rounding to whole MB/s
+// since sub-MB/s precision isn't useful at our throughput levels and a
+// short string keeps the table aligned.
+func formatRate(rps, txSizeBytes float64) string {
+	mbps := rps * txSizeBytes / (1024 * 1024)
+	switch {
+	case rps >= 1_000_000:
+		return fmt.Sprintf("%.1fM/%.0fMB", rps/1_000_000, mbps)
+	case rps >= 1_000:
+		return fmt.Sprintf("%.0fk/%.0fMB", rps/1_000, mbps)
+	default:
+		return fmt.Sprintf("%.0f/%.1fMB", rps, mbps)
+	}
+}
+
+// txsPerBlockMetric computes the running mean tx/blk over all produced
+// blocks. Only meaningful once at least one block has been produced;
+// returns 0 otherwise.
+func txsPerBlockMetric(blocks, totalTxs float64) float64 {
+	if blocks <= 0 {
+		return 0
+	}
+	return totalTxs / blocks
+}
+
+// scrapePrometheus pulls the ev-node /metrics endpoint and parses just
+// the gauges/counters we care about. Best effort: returns empty map on
+// any error so the bench keeps running even if metrics aren't ready yet.
+func (p *statsPrinter) scrapePrometheus() map[string]float64 {
+	out := map[string]float64{}
+	if p.promURL == "" {
+		return out
+	}
+	resp, err := p.httpClient.Get(p.promURL)
+	if err != nil {
+		return out
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		_, _ = io.Copy(io.Discard, resp.Body)
+		return out
+	}
+
+	wanted := map[string]struct{}{
+		"evnode_sequencer_height":                     {},
+		"evnode_sequencer_latest_block_height":        {},
+		"evnode_sequencer_da_inclusion_height":        {},
+		"evnode_sequencer_total_txs":                  {},
+		"evnode_sequencer_num_txs":                    {},
+		"evnode_sequencer_block_size_bytes":           {},
+		"evnode_sequencer_da_submitter_pending_blobs": {},
+	}
+
+	scanner := bufio.NewScanner(resp.Body)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		// "metric_name{labels...} value [timestamp]" — strip labels and
+		// trailing timestamp; we don't use them.
+		nameEnd := strings.IndexAny(line, "{ ")
+		if nameEnd < 0 {
+			continue
+		}
+		name := line[:nameEnd]
+		if _, ok := wanted[name]; !ok {
+			continue
+		}
+		// Skip past labels if present.
+		rest := line[nameEnd:]
+		if rest[0] == '{' {
+			closeIdx := strings.Index(rest, "}")
+			if closeIdx < 0 {
+				continue
+			}
+			rest = rest[closeIdx+1:]
+		}
+		rest = strings.TrimSpace(rest)
+		valEnd := strings.IndexByte(rest, ' ')
+		valStr := rest
+		if valEnd >= 0 {
+			valStr = rest[:valEnd]
+		}
+		v, err := strconv.ParseFloat(valStr, 64)
+		if err != nil {
+			continue
+		}
+		out[name] = v
+	}
+	return out
+}
+
+func (p *statsPrinter) printFinalSummary() {
+	injected, dropped, blocks, txs, mempoolHigh := p.exec.Stats()
+	// Prefer a fresh scrape, but fall back to the last live snapshot:
+	// the node's /metrics endpoint goes away as it shuts down, so a
+	// post-stop scrape returns an empty map and the summary would
+	// otherwise print zeros.
+	prom := p.scrapePrometheus()
+	p.mu.Lock()
+	if len(prom) == 0 && p.lastSnapshot != nil {
+		prom = p.lastSnapshot
+	}
+	p.mu.Unlock()
+	producedHeight := uint64(prom["evnode_sequencer_height"])
+	daInclusionHeight := uint64(prom["evnode_sequencer_da_inclusion_height"])
+	totalTxs := uint64(prom["evnode_sequencer_total_txs"])
+	if totalTxs == 0 {
+		totalTxs = txs
+	}
+
+	p.mu.Lock()
+	elapsed := time.Since(p.startedAt)
+	peakInj := p.peakInjRPS
+	peakTx := p.peakTxRPS
+	p.mu.Unlock()
+
+	avgInj := 0.0
+	if elapsed.Seconds() > 0 {
+		avgInj = float64(injected) / elapsed.Seconds()
+	}
+	avgTx := 0.0
+	if elapsed.Seconds() > 0 {
+		avgTx = float64(totalTxs) / elapsed.Seconds()
+	}
+	txsPerBlock := 0.0
+	if blocks > 0 {
+		txsPerBlock = float64(totalTxs) / float64(blocks)
+	}
+	txSize := float64(p.txSize)
+	mb := func(rps float64) float64 { return rps * txSize / (1024 * 1024) }
+
+	p.mu.Lock()
+	peakDa := p.peakDaRPS
+	p.mu.Unlock()
+
+	var avgDaSettled float64
+	if daInclusionHeight > 0 && elapsed.Seconds() > 0 {
+		avgDaSettled = float64(daInclusionHeight) * txsPerBlock / elapsed.Seconds()
+	}
+
+	fmt.Println()
+	fmt.Println(strings.Repeat("=", 70))
+	fmt.Println("                       BASELINE SUMMARY")
+	fmt.Println(strings.Repeat("=", 70))
+	fmt.Printf("Duration:               %s\n", elapsed.Truncate(time.Millisecond))
+	fmt.Printf("Tx size:                %d B\n", p.txSize)
+	fmt.Println()
+	fmt.Printf("Injection:              avg %.0f tx/s (%.1f MB/s), peak %.0f tx/s (%.0f MB/s)\n",
+		avgInj, mb(avgInj), peakInj, mb(peakInj))
+	fmt.Printf("Block production:       avg %.0f tx/s (%.2f MB/s), peak %.0f tx/s (%.1f MB/s)\n",
+		avgTx, mb(avgTx), peakTx, mb(peakTx))
+	fmt.Printf("DA-settled:             avg %.0f tx/s (%.2f MB/s), peak %.0f tx/s (%.1f MB/s)\n",
+		avgDaSettled, mb(avgDaSettled), peakDa, mb(peakDa))
+	fmt.Println()
+	fmt.Printf("Blocks produced:        %d (prod_h=%d)\n", blocks, producedHeight)
+	fmt.Printf("DA-included height:     %d (lag = %d blocks behind production)\n",
+		daInclusionHeight, producedHeight-daInclusionHeight)
+	fmt.Printf("Txs into blocks:        %d (%.1f tx/blk)\n", totalTxs, txsPerBlock)
+	fmt.Printf("Dropped (mempool full): %d\n", dropped)
+	fmt.Printf("Mempool high-water:     %d\n", mempoolHigh)
+
+	upStats := p.adapter.uploadStats()
+	if upStats.Count > 0 {
+		fmt.Println()
+		fmt.Println("Fibre Upload latency (per call observed at the adapter):")
+		fmt.Printf("  count:     %d (failures: %d)\n", upStats.Count, upStats.Failures)
+		fmt.Printf("  mean:      %s\n", upStats.Mean.Truncate(time.Millisecond))
+		fmt.Printf("  p50:       %s\n", upStats.P50.Truncate(time.Millisecond))
+		fmt.Printf("  p99:       %s\n", upStats.P99.Truncate(time.Millisecond))
+		fmt.Printf("  max:       %s\n", upStats.Max.Truncate(time.Millisecond))
+		// ev-node's submitter runs ONE header-Upload goroutine and
+		// ONE data-Upload goroutine concurrently (each TryLock'd via
+		// its own mutex in submitter.go). A block settles only when
+		// BOTH its header and data Uploads have returned, and each
+		// stream submits at most 1 Upload per mean_latency seconds —
+		// so the per-stream cap is 1/mean blocks/s, and the block
+		// settlement cap (min of the two) equals it. We print this
+		// so the operator can compare it to the observed da_inc_h
+		// rate and tell apart "Fibre Upload is slow" from "ev-node
+		// is leaving capacity on the table".
+		if upStats.Mean > 0 {
+			capBlocksPerSec := 1.0 / upStats.Mean.Seconds()
+			fmt.Printf("  implied cap (1/mean per stream): %.2f blocks/s ≈ %.0f tx/s (%.2f MB/s)\n",
+				capBlocksPerSec,
+				capBlocksPerSec*txsPerBlock,
+				capBlocksPerSec*txsPerBlock*txSize/(1024*1024),
+			)
+		}
+	}
+	fmt.Println(strings.Repeat("=", 70))
+}
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/util.go b/tools/celestia-node-fiber/cmd/fiber-bench/util.go
new file mode 100644
index 0000000000..c91354f989
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/util.go
@@ -0,0 +1,26 @@
+package main
+
+import (
+	"os"
+	"path/filepath"
+)
+
+// defaultKeyringDir is where we put the bench's cosmos keyring by default.
+// Lives under the user's home so multiple bench runs share the same key.
+func defaultKeyringDir() string {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return ".fiber-bench"
+	}
+	return filepath.Join(home, ".fiber-bench", "keyring")
+}
+
+// defaultNodeHome is the ev-node working directory (signer, store, config).
+// Cleared on each run by default — see runCmd's --keep-home flag.
+func defaultNodeHome() string {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return ".fiber-bench-node"
+	}
+	return filepath.Join(home, ".fiber-bench", "node")
+}
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/yield.go b/tools/celestia-node-fiber/cmd/fiber-bench/yield.go
new file mode 100644
index 0000000000..2764090ab7
--- /dev/null
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/yield.go
@@ -0,0 +1,8 @@
+package main
+
+import "runtime"
+
+// runtimeYield is a thin wrapper to keep the loader file free of stdlib
+// noise. Splitting it out makes future replacement (e.g. with a backoff
+// strategy) a one-file change.
+func runtimeYield() { runtime.Gosched() }

From 08897cd5c75bb2595b938946e86c09e12ef33fba Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 20:12:11 +0100
Subject: [PATCH 02/18] feat(common): default MaxBlobSize to Fibre's actual cap
 (128 MiB - 5 B)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 5 MB default left ~25x of Fibre's per-blob capacity unused: Fibre's
MaxBlobSize is 1 << 27 bytes (128 MiB) and the protocol's per-blob header
is 5 bytes (1 byte version + 4 byte data size, see celestia-app/v9/fibre/
blob.go::blobHeaderLen and protocol_params.go::MaxBlobSize). Anchoring
ev-node's default to the actual cap lets each block carry the full
~128 MiB of payload, multiplying settlement throughput at the same
per-Upload latency.

Also drops the bench's executor.FilterTxs overhead margin: the cap
already lives at the right level (Fibre's MaxBlobSize), and reserving
extra in the executor would just leave bandwidth on the table again. If
proto/metadata overhead pushes a marshaled block over the cap, that
should be addressed in ev-node's block producer rather than worked
around in test fixtures.

The link-time override is kept for callers that want to constrain the
default further (smaller cap → smaller blocks → lower per-Upload
latency for environments where that matters).
---
 block/internal/common/consts.go               | 11 ++++++++--
 .../cmd/fiber-bench/executor.go               | 20 ++++---------------
 2 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/block/internal/common/consts.go b/block/internal/common/consts.go
index 840b2faa97..b7b13318b8 100644
--- a/block/internal/common/consts.go
+++ b/block/internal/common/consts.go
@@ -2,11 +2,18 @@ package common
 
 import "strconv"
 
+// fibreMaxPayload is the maximum payload Fibre will accept in a single
+// blob: protocol max blob size (1 << 27 = 128 MiB) minus the 5-byte
+// Fibre blob header (1 byte version + 4 byte data size). See
+// celestia-app/v9/fibre/blob.go (blobHeaderLen) and fibre/protocol_params.go
+// (MaxBlobSize). Sized to fit a full Fibre blob and nothing more.
+const fibreMaxPayload = (1 << 27) - 5
+
 // defaultMaxBlobSizeStr holds the string representation of the default blob
 // size limit. Override at link time via:
 //
 //	go build -ldflags "-X github.com/evstack/ev-node/block/internal/common.defaultMaxBlobSizeStr=125829120"
-var defaultMaxBlobSizeStr = "5242880" // 5 MB
+var defaultMaxBlobSizeStr = strconv.FormatUint(fibreMaxPayload, 10)
 
 // DefaultMaxBlobSize is the max blob size limit used for blob submission.
 var DefaultMaxBlobSize uint64
@@ -14,7 +21,7 @@ var DefaultMaxBlobSize uint64
 func init() {
 	v, err := strconv.ParseUint(defaultMaxBlobSizeStr, 10, 64)
 	if err != nil || v == 0 {
-		DefaultMaxBlobSize = 5 * 1024 * 1024 // 5 MB fallback
+		DefaultMaxBlobSize = fibreMaxPayload
 		return
 	}
 	DefaultMaxBlobSize = v
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/executor.go b/tools/celestia-node-fiber/cmd/fiber-bench/executor.go
index 1368213fe6..088d21ffef 100644
--- a/tools/celestia-node-fiber/cmd/fiber-bench/executor.go
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/executor.go
@@ -115,28 +115,16 @@ func (e *inMemExecutor) GetExecutionInfo(_ context.Context) (coreexecution.Execu
 	return coreexecution.ExecutionInfo{MaxGas: 0}, nil
 }
 
-// blockOverheadMargin is the safety margin we subtract from ev-node's
-// MaxBytes hint inside FilterTxs. ev-node currently caps raw tx bytes at
-// MaxBlobSize, but the actual DA blob also carries types.Data metadata,
-// signature, and protobuf framing — empirically ~80 KB at 200 B/tx,
-// 26k txs/block. Without this margin, submission of a full block hits
-// "single item exceeds DA blob size limit" and halts the node.
-//
-// This is a workaround for a real ev-node accounting bug
-// (block/internal/executing/executor.go:670 hardcodes MaxBytes =
-// MaxBlobSize without reserving room for metadata/proto). 256 KB is
-// generous and avoids the failure mode reliably.
-const blockOverheadMargin uint64 = 256 * 1024
-
 // FilterTxs enforces the configured per-block byte budget. Mirrors the
 // existing testapp KV executor's behavior: oversized txs are dropped, the
 // rest fill until the budget is hit and overflow is postponed for the
 // next block. We don't validate tx content — txs from the load generator
 // are well-formed by construction.
+//
+// We honor maxBytes as-is. Per-block proto/Metadata overhead is the
+// responsibility of the block-size cap (now anchored to Fibre's actual
+// MaxPayload in block/internal/common/consts.go), not the executor.
 func (e *inMemExecutor) FilterTxs(_ context.Context, txs [][]byte, maxBytes, _ uint64, _ bool) ([]coreexecution.FilterStatus, error) {
-	if maxBytes > blockOverheadMargin {
-		maxBytes -= blockOverheadMargin
-	}
 	out := make([]coreexecution.FilterStatus, len(txs))
 	var used uint64
 	limitReached := false

From 6ac59c728c9eb84d9c3e6c522fd3fb7ee70d586d Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 20:37:44 +0100
Subject: [PATCH 03/18] fix(block/executing): reserve proto/metadata overhead
 in RetrieveBatch's MaxBytes

The block producer was passing MaxBytes = MaxBlobSize directly to
GetNextBatch, but the marshaled types.Data (txs + Metadata + proto
framing) is larger than the sum of raw tx bytes. The per-tx proto
length-prefix is ~3 bytes, which is small in absolute terms but adds up
to 1.5% overhead at typical 200 B txs and over 1 MB of overhead at
peak block sizes (128 MiB). Without reserving this margin, a fully
packed batch builds a block that exceeds the submitter's MaxBlobSize
check and halts as 'unrecoverable: single item exceeds DA blob size
limit'.

Reserving in the block producer (rather than in FilterTxs) keeps the
executor's view of MaxBytes equal to the raw-tx budget, which is what
FilterTxs is meant to enforce.
---
 block/internal/executing/executor.go | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/block/internal/executing/executor.go b/block/internal/executing/executor.go
index 32ca4e596b..449144b681 100644
--- a/block/internal/executing/executor.go
+++ b/block/internal/executing/executor.go
@@ -663,11 +663,26 @@ func (e *Executor) ProduceBlock(ctx context.Context) error {
 	return nil
 }
 
+// blockMarshalOverhead reserves a fraction of MaxBlobSize for the proto
+// framing + Metadata overhead added when types.Data is marshaled into a
+// DA blob. Empirically the per-tx proto length-prefix runs ~3 bytes,
+// which is roughly 1.5% at 200 B txs and stays in that range across
+// realistic tx sizes; 2% gives margin for fixed Metadata fields without
+// leaving meaningful capacity unused. Reserving here (vs. inside
+// FilterTxs) keeps the executor’s view of MaxBytes equal to the raw-tx
+// budget and prevents a fully packed batch from blowing past the
+// submitter’s MaxBlobSize check.
+const blockMarshalOverheadPct = 2
+
 // RetrieveBatch gets the next batch of transactions from the sequencer.
 func (e *Executor) RetrieveBatch(ctx context.Context) (*BatchData, error) {
+	maxTxBytes := common.DefaultMaxBlobSize
+	if reserve := maxTxBytes * blockMarshalOverheadPct / 100; reserve < maxTxBytes {
+		maxTxBytes -= reserve
+	}
 	req := coresequencer.GetNextBatchRequest{
 		Id:            []byte(e.genesis.ChainID),
-		MaxBytes:      common.DefaultMaxBlobSize,
+		MaxBytes:      maxTxBytes,
 		LastBatchData: [][]byte{}, // Can be populated if needed for sequencer context
 	}
 

From ecd7f62bed33cad9050e5d3f40a0d5d3450b6ad5 Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 21:03:55 +0100
Subject: [PATCH 04/18] fix(reaper,cache): make seen-tx retention link-time
 tunable to avoid OOM under load

The seen-tx cache holds a SHA-256 hash for every transaction the reaper
ever drained. With CleanupInterval = 1h and DefaultTxCacheRetention =
24h hardcoded as consts, sustained throughput causes the map to grow
linearly without ever shrinking until the GC pressure or process
memory caps the run. Observed empirically while benchmarking the Fibre
DA path: at ~1.5M tx/s the bench OOM-killed after ~80 s with ~16 GB
RSS, the cache holding ~120 M entries.

Changing both to vars driven by ldflags lets ev-node keep its
production-friendly defaults (memory-cheap dedup over a 24 h window,
swept once an hour) while letting benchmark builds opt into shorter
windows so the cache reaches a steady state. Example for the
fiber-bench tool:

  go build -ldflags "\
    -X github.com/evstack/ev-node/block/internal/cache.defaultTxCacheRetentionStr=30s \
    -X github.com/evstack/ev-node/block/internal/reaping.cleanupIntervalStr=5s"

A real fix probably reaches further (cap entry count, switch to a TTL
cache implementation, or bypass dedup when the caller already
guarantees uniqueness) but these are larger conversations; the
ldflag knob unblocks measurement in the meantime.
---
 block/internal/cache/manager.go  | 23 ++++++++++++++++++++---
 block/internal/reaping/reaper.go | 20 +++++++++++++++++++-
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/block/internal/cache/manager.go b/block/internal/cache/manager.go
index e907002600..298f5499ab 100644
--- a/block/internal/cache/manager.go
+++ b/block/internal/cache/manager.go
@@ -24,11 +24,28 @@ const (
 
 	// DataDAIncludedPrefix is the store key prefix for data DA inclusion tracking.
 	DataDAIncludedPrefix = "cache/data-da-included/"
-
-	// DefaultTxCacheRetention is the default time to keep transaction hashes in cache.
-	DefaultTxCacheRetention = 24 * time.Hour
 )
 
+// defaultTxCacheRetentionStr controls the duration tx hashes are kept in
+// the seen-tx cache before CleanupOldTxs removes them. Override at link
+// time for high-throughput benchmarks where the default 24 h causes the
+// cache to grow until OOM:
+//
+//	go build -ldflags "-X github.com/evstack/ev-node/block/internal/cache.defaultTxCacheRetentionStr=30s"
+var defaultTxCacheRetentionStr = "24h"
+
+// DefaultTxCacheRetention is the resolved retention used by CleanupOldTxs.
+var DefaultTxCacheRetention time.Duration
+
+func init() {
+	d, err := time.ParseDuration(defaultTxCacheRetentionStr)
+	if err != nil || d <= 0 {
+		DefaultTxCacheRetention = 24 * time.Hour
+		return
+	}
+	DefaultTxCacheRetention = d
+}
+
 // CacheManager provides thread-safe cache operations for tracking seen blocks
 // and DA inclusion status.
 type CacheManager interface {
diff --git a/block/internal/reaping/reaper.go b/block/internal/reaping/reaper.go
index d35dbfff3e..8b6fff9807 100644
--- a/block/internal/reaping/reaper.go
+++ b/block/internal/reaping/reaper.go
@@ -21,9 +21,27 @@ import (
 const (
 	// MaxBackoffInterval is the maximum backoff interval for retries
 	MaxBackoffInterval = 30 * time.Second
-	CleanupInterval    = 1 * time.Hour
 )
 
+// cleanupIntervalStr controls how often the reaper sweeps expired hashes
+// from the seen-tx cache. Override at link time for high-throughput
+// benchmarks where the default hourly sweep lets the cache grow to OOM:
+//
+//	go build -ldflags "-X github.com/evstack/ev-node/block/internal/reaping.cleanupIntervalStr=10s"
+var cleanupIntervalStr = "1h"
+
+// CleanupInterval is the resolved sweep period used by reaperLoop.
+var CleanupInterval time.Duration
+
+func init() {
+	d, err := time.ParseDuration(cleanupIntervalStr)
+	if err != nil || d <= 0 {
+		CleanupInterval = time.Hour
+		return
+	}
+	CleanupInterval = d
+}
+
 // Reaper is responsible for periodically retrieving transactions from the executor,
 // filtering out already seen transactions, and submitting new transactions to the sequencer.
 type Reaper struct {

From e0021d978b727ebd371e53e53bc3793b36e9e471 Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 21:17:49 +0100
Subject: [PATCH 05/18] fix(fiber-bench/loader): backoff with sleep when
 mempool is full
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The loader's drop path was runtime.Gosched + immediate retry, which lets
each worker allocate a fresh 200 B tx slice at ~200k iter/s when the
executor's mempool channel is permanently full. With --workers=8 that
is 1.6 M short-lived allocations/s = ~320 MB/s of GC churn, against
nothing useful — the rejected slices never make it into a block.

Sleeping 100 us on a failed InjectTx caps the per-worker drop rate at
~10k/s and makes total allocation pressure scale with --workers as a
proportional backpressure signal rather than a constant maximum-rate
spin. Drops in the live stats line still grow visibly when the mempool
is full, just at a sane rate.

Without this fix the bench OOM-killed under sustained load even with
--max-pending=4 throttling block production: pending blob memory was
bounded but GC could not keep up with the loader's allocation rate
fast enough to prevent runaway heap growth alongside Badger's L0
backlog and ev-node's pending caches.
---
 .../cmd/fiber-bench/yield.go                  | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/yield.go b/tools/celestia-node-fiber/cmd/fiber-bench/yield.go
index 2764090ab7..acd76daabd 100644
--- a/tools/celestia-node-fiber/cmd/fiber-bench/yield.go
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/yield.go
@@ -1,8 +1,17 @@
 package main
 
-import "runtime"
+import "time"
 
-// runtimeYield is a thin wrapper to keep the loader file free of stdlib
-// noise. Splitting it out makes future replacement (e.g. with a backoff
-// strategy) a one-file change.
-func runtimeYield() { runtime.Gosched() }
+// loaderBackoff is what each worker waits when InjectTx returns false
+// because the mempool channel is full. Using a real sleep (rather than
+// runtime.Gosched) caps the per-worker drop rate, which keeps the
+// load generator's allocation pressure proportional to actual drain
+// throughput. Without this, full-mempool workers spin a tight
+// allocate-then-drop loop at ~200k iter/s/worker — millions of
+// short-lived 200 B slices per second across the pool, which drives GC
+// hard and drove the OOM kills observed at sustained load.
+//
+// 100 µs caps a single worker to ~10k drops/s when the mempool is
+// permanently full. Total drop rate scales with --workers and serves
+// as a bounded backpressure signal in the stats line.
+func runtimeYield() { time.Sleep(100 * time.Microsecond) }

From dede63e39ed42a152ee8aeefa0b1a2992277bcfe Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 21:31:32 +0100
Subject: [PATCH 06/18] fix(common): make defaultMaxBlobSizeStr a string
 literal so -ldflags -X works
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A previous version initialised the variable via strconv.FormatUint(...),
which Go's linker treats as a non-constant expression — so -ldflags -X
silently no-ops the override. Every benchmark that tried to set a
smaller MaxBlobSize at link time was actually running with the 128 MiB
default, masking what we were measuring.

The correct form is a plain string literal in the source. The Fibre
cap is documented in the comment so the magic number stays
self-explanatory; init() still parses and falls back to the literal
value if parsing fails.
---
 block/internal/common/consts.go | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/block/internal/common/consts.go b/block/internal/common/consts.go
index b7b13318b8..abe264249e 100644
--- a/block/internal/common/consts.go
+++ b/block/internal/common/consts.go
@@ -2,18 +2,21 @@ package common
 
 import "strconv"
 
-// fibreMaxPayload is the maximum payload Fibre will accept in a single
-// blob: protocol max blob size (1 << 27 = 128 MiB) minus the 5-byte
-// Fibre blob header (1 byte version + 4 byte data size). See
-// celestia-app/v9/fibre/blob.go (blobHeaderLen) and fibre/protocol_params.go
-// (MaxBlobSize). Sized to fit a full Fibre blob and nothing more.
-const fibreMaxPayload = (1 << 27) - 5
-
 // defaultMaxBlobSizeStr holds the string representation of the default blob
-// size limit. Override at link time via:
+// size limit. Anchored to Fibre's actual cap: protocol MaxBlobSize
+// (1 << 27 = 128 MiB) minus the 5-byte Fibre blob header (1 byte version +
+// 4 byte data size). See celestia-app/v9/fibre/blob.go (blobHeaderLen)
+// and fibre/protocol_params.go (MaxBlobSize).
+//
+// MUST be a string literal: Go's `-ldflags "-X ..."` only takes effect
+// on variables initialized to a string constant, NOT a function call.
+// A previous version used strconv.FormatUint here, which compiled but
+// silently ignored ldflag overrides.
+//
+// Override at link time via:
 //
-//	go build -ldflags "-X github.com/evstack/ev-node/block/internal/common.defaultMaxBlobSizeStr=125829120"
-var defaultMaxBlobSizeStr = strconv.FormatUint(fibreMaxPayload, 10)
+//	go build -ldflags "-X github.com/evstack/ev-node/block/internal/common.defaultMaxBlobSizeStr=33554432"
+var defaultMaxBlobSizeStr = "134217723" // 1 << 27 - 5 = 128 MiB - 5 B
 
 // DefaultMaxBlobSize is the max blob size limit used for blob submission.
 var DefaultMaxBlobSize uint64
@@ -21,7 +24,7 @@ var DefaultMaxBlobSize uint64
 func init() {
 	v, err := strconv.ParseUint(defaultMaxBlobSizeStr, 10, 64)
 	if err != nil || v == 0 {
-		DefaultMaxBlobSize = fibreMaxPayload
+		DefaultMaxBlobSize = 134217723
 		return
 	}
 	DefaultMaxBlobSize = v

From 44e977a7fcbe20b5b18bb5babf5793ee3a3118e6 Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 21:38:46 +0100
Subject: [PATCH 07/18] docs: TODO(throughput-cleanup) on the
 DA-blob-vs-raw-tx-budget conflation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

common.DefaultMaxBlobSize is plugged into two semantically different
limits — the raw-tx budget that gates FilterTxs and the marshaled
ceiling that gates submitter retries — and the conflation has been the
root cause of more than one bug while debugging Fibre throughput
(packed blocks marshaling larger than MaxBlobSize, ad-hoc 2%
reservations in RetrieveBatch, etc.). File three TODOs pointing at
each other and at the umbrella note in common/consts.go so the next
person picking this up can do the cleanup atomically rather than
adding more workarounds.

No behavioral change.
---
 block/internal/common/consts.go           | 15 +++++++++++++++
 block/internal/executing/executor.go      |  9 +++++++++
 block/internal/submitting/da_submitter.go | 12 +++++++++---
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/block/internal/common/consts.go b/block/internal/common/consts.go
index abe264249e..386649f1da 100644
--- a/block/internal/common/consts.go
+++ b/block/internal/common/consts.go
@@ -19,6 +19,21 @@ import "strconv"
 var defaultMaxBlobSizeStr = "134217723" // 1 << 27 - 5 = 128 MiB - 5 B
 
 // DefaultMaxBlobSize is the max blob size limit used for blob submission.
+//
+// TODO(throughput-cleanup): this single value is currently plugged in
+// at two semantically different limits and the conflation has caused
+// real bugs (a packed block marshals larger than its raw-tx total, so
+// using MaxBlobSize as both input cap and output cap let blocks blow
+// past the DA cap). Split into two:
+//
+//   MaxBlobSize       — chain-side ceiling on a marshaled DA blob
+//   MaxBlockTxBytes() — derived raw-tx budget = MaxBlobSize - per-block
+//                       marshal overhead. Used by RetrieveBatch /
+//                       FilterTxs.
+//
+// Once that derivation exists, drop the ad-hoc 2% reservation in
+// executing/executor.go::RetrieveBatch and the duplicate cap in
+// submitting/da_submitter.go::defaultRetryPolicy.
 var DefaultMaxBlobSize uint64
 
 func init() {
diff --git a/block/internal/executing/executor.go b/block/internal/executing/executor.go
index 449144b681..69746841ad 100644
--- a/block/internal/executing/executor.go
+++ b/block/internal/executing/executor.go
@@ -672,6 +672,15 @@ func (e *Executor) ProduceBlock(ctx context.Context) error {
 // FilterTxs) keeps the executor’s view of MaxBytes equal to the raw-tx
 // budget and prevents a fully packed batch from blowing past the
 // submitter’s MaxBlobSize check.
+//
+// TODO(throughput-cleanup): this is the workaround half of a deeper
+// issue — common.DefaultMaxBlobSize is used as both the raw-tx
+// budget AND the marshaled-blob ceiling. The right fix is to derive
+// a MaxBlockTxBytes() value once (= MaxBlobSize - overhead) and have
+// RetrieveBatch / FilterTxs / da_submitter.limitBatchBySize all
+// reference the appropriate value rather than each enforcing the
+// same number with their own ad-hoc adjustments. See
+// common/consts.go for the umbrella TODO.
 const blockMarshalOverheadPct = 2
 
 // RetrieveBatch gets the next batch of transactions from the sequencer.
diff --git a/block/internal/submitting/da_submitter.go b/block/internal/submitting/da_submitter.go
index 83f56d9cb5..948f967644 100644
--- a/block/internal/submitting/da_submitter.go
+++ b/block/internal/submitting/da_submitter.go
@@ -47,9 +47,15 @@ type retryPolicy struct {
 
 func defaultRetryPolicy(maxAttempts int, maxDuration time.Duration) retryPolicy {
 	return retryPolicy{
-		MaxAttempts:  maxAttempts,
-		MinBackoff:   initialBackoff,
-		MaxBackoff:   maxDuration,
+		MaxAttempts: maxAttempts,
+		MinBackoff:  initialBackoff,
+		MaxBackoff:  maxDuration,
+		// TODO(throughput-cleanup): same value is used by
+		// executing/executor.go::RetrieveBatch as the raw-tx budget
+		// (with a 2% reservation) and again here as the marshaled
+		// blob ceiling. They are semantically different limits;
+		// the duplication is what made packed-block-larger-than-cap
+		// failures non-obvious. See common/consts.go.
 		MaxBlobBytes: common.DefaultMaxBlobSize,
 	}
 }

From ef84e01e9f35ddfa93af57c844776a9c65fc3dba Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 22:07:44 +0100
Subject: [PATCH 08/18] perf(fiber-da): skip flatten allocation on single-item
 Submit; honor ctx
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three changes in fiber_client.go::Submit, all hot-path correctness/
efficiency wins surfaced while debugging Fibre throughput:

1. Single-item fast path that bypasses flattenBlobs.
   For data blobs, limitBatchBySize already caps each Submit call at
   one item (each block's data already saturates MaxBlobBytes). The
   flatten step was therefore allocating MaxBlobSize bytes and
   memcpy'ing the entire payload solely to prepend the 8-byte
   count/length prefix used by splitBlobs. At 128 MiB blocks that's
   ~128 MB held in two places at once during every Upload. The fast
   path passes data[0] straight through and saves the full copy.

   Wire-format caveat: a retriever (full-node syncer or light client)
   downloading a blob written via this fast path can't decode it —
   splitBlobs always expects the prefix. The right fix is to pair
   this with a per-item Upload model so flatten falls away entirely;
   tracked as a TODO in the source pointing at the concurrent-uploads
   work where that lands naturally.

2. Honor caller's ctx in Upload.
   The previous context.Background() kept Uploads alive past node
   shutdown and was the proximate cause of the "payment promise
   already processed" warnings — a stale Upload would settle on-chain
   after ev-node had already moved on. Threading the caller's ctx
   makes shutdown promptly cancel in-flight Uploads.

3. Correct SubmittedCount on error.
   On a full-Upload failure the result reported len(data)-1 as
   submitted, which both reads weirdly for len==1 (uint64 underflow
   risk in any future arithmetic) and lies to submitToDA's
   prefix-of-success retry advance. Reset to 0 on error.

No behaviour change for the multi-item retrieve path (flatten still
runs when len > 1). Validated via go build / go vet.
---
 block/internal/da/fiber_client.go | 34 ++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/block/internal/da/fiber_client.go b/block/internal/da/fiber_client.go
index 22413a181c..d82174c8fc 100644
--- a/block/internal/da/fiber_client.go
+++ b/block/internal/da/fiber_client.go
@@ -87,9 +87,30 @@ func (c *fiberDAClient) Submit(ctx context.Context, data [][]byte, _ float64, na
 		}
 	}
 
-	flat := flattenBlobs(data)
+	// Single-item fast path: avoid the MaxBlobSize-sized allocation +
+	// memcpy that flattenBlobs would do just to wrap one item in the
+	// 8-byte count/length prefix. With per-item caps already saturating
+	// MaxBlobBytes for data blobs, this is the steady-state path.
+	//
+	// TODO: wire-format compat — splitBlobs always expects the prefix,
+	// so any retriever (full node syncer, light client) downloading a
+	// blob written via this fast path will fail to decode. Address
+	// alongside the concurrent-uploads change by switching to a
+	// per-item Upload model where flatten is no longer needed.
+	var blob []byte
+	if len(data) == 1 {
+		blob = data[0]
+	} else {
+		blob = flattenBlobs(data)
+	}
 
-	result, err := c.fiber.Upload(context.Background(), namespace[len(namespace)-10:], flat)
+	// Honor the caller's context so Upload returns promptly on
+	// shutdown / parent cancellation. The previous context.Background()
+	// kept Uploads alive past node shutdown and contributed to the
+	// "payment promise already processed" warnings we saw in early
+	// runs (a stale Upload would settle after the node had stopped
+	// tracking it).
+	result, err := c.fiber.Upload(ctx, namespace[len(namespace)-10:], blob)
 	if err != nil {
 		code := datypes.StatusError
 		switch {
@@ -103,9 +124,12 @@ func (c *fiberDAClient) Submit(ctx context.Context, data [][]byte, _ float64, na
 
 		return datypes.ResultSubmit{
 			BaseResult: datypes.BaseResult{
-				Code:           code,
-				Message:        fmt.Sprintf("fiber upload failed for blob: %v", err),
-				SubmittedCount: uint64(len(data) - 1),
+				Code: code,
+				Message: fmt.Sprintf("fiber upload failed for blob: %v", err),
+				// On error nothing settled — the previous len(data)-1
+				// reported all-but-one as submitted on full failure,
+				// which lied to the caller's retry/postSubmit logic.
+				SubmittedCount: 0,
 				BlobSize:       blobSize,
 				Timestamp:      time.Now(),
 			},

From 84ecbaf149e79f9e62fe3072afbd919dca7578c6 Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 22:19:41 +0100
Subject: [PATCH 09/18] perf(fiber-da): per-item concurrent Uploads on Submit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fan out one goroutine per item in fiber DA Submit, calling fiber.Upload
concurrently with the caller's ctx. Settlement throughput now scales
linearly with the batch size: previously ev-node's submitter could
only have one Upload in flight per stream (header + data, mutex-locked
in submitter.go), and each Submit further serialized the batch into
one big flatten-encoded blob. With fan-out, a Submit of N items
becomes N concurrent Uploads, and Fibre's ~1.5 s per-Upload latency
amortizes across N.

The result-aggregation honors submitToDA's "prefix of successes"
contract: SubmittedCount = N means items [0..N) succeeded and the
caller will retry [N..end). Reporting interleaved successes would
double-submit blobs and waste escrow; matching prefix semantics keeps
the retry contract intact even when individual Uploads fail
out-of-order.

Pair changes in submitting/da_submitter.go:
- limitBatchBySize gains a maxItems cap (was total-bytes-only). Each
  item is still bounded by maxItemBytes (chain ceiling), but the
  total batch is now bounded by item count, letting multiple
  full-size items flow through one Submit.
- retryPolicy adds MaxItems with a sensible non-fiber default of 1
  (preserves legacy single-item-per-Submit semantics for backends
  that flatten a batch into one blob).
- For the fiber backend, MaxItems is bumped to 16 — covers a 5 min
  run at 1 b/s production with 4–8 pending blocks while leaving
  headroom for memory pressure under MaxBlobSize-sized items.

Wire-format follow-up (see TODO in fiber_client.go::Submit): the
retrieve path in this file still uses splitBlobs which assumes the
old single-prefixed-blob format. Per-item Uploads now produce raw
blobs with their own BlobIDs; retrieve needs an update to read each
BlobID separately. The bench's aggregator-only setup never invokes
retrieve so this is unblocked for measurement but blocks merging to
production until addressed.
---
 block/internal/da/fiber_client.go         | 105 ++++++++++++++--------
 block/internal/submitting/da_submitter.go |  67 +++++++++++---
 2 files changed, 125 insertions(+), 47 deletions(-)

diff --git a/block/internal/da/fiber_client.go b/block/internal/da/fiber_client.go
index d82174c8fc..422b34be1e 100644
--- a/block/internal/da/fiber_client.go
+++ b/block/internal/da/fiber_client.go
@@ -5,6 +5,7 @@ import (
 	"encoding/binary"
 	"errors"
 	"fmt"
+	"sync"
 	"time"
 
 	"github.com/rs/zerolog"
@@ -87,48 +88,75 @@ func (c *fiberDAClient) Submit(ctx context.Context, data [][]byte, _ float64, na
 		}
 	}
 
-	// Single-item fast path: avoid the MaxBlobSize-sized allocation +
-	// memcpy that flattenBlobs would do just to wrap one item in the
-	// 8-byte count/length prefix. With per-item caps already saturating
-	// MaxBlobBytes for data blobs, this is the steady-state path.
+	// Per-item concurrent Upload. Fibre's per-Upload latency is
+	// dominated by validator signature aggregation (~1.5 s on a
+	// healthy network) and does not scale up linearly under multiple
+	// in-flight Uploads, so settlement throughput scales with the
+	// number of concurrent items submitted in a single batch. Each
+	// item gets its own goroutine, its own Upload call, and its own
+	// BlobID in the result; the previous flatten step was both
+	// memory-wasteful (a MaxBlobSize-sized memcpy on every Submit)
+	// and inherently serial (one Upload per Submit).
 	//
-	// TODO: wire-format compat — splitBlobs always expects the prefix,
-	// so any retriever (full node syncer, light client) downloading a
-	// blob written via this fast path will fail to decode. Address
-	// alongside the concurrent-uploads change by switching to a
-	// per-item Upload model where flatten is no longer needed.
-	var blob []byte
-	if len(data) == 1 {
-		blob = data[0]
-	} else {
-		blob = flattenBlobs(data)
+	// TODO: wire-format compat — old splitBlobs assumed all items in
+	// a Submit were written as a single prefixed blob. With per-item
+	// Uploads, retrievers must treat each BlobID separately. The
+	// retrieve path in this file still uses splitBlobs and will need
+	// a follow-up to read the new per-item blobs as raw payloads.
+	nsID := namespace[len(namespace)-10:]
+	type uploadResult struct {
+		idx int
+		id  []byte
+		err error
+	}
+	results := make([]uploadResult, len(data))
+	var wg sync.WaitGroup
+	for i := range data {
+		wg.Add(1)
+		go func(i int) {
+			defer wg.Done()
+			res, err := c.fiber.Upload(ctx, nsID, data[i])
+			if err != nil {
+				results[i] = uploadResult{idx: i, err: err}
+				return
+			}
+			id := make([]byte, len(res.BlobID))
+			copy(id, res.BlobID)
+			results[i] = uploadResult{idx: i, id: id}
+		}(i)
+	}
+	wg.Wait()
+
+	// Walk results in submission order. submitToDA's retry logic
+	// expects "prefix of successes": SubmittedCount=N means items
+	// [0..N) succeeded and the caller will re-submit items [N..end)
+	// on the next attempt. Reporting interleaved successes would
+	// double-submit blobs and waste escrow; matching prefix
+	// semantics keeps the contract intact even when individual
+	// Uploads fail out-of-order.
+	ids := make([][]byte, 0, len(data))
+	var firstErr error
+	for _, r := range results {
+		if r.err != nil {
+			firstErr = r.err
+			break
+		}
+		ids = append(ids, r.id)
 	}
 
-	// Honor the caller's context so Upload returns promptly on
-	// shutdown / parent cancellation. The previous context.Background()
-	// kept Uploads alive past node shutdown and contributed to the
-	// "payment promise already processed" warnings we saw in early
-	// runs (a stale Upload would settle after the node had stopped
-	// tracking it).
-	result, err := c.fiber.Upload(ctx, namespace[len(namespace)-10:], blob)
-	if err != nil {
+	if len(ids) == 0 && firstErr != nil {
 		code := datypes.StatusError
 		switch {
-		case errors.Is(err, context.Canceled):
+		case errors.Is(firstErr, context.Canceled):
 			code = datypes.StatusContextCanceled
-		case errors.Is(err, context.DeadlineExceeded):
+		case errors.Is(firstErr, context.DeadlineExceeded):
 			code = datypes.StatusContextDeadline
 		}
-
-		c.logger.Error().Err(err).Msg("fiber upload failed")
-
+		c.logger.Error().Err(firstErr).Msg("fiber upload failed")
 		return datypes.ResultSubmit{
 			BaseResult: datypes.BaseResult{
-				Code: code,
-				Message: fmt.Sprintf("fiber upload failed for blob: %v", err),
-				// On error nothing settled — the previous len(data)-1
-				// reported all-but-one as submitted on full failure,
-				// which lied to the caller's retry/postSubmit logic.
+				Code:           code,
+				Message:        fmt.Sprintf("fiber upload failed for blob: %v", firstErr),
 				SubmittedCount: 0,
 				BlobSize:       blobSize,
 				Timestamp:      time.Now(),
@@ -136,13 +164,20 @@ func (c *fiberDAClient) Submit(ctx context.Context, data [][]byte, _ float64, na
 		}
 	}
 
-	c.logger.Debug().Int("num_ids", len(data)).Uint64("height", 0 /* TODO */).Msg("fiber DA submission successful")
+	if firstErr != nil {
+		c.logger.Warn().Err(firstErr).
+			Int("submitted", len(ids)).
+			Int("total", len(data)).
+			Msg("fiber upload partial success — caller will retry the remainder")
+	}
+
+	c.logger.Debug().Int("num_ids", len(ids)).Uint64("height", 0 /* TODO */).Msg("fiber DA submission successful")
 
 	return datypes.ResultSubmit{
 		BaseResult: datypes.BaseResult{
 			Code:           datypes.StatusSuccess,
-			IDs:            [][]byte{result.BlobID},
-			SubmittedCount: uint64(len(data)),
+			IDs:            ids,
+			SubmittedCount: uint64(len(ids)),
 			Height:         0, /* TODO */
 			BlobSize:       blobSize,
 			Timestamp:      time.Now(),
diff --git a/block/internal/submitting/da_submitter.go b/block/internal/submitting/da_submitter.go
index 948f967644..1550836af3 100644
--- a/block/internal/submitting/da_submitter.go
+++ b/block/internal/submitting/da_submitter.go
@@ -43,8 +43,28 @@ type retryPolicy struct {
 	MinBackoff   time.Duration
 	MaxBackoff   time.Duration
 	MaxBlobBytes uint64
+	// MaxItems caps the number of items packed into a single Submit
+	// call. DA clients that fan out per-item Uploads (fiber) benefit
+	// linearly from larger batches — settlement throughput scales
+	// with concurrency until per-Upload latency dominates. Default
+	// 1 preserves legacy single-item-per-Submit semantics for
+	// backends that flatten a batch into one blob (JSON-RPC blob
+	// client). The fiber path overrides this from config.
+	MaxItems int
 }
 
+// defaultBatchItems is the conservative default for non-fiber backends
+// that historically expected one item per Submit call. The fiber path
+// raises this via config because it can fan out per-item Uploads.
+const defaultBatchItems = 1
+
+// fiberDefaultBatchItems is the upper bound on items packed into a
+// single fiber Submit. Each item gets its own concurrent Upload, so
+// this caps the per-batch goroutine fan-out. 16 covers a 5 min run at
+// 1 b/s production with 4–8 pending blocks while leaving headroom for
+// memory pressure; tunable via config when the cleanup TODO lands.
+const fiberDefaultBatchItems = 16
+
 func defaultRetryPolicy(maxAttempts int, maxDuration time.Duration) retryPolicy {
 	return retryPolicy{
 		MaxAttempts: maxAttempts,
@@ -57,6 +77,7 @@ func defaultRetryPolicy(maxAttempts int, maxDuration time.Duration) retryPolicy
 		// the duplication is what made packed-block-larger-than-cap
 		// failures non-obvious. See common/consts.go.
 		MaxBlobBytes: common.DefaultMaxBlobSize,
+		MaxItems:     defaultBatchItems,
 	}
 }
 
@@ -578,12 +599,19 @@ func submitToDA[T any](
 	}
 
 	pol := defaultRetryPolicy(s.config.DA.MaxSubmitAttempts, s.config.DA.BlockTime.Duration)
+	// Fiber's DA client fans out per-item Uploads concurrently, so
+	// packing more items per Submit lifts settlement throughput. For
+	// non-fiber backends the default of 1 preserves the legacy
+	// flatten-one-blob behavior.
+	if s.config.DA.IsFiberEnabled() {
+		pol.MaxItems = fiberDefaultBatchItems
+	}
 
 	rs := retryState{Attempt: 0, Backoff: 0}
 
 	// Limit this submission to a single size-capped batch
 	if len(marshaled) > 0 {
-		batchItems, batchMarshaled, err := limitBatchBySize(items, marshaled, pol.MaxBlobBytes)
+		batchItems, batchMarshaled, err := limitBatchBySize(items, marshaled, pol.MaxBlobBytes, pol.MaxItems)
 		if err != nil {
 			s.logger.Error().
 				Str("itemType", itemType).
@@ -694,27 +722,42 @@ func submitToDA[T any](
 	return fmt.Errorf("failed to submit all %s(s) to DA layer after %d attempts", itemType, rs.Attempt)
 }
 
-// limitBatchBySize returns a prefix of items whose total marshaled size does not exceed maxBytes.
-// If the first item exceeds maxBytes, it returns ErrOversizedItem which is unrecoverable.
-func limitBatchBySize[T any](items []T, marshaled [][]byte, maxBytes uint64) ([]T, [][]byte, error) {
-	total := uint64(0)
+// limitBatchBySize returns a prefix of items whose per-item marshaled size
+// fits within maxItemBytes. The total batch size is bounded by item count
+// (maxItems), not by total bytes — DA clients that can fan out per-item
+// Uploads (e.g. the fiber DA client) settle each item in its own
+// concurrent Upload call, so packing more items per batch lifts the
+// effective settlement throughput. DA clients that flatten a batch into
+// a single blob still get one item per call when maxItems == 1.
+//
+// If the first item exceeds maxItemBytes, returns ErrOversizedItem
+// (unrecoverable). If no items fit at all (empty inputs), returns a
+// distinct error so the caller can distinguish "nothing to send".
+//
+// TODO(throughput-cleanup): see common/consts.go — maxItemBytes is the
+// per-item chain ceiling, separate from the raw-tx budget driving
+// FilterTxs. Once that split lands, the duplicate-cap-everywhere
+// problem these fixes work around goes away.
+func limitBatchBySize[T any](items []T, marshaled [][]byte, maxItemBytes uint64, maxItems int) ([]T, [][]byte, error) {
+	if maxItems <= 0 {
+		maxItems = 1
+	}
 	count := 0
 	for i := range items {
+		if count >= maxItems {
+			break
+		}
 		sz := uint64(len(marshaled[i]))
-		if sz > maxBytes {
+		if sz > maxItemBytes {
 			if i == 0 {
-				return nil, nil, fmt.Errorf("%w: item size %d exceeds max %d", common.ErrOversizedItem, sz, maxBytes)
+				return nil, nil, fmt.Errorf("%w: item size %d exceeds max %d", common.ErrOversizedItem, sz, maxItemBytes)
 			}
 			break
 		}
-		if total+sz > maxBytes {
-			break
-		}
-		total += sz
 		count++
 	}
 	if count == 0 {
-		return nil, nil, fmt.Errorf("no items fit within %d bytes", maxBytes)
+		return nil, nil, fmt.Errorf("no items fit within %d bytes", maxItemBytes)
 	}
 	return items[:count], marshaled[:count], nil
 }

From 35f1e13c927f873d01eafcbded6ca83c24191ec6 Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 22:38:50 +0100
Subject: [PATCH 10/18] perf(fiber-bench): use in-memory KV store, not
 disk-backed Badger
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Block production calls store.batch.Commit() synchronously inside
ProduceBlock — which means Badger's write throughput is a hard ceiling
on block production rate. At 128 MB blocks × ~1 b/s the on-disk
backend generates ~150 MB/s of value-log writes plus heavy compaction
churn that backed up under load: vlog files filled (~1.2 GB each)
faster than Badger could rotate, and we hit a "file exists" race on
.vlog rotation that wedged the producer entirely.

The bench has no durability requirement — if it crashes we re-run —
so swap to NewTestInMemoryKVStore. ev-node's code path is unchanged
(same Batch / Commit semantics), the data just lives in a map. This
removes Badger from the critical path and lets the bench measure
ev-node's actual pipeline rather than Badger's write-amplification
curve.

Open question for production fiber rollups: since Fibre IS the
storage (a fiber-only node can re-sync any block from the chain),
does ev-node need to persist block data to local Badger at all?
Possibly worth a fiber-only-skip-block-store mode in the executor,
analogous to how the !fiber broadcast paths are gated. Filed
informally; not blocking the throughput investigation.
---
 tools/celestia-node-fiber/cmd/fiber-bench/run.go | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/run.go b/tools/celestia-node-fiber/cmd/fiber-bench/run.go
index 8b65cee42d..576ceff28c 100644
--- a/tools/celestia-node-fiber/cmd/fiber-bench/run.go
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/run.go
@@ -230,10 +230,21 @@ func runBench(parentCtx context.Context, f runFlags) error {
 	}
 
 	// 6) Datastore for ev-node's internal state.
-	ds, err := store.NewDefaultKVStore(f.homeDir, cfg.DBPath, "fiber-bench")
+	//
+	// Use the in-memory KV store so block writes don't touch disk:
+	// at sustained 128 MB blocks × ~1 b/s the on-disk Badger backend
+	// generates 100+ MB/s of value-log writes plus heavy compaction
+	// pressure that contributes to GC stalls and, under load, can
+	// hit Badger races ("file exists" on .vlog rotation observed at
+	// ~150 MB/s sustained). The bench has no durability requirement
+	// — if it crashes we re-run — so we skip persistence entirely
+	// and let the bench measure ev-node's pipeline rather than
+	// Badger's write-amplification curve.
+	ds, err := store.NewTestInMemoryKVStore()
 	if err != nil {
 		return fmt.Errorf("open datastore: %w", err)
 	}
+	_ = f.homeDir // referenced elsewhere for signer dir
 
 	// 7) Executor + sequencer.
 	exec := newInMemExecutor(f.mempoolSize)

From 7ed0bf1074ac2ad700879023114549c5aeb2930a Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 22:43:42 +0100
Subject: [PATCH 11/18] fix(fiber-bench): use ds.MapDatastore, not Badger
 in-memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previous in-memory switch used store.NewTestInMemoryKVStore() which is
backed by Badger with WithInMemory(true). That mode still enforces
Badger's default 1 MiB ValueThreshold, so any block larger than 1 MiB
fails to save with:

  Value with size 133506229 exceeded 1048576 limit

Our 128 MiB blocks blow past this on every commit. Symptom in the
logs is a stream of 'failed to save block data' errors while the
submitter continues to upload pending items from cache — so settlement
keeps advancing for already-cached items but new block production
halts.

Swap to ds.MutexWrap(ds.NewMapDatastore()): a pure-Go in-memory map
with no per-value size limit, thread-safe via the standard sync
wrapper. Same Batch / Commit semantics ev-node expects, just a thin
sync.Mutex around a Go map.

The bench has no durability requirement — the Badger reference is
kept aliased above the assignment so the dependency stays imported
in case we want to switch back via flag later.
---
 .../cmd/fiber-bench/run.go                    | 27 ++++++++++---------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/run.go b/tools/celestia-node-fiber/cmd/fiber-bench/run.go
index 576ceff28c..a1cc5be53d 100644
--- a/tools/celestia-node-fiber/cmd/fiber-bench/run.go
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/run.go
@@ -13,6 +13,7 @@ import (
 	"time"
 
 	"github.com/ipfs/go-datastore"
+	dssync "github.com/ipfs/go-datastore/sync"
 	"github.com/libp2p/go-libp2p/core/crypto"
 	"github.com/rs/zerolog"
 	"github.com/spf13/cobra"
@@ -231,19 +232,19 @@ func runBench(parentCtx context.Context, f runFlags) error {
 
 	// 6) Datastore for ev-node's internal state.
 	//
-	// Use the in-memory KV store so block writes don't touch disk:
-	// at sustained 128 MB blocks × ~1 b/s the on-disk Badger backend
-	// generates 100+ MB/s of value-log writes plus heavy compaction
-	// pressure that contributes to GC stalls and, under load, can
-	// hit Badger races ("file exists" on .vlog rotation observed at
-	// ~150 MB/s sustained). The bench has no durability requirement
-	// — if it crashes we re-run — so we skip persistence entirely
-	// and let the bench measure ev-node's pipeline rather than
-	// Badger's write-amplification curve.
-	ds, err := store.NewTestInMemoryKVStore()
-	if err != nil {
-		return fmt.Errorf("open datastore: %w", err)
-	}
+	// Pure-Go in-memory map (sync-wrapped) so block writes don't
+	// touch disk and aren't bounded by any storage-engine value
+	// limit. Badger's NewTestInMemoryKVStore has a default 1 MiB
+	// ValueThreshold — anything over that fails to save, which our
+	// 128 MB blocks blow past on every commit. With ds.MapDatastore
+	// we get O(map insert) writes with no value-size cap.
+	//
+	// The bench has no durability requirement — if it crashes we
+	// re-run — and removing Badger from the critical path is what
+	// finally lets concurrent uploads multiply throughput rather
+	// than queueing behind block.Commit.
+	_ = store.NewTestInMemoryKVStore // keep dep referenced for the alternate path
+	ds := dssync.MutexWrap(datastore.NewMapDatastore())
 	_ = f.homeDir // referenced elsewhere for signer dir
 
 	// 7) Executor + sequencer.

From 1b518e82673d4da5dc85dd876d7cc42599bb9ae5 Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 23:27:42 +0100
Subject: [PATCH 12/18] hack(store): swap NewDefaultKVStore to in-memory
 MapDatastore
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Block production calls store.batch.Commit() synchronously inside
ProduceBlock, so storage write throughput is a hard ceiling on block
production. With 128 MiB blocks × ~1 b/s the on-disk Badger backend
generates ~150 MB/s of value-log writes plus heavy compaction; under
sustained load we hit a Badger .vlog rotation race ("file exists")
that wedges the producer entirely.

Returning a sync-wrapped MapDatastore from the canonical constructor
(rather than special-casing the bench) puts the change exactly where
ev-node loads its store, makes the diff small and obvious, and lets
the bench drop its private MapDS swap to call NewDefaultKVStore the
same way every other ev-node binary does.

The HACK comment names three real fixes — async commit, fiber-only
skip-persistence, write-optimised backend — so this isn't read as
"revert to Badger before merge". NewDefaultKVStoreOnDisk preserved as
the literal Badger constructor for any caller that explicitly wants
disk-backed state today.

Reverts the bench-side workaround introduced in 7ed0bf10.
---
 pkg/store/kv.go                               | 36 ++++++++++++++++++-
 .../cmd/fiber-bench/run.go                    | 24 ++++---------
 2 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/pkg/store/kv.go b/pkg/store/kv.go
index 3ee23bc2c1..5cea24ba12 100644
--- a/pkg/store/kv.go
+++ b/pkg/store/kv.go
@@ -8,6 +8,7 @@ import (
 	ds "github.com/ipfs/go-datastore"
 	ktds "github.com/ipfs/go-datastore/keytransform"
 	dsq "github.com/ipfs/go-datastore/query"
+	dssync "github.com/ipfs/go-datastore/sync"
 	badger4 "github.com/ipfs/go-ds-badger4"
 )
 
@@ -15,7 +16,40 @@ import (
 const EvPrefix = "0"
 
 // NewDefaultKVStore creates instance of default key-value store.
-func NewDefaultKVStore(rootDir, dbPath, dbName string) (ds.Batching, error) {
+//
+// HACK(fiber-throughput): swapped to a pure in-memory map for the
+// Fibre throughput investigation. The real issue this surfaces is
+// architectural, not a Badger bug: block.executing.Executor.ProduceBlock
+// calls store.batch.Commit() synchronously inside the producer, so
+// the storage engine's write rate is a hard ceiling on block
+// production. With 128 MiB blocks × ~1 b/s the on-disk path drives
+// ~150 MB/s of value-log writes plus heavy compaction; the producer
+// blocks on Badger long before the DA submitter is the bottleneck.
+//
+// Don't revert this in place — fix the underlying design instead.
+// Options worth weighing:
+//   - Move the block save off the producer hot path (async commit
+//     with a bounded queue). Block durability is not required to
+//     advance state, only to recover after restart.
+//   - For Fibre-only rollups specifically: skip local persistence
+//     entirely. Fibre IS the storage; a node can re-sync from the
+//     chain on restart. This removes the question.
+//   - If we keep persisting, pick a write-optimised backend that
+//     handles 100s of MB/s of large-value writes without compaction
+//     stalls. Badger v4 with these tunings still hit a .vlog
+//     rotation race under sustained load.
+//
+// NewDefaultKVStoreOnDisk preserved below as the literal Badger
+// constructor for any caller that explicitly wants disk-backed
+// state today; the production wiring should switch to one of the
+// three options above before this directory is dropped.
+func NewDefaultKVStore(_, _, _ string) (ds.Batching, error) {
+	return dssync.MutexWrap(ds.NewMapDatastore()), nil
+}
+
+// NewDefaultKVStoreOnDisk is the original Badger-backed constructor,
+// preserved for the duration of the throughput-cleanup window.
+func NewDefaultKVStoreOnDisk(rootDir, dbPath, dbName string) (ds.Batching, error) {
 	path := filepath.Join(rootify(rootDir, dbPath), dbName)
 	return badger4.NewDatastore(path, BadgerOptions())
 }
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/run.go b/tools/celestia-node-fiber/cmd/fiber-bench/run.go
index a1cc5be53d..7ac15c9963 100644
--- a/tools/celestia-node-fiber/cmd/fiber-bench/run.go
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/run.go
@@ -13,7 +13,6 @@ import (
 	"time"
 
 	"github.com/ipfs/go-datastore"
-	dssync "github.com/ipfs/go-datastore/sync"
 	"github.com/libp2p/go-libp2p/core/crypto"
 	"github.com/rs/zerolog"
 	"github.com/spf13/cobra"
@@ -230,22 +229,13 @@ func runBench(parentCtx context.Context, f runFlags) error {
 		return fmt.Errorf("fiber config: %w", err)
 	}
 
-	// 6) Datastore for ev-node's internal state.
-	//
-	// Pure-Go in-memory map (sync-wrapped) so block writes don't
-	// touch disk and aren't bounded by any storage-engine value
-	// limit. Badger's NewTestInMemoryKVStore has a default 1 MiB
-	// ValueThreshold — anything over that fails to save, which our
-	// 128 MB blocks blow past on every commit. With ds.MapDatastore
-	// we get O(map insert) writes with no value-size cap.
-	//
-	// The bench has no durability requirement — if it crashes we
-	// re-run — and removing Badger from the critical path is what
-	// finally lets concurrent uploads multiply throughput rather
-	// than queueing behind block.Commit.
-	_ = store.NewTestInMemoryKVStore // keep dep referenced for the alternate path
-	ds := dssync.MutexWrap(datastore.NewMapDatastore())
-	_ = f.homeDir // referenced elsewhere for signer dir
+	// 6) Datastore for ev-node's internal state. Uses the standard
+	// constructor; the in-memory swap for benchmarking lives in
+	// pkg/store/kv.go::NewDefaultKVStore (see HACK there).
+	ds, err := store.NewDefaultKVStore(f.homeDir, cfg.DBPath, "fiber-bench")
+	if err != nil {
+		return fmt.Errorf("open datastore: %w", err)
+	}
 
 	// 7) Executor + sequencer.
 	exec := newInMemExecutor(f.mempoolSize)

From b4e03f9fd142ec26fe52a76146d1a3515028037b Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 23:28:15 +0100
Subject: [PATCH 13/18] hack(reaper,cache): collapse seen-tx TTL plumbing back
 to plain consts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previous fix (ecd7f62b) made DefaultTxCacheRetention and CleanupInterval
ldflag-overridable so the bench could shrink them at link time. That
hid the actual change behind 30 lines of init() / parsing scaffolding —
the diff said "add tunable" but the operational story was "the default
is wrong for any meaningful TPS". Replacing the plumbing with two
const edits puts the hack where it belongs, where the value lives.

DefaultTxCacheRetention: 24h -> 30s.
At ~1.5M tx/s sustained the 24h dedup window grows the cache to ~16 GB
in under a minute (each entry is the SHA-256 hex string, ~150 B in map
representation), which OOM-kills the bench before any throughput
signal is visible. The HACK comment flags 24h as itself wrong:
retention-by-wall-time scales poorly with TPS. The proper fix is an
LRU-by-count cache, or expressing the window in DA blocks (mempool
TTL × DA block time), not a fixed duration.

CleanupInterval: 1h -> 5s.
Coupled to the previous 24h retention; an hourly sweep against a 24h
window means entries can outlive expiry by 1h (fine when retention is
days, completely broken at 30s retention where entries would survive
12× past expiry). The HACK comment notes this should derive from
retention rather than be a separate fixed value.

Reverts the link-time tunability scaffolding from ecd7f62b. The bench
no longer needs ldflags for these — same hack with the standard
build.
---
 block/internal/cache/manager.go  | 39 ++++++++++++++++----------------
 block/internal/reaping/reaper.go | 34 ++++++++++++----------------
 2 files changed, 34 insertions(+), 39 deletions(-)

diff --git a/block/internal/cache/manager.go b/block/internal/cache/manager.go
index 298f5499ab..9e293c775a 100644
--- a/block/internal/cache/manager.go
+++ b/block/internal/cache/manager.go
@@ -24,27 +24,26 @@ const (
 
 	// DataDAIncludedPrefix is the store key prefix for data DA inclusion tracking.
 	DataDAIncludedPrefix = "cache/data-da-included/"
-)
 
-// defaultTxCacheRetentionStr controls the duration tx hashes are kept in
-// the seen-tx cache before CleanupOldTxs removes them. Override at link
-// time for high-throughput benchmarks where the default 24 h causes the
-// cache to grow until OOM:
-//
-//	go build -ldflags "-X github.com/evstack/ev-node/block/internal/cache.defaultTxCacheRetentionStr=30s"
-var defaultTxCacheRetentionStr = "24h"
-
-// DefaultTxCacheRetention is the resolved retention used by CleanupOldTxs.
-var DefaultTxCacheRetention time.Duration
-
-func init() {
-	d, err := time.ParseDuration(defaultTxCacheRetentionStr)
-	if err != nil || d <= 0 {
-		DefaultTxCacheRetention = 24 * time.Hour
-		return
-	}
-	DefaultTxCacheRetention = d
-}
+	// DefaultTxCacheRetention is how long tx hashes stay in the
+	// seen-tx cache before CleanupOldTxs evicts them.
+	//
+	// HACK(fiber-throughput): dropped from 24h to 30s while we chase
+	// throughput, but the previous default was itself wrong: 24h is
+	// retention × tps in memory, so any rollup with meaningful TPS
+	// would OOM (we hit ~16 GB in under a minute at ~1.5M tx/s).
+	// What this should be properly:
+	//   - Bounded by entry count, not wall time. The dedup window
+	//     should be "the last N txs we saw", LRU-evicted, so cache
+	//     memory is fixed regardless of throughput.
+	//   - Or expressed in DA blocks: "drop hashes once their txs
+	//     would have been retried out of the mempool", which is a
+	//     property of mempool TTL × DA block time, not 24 hours.
+	//   - 30s is a fine measurement default and a reasonable upper
+	//     bound for pretty much any rollup; pick the right number
+	//     when the cache structure itself is reworked.
+	DefaultTxCacheRetention = 30 * time.Second
+)
 
 // CacheManager provides thread-safe cache operations for tracking seen blocks
 // and DA inclusion status.
diff --git a/block/internal/reaping/reaper.go b/block/internal/reaping/reaper.go
index 8b6fff9807..7cd9e27450 100644
--- a/block/internal/reaping/reaper.go
+++ b/block/internal/reaping/reaper.go
@@ -21,26 +21,22 @@ import (
 const (
 	// MaxBackoffInterval is the maximum backoff interval for retries
 	MaxBackoffInterval = 30 * time.Second
-)
 
-// cleanupIntervalStr controls how often the reaper sweeps expired hashes
-// from the seen-tx cache. Override at link time for high-throughput
-// benchmarks where the default hourly sweep lets the cache grow to OOM:
-//
-//	go build -ldflags "-X github.com/evstack/ev-node/block/internal/reaping.cleanupIntervalStr=10s"
-var cleanupIntervalStr = "1h"
-
-// CleanupInterval is the resolved sweep period used by reaperLoop.
-var CleanupInterval time.Duration
-
-func init() {
-	d, err := time.ParseDuration(cleanupIntervalStr)
-	if err != nil || d <= 0 {
-		CleanupInterval = time.Hour
-		return
-	}
-	CleanupInterval = d
-}
+	// CleanupInterval is how often the reaper sweeps expired hashes
+	// out of the seen-tx cache.
+	//
+	// HACK(fiber-throughput): dropped from 1h to 5s. The original
+	// 1h was effectively coupled to the previous 24h retention —
+	// sweeping every hour against a 24h window means a cache entry
+	// can outlive its retention by 1h, which is fine when retention
+	// is a day but completely breaks at 30s retention (entries
+	// would survive 12× past expiry). Whatever the right retention
+	// turns out to be (see DefaultTxCacheRetention's note in
+	// cache/manager.go), this value should be a small fraction of
+	// it — not a fixed time. Better to derive: e.g.
+	// retention/10 with a sane min/max.
+	CleanupInterval = 5 * time.Second
+)
 
 // Reaper is responsible for periodically retrieving transactions from the executor,
 // filtering out already seen transactions, and submitting new transactions to the sequencer.

From bba2aa8a1bf4957761db6da95d27c599d4c01b8b Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 23:28:36 +0100
Subject: [PATCH 14/18] docs: surface follow-up issues left by the throughput
 hacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three small comment / dead-code edits. None change behaviour; they
make hidden assumptions visible so the next person reading the diff
doesn't trip on them.

block/internal/common/consts.go
  DefaultMaxBlobSize: flag that the new 128 MiB-5 default is correct
  for fiber-enabled deployments but WRONG for the legacy JSON-RPC
  blob client path — bridge / chain reject blobs above their own
  much smaller cap. The right shape is per-backend caps; the global
  default was always going to be a leaky abstraction.

block/internal/da/fiber_client.go
  Remove flattenBlobs (dead code now that Submit fans out per item).
  Keep splitBlobs but document loudly that it can no longer decode
  blobs THIS branch's Submit writes — the per-item Upload path
  produces raw blobs while splitBlobs expects the legacy "count +
  per-item length" framing. Retrieve / Get / Subscribe callers in
  the same file are therefore broken for our writes; the comment
  points at the wire-format follow-up that has to land before any
  node on this branch tries to sync from another.

block/internal/submitting/da_submitter.go
  fiberDefaultBatchItems = 16: flag the magic number as needing a
  config knob (FiberDAConfig.UploadConcurrency was scaffolded for
  exactly this earlier and reverted; wire it through here when the
  concurrent-uploads change graduates from prototype). 16 is a
  pragmatic measurement default, not a considered production value.
---
 block/internal/common/consts.go           | 20 +++++++++----
 block/internal/da/fiber_client.go         | 36 ++++++++---------------
 block/internal/submitting/da_submitter.go | 13 ++++++--
 3 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/block/internal/common/consts.go b/block/internal/common/consts.go
index 386649f1da..8e1e679fc3 100644
--- a/block/internal/common/consts.go
+++ b/block/internal/common/consts.go
@@ -2,11 +2,21 @@ package common
 
 import "strconv"
 
-// defaultMaxBlobSizeStr holds the string representation of the default blob
-// size limit. Anchored to Fibre's actual cap: protocol MaxBlobSize
-// (1 << 27 = 128 MiB) minus the 5-byte Fibre blob header (1 byte version +
-// 4 byte data size). See celestia-app/v9/fibre/blob.go (blobHeaderLen)
-// and fibre/protocol_params.go (MaxBlobSize).
+// defaultMaxBlobSizeStr holds the string representation of the default
+// blob size limit. Anchored to Fibre's actual cap: protocol MaxBlobSize
+// (1 << 27 = 128 MiB) minus the 5-byte Fibre blob header (1 byte
+// version + 4 byte data size). See celestia-app/v9/fibre/blob.go
+// (blobHeaderLen) and fibre/protocol_params.go (MaxBlobSize).
+//
+// HACK(fiber-throughput): this default is correct for fiber-enabled
+// deployments but WRONG for the legacy JSON-RPC blob client path —
+// the bridge / chain rejects blobs above its own (much smaller) cap,
+// so a non-fiber node started against this default would fail to
+// submit. The right shape is per-backend: fiber's cap is one number,
+// blob-RPC's cap is another, and DefaultMaxBlobSize shouldn't be a
+// single global. Restructure into config when the throughput-cleanup
+// TODO lands; until then, non-fiber callers should override via
+// ldflag or local config.
 //
 // MUST be a string literal: Go's `-ldflags "-X ..."` only takes effect
 // on variables initialized to a string constant, NOT a function call.
diff --git a/block/internal/da/fiber_client.go b/block/internal/da/fiber_client.go
index 422b34be1e..db92f06792 100644
--- a/block/internal/da/fiber_client.go
+++ b/block/internal/da/fiber_client.go
@@ -388,29 +388,19 @@ func (c *fiberDAClient) Validate(_ context.Context, ids []datypes.ID, proofs []d
 func (c *fiberDAClient) GetHeaderNamespace() []byte { return c.namespaceBz }
 func (c *fiberDAClient) GetDataNamespace() []byte   { return c.dataNamespaceBz }
 
-func flattenBlobs(blobs [][]byte) []byte {
-	if len(blobs) == 0 {
-		return nil
-	}
-
-	var total int
-	for _, b := range blobs {
-		total += 4 + len(b)
-	}
-	total += 4
-
-	buf := make([]byte, total)
-	binary.BigEndian.PutUint32(buf, uint32(len(blobs)))
-	off := 4
-	for _, b := range blobs {
-		binary.BigEndian.PutUint32(buf[off:], uint32(len(b)))
-		off += 4
-		copy(buf[off:], b)
-		off += len(b)
-	}
-	return buf
-}
-
+// splitBlobs decodes the legacy "count + per-item length" framing that
+// the previous Submit path used to pack multiple blobs into a single
+// Upload. The per-item-concurrent Submit path no longer writes that
+// framing — each item is uploaded raw — so any blob written by this
+// branch's Submit will fail to decode here.
+//
+// Callers (Retrieve / RetrieveBlobs / Get / Subscribe) therefore only
+// work for blobs written by the OLD code path, OR for the multi-item
+// header batches that still use it. Pair the format change with a
+// matching update to the read path before any node on this branch
+// tries to sync from another node on this branch.
+//
+// Tracked alongside the wire-format TODO on Submit (above).
 func splitBlobs(data []byte) ([][]byte, error) {
 	if len(data) == 0 {
 		return nil, nil
diff --git a/block/internal/submitting/da_submitter.go b/block/internal/submitting/da_submitter.go
index 1550836af3..bb30eb6461 100644
--- a/block/internal/submitting/da_submitter.go
+++ b/block/internal/submitting/da_submitter.go
@@ -60,9 +60,16 @@ const defaultBatchItems = 1
 
 // fiberDefaultBatchItems is the upper bound on items packed into a
 // single fiber Submit. Each item gets its own concurrent Upload, so
-// this caps the per-batch goroutine fan-out. 16 covers a 5 min run at
-// 1 b/s production with 4–8 pending blocks while leaving headroom for
-// memory pressure; tunable via config when the cleanup TODO lands.
+// this caps the per-batch goroutine fan-out.
+//
+// HACK(fiber-throughput): hardcoded at 16. The right value depends on
+// memory budget × per-item Upload size × Fibre validator-side
+// throughput, none of which the submitter can know at compile time.
+// Should be a config knob (FiberDAConfig.UploadConcurrency was
+// scaffolded for exactly this earlier — wire it through here when the
+// concurrent-uploads change graduates from prototype). 16 is a
+// pragmatic measurement default that gives meaningful concurrency
+// without overwhelming celestia-node's per-FSP rate.
 const fiberDefaultBatchItems = 16
 
 func defaultRetryPolicy(maxAttempts int, maxDuration time.Duration) retryPolicy {

From 57fa859640611a89c60a0c18bae0eac92a48d145 Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 23:39:12 +0100
Subject: [PATCH 15/18] refactor(fiber-bench): delegate node wiring to
 rollcmd.StartNode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bench was hand-rolling the same node wiring testapp/evm/grpc
apps already do via pkg/cmd.StartNode — DA client construction, p2p
client setup, node.NewNode call, signal handling, the run loop. Each
of those grew its own way of doing things in the bench, drifted from
the canonical path, and left a maintenance gap if cmd.StartNode ever
gained a new responsibility (which is exactly how the fiberClient
parameter regression on this branch happened — testapp was never
updated to pass it).

Replace the inline wiring with one rollcmd.StartNode call. The bench
now owns only what's genuinely bench-specific:

  - Cosmos keyring open + bridge-bypass cnfiber.Adapter (no
    production equivalent — bypasses bridge node dialing)
  - Block-signing key created in homedir, passphrase written to a
    temp file so StartNode can read it through its standard flag
  - inMemExecutor + solo sequencer (constant state root for
    measurement; testapp's KVExecutor recomputes state by scanning
    every key, O(N) per block)
  - Loader + stats printer goroutines spawned before the blocking
    StartNode call; SIGINT-to-self triggers shutdown when the
    duration timer expires (StartNode's outer select waits on
    signal/err only — not ctx — so this is the contained way to
    drive duration through its existing shutdown path).

Net diff: ~30 LOC fewer, but the meaningful change is that the bench
is no longer carrying its own copy of testapp/evm/grpc's node setup.
The bridge-bypass adapter, instrumented Upload latency proxy, escrow
helpers, and stats printer remain (those don't duplicate canonical
ev-node code; they exist only for measurement and operator UX).

Filing for follow-up: testapp/evm/grpc apps still don't compile on
this branch because cmd.StartNode gained the fiberClient parameter
without updating its callers. The right fix is one of:
  - testapp/cmd/run.go imports tools/celestia-node-fiber and wires
    cnfiber.New (with bridge) when nodeConfig.DA.Fiber.Enabled.
  - Or cmd.StartNode grows a constructor-style overload so callers
    that don't use Fiber can keep their old signature.
Either way, that's a separate piece of work; this commit just
demonstrates the canonical pattern from the bench side.
---
 .../cmd/fiber-bench/run.go                    | 205 ++++++++----------
 1 file changed, 88 insertions(+), 117 deletions(-)

diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/run.go b/tools/celestia-node-fiber/cmd/fiber-bench/run.go
index 7ac15c9963..929df3ed5d 100644
--- a/tools/celestia-node-fiber/cmd/fiber-bench/run.go
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/run.go
@@ -2,7 +2,6 @@ package main
 
 import (
 	"context"
-	"crypto/rand"
 	"errors"
 	"fmt"
 	"os"
@@ -12,19 +11,15 @@ import (
 	"syscall"
 	"time"
 
-	"github.com/ipfs/go-datastore"
-	"github.com/libp2p/go-libp2p/core/crypto"
 	"github.com/rs/zerolog"
 	"github.com/spf13/cobra"
 
-	"github.com/evstack/ev-node/block"
+	rollcmd "github.com/evstack/ev-node/pkg/cmd"
 	evconfig "github.com/evstack/ev-node/pkg/config"
 	"github.com/evstack/ev-node/node"
 	"github.com/evstack/ev-node/pkg/genesis"
-	"github.com/evstack/ev-node/pkg/p2p"
 	"github.com/evstack/ev-node/pkg/p2p/key"
 	"github.com/evstack/ev-node/pkg/sequencers/solo"
-	pkgsigner "github.com/evstack/ev-node/pkg/signer"
 	"github.com/evstack/ev-node/pkg/signer/file"
 	"github.com/evstack/ev-node/pkg/store"
 )
@@ -67,7 +62,7 @@ func runCmd() *cobra.Command {
 		Use:   "run",
 		Short: "Run the bench: start a single-sequencer ev-node against a Fibre network and pump load",
 		RunE: func(cmd *cobra.Command, args []string) error {
-			return runBench(cmd.Context(), f)
+			return runBench(cmd, f)
 		},
 	}
 
@@ -99,17 +94,21 @@ func runCmd() *cobra.Command {
 	flags.StringVar(&f.prometheusAddr, "prometheus-addr", "127.0.0.1:26660", "address for the ev-node Prometheus endpoint")
 	flags.StringVar(&f.logLevel, "log-level", "info", "ev-node log level (debug|info|warn|error)")
 
+	// FlagSignerPassphraseFile is what cmd.StartNode reads to load the
+	// file-backed signer's passphrase. We define it on the bench's run
+	// command so cmd.StartNode finds it via cmd.Flags().GetString;
+	// runBench writes the operator's --signer-passphrase to a temp
+	// file and sets this flag's value before delegating.
+	flags.String(evconfig.FlagSignerPassphraseFile, "", "(internal) populated by --signer-passphrase before cmd.StartNode runs")
+	_ = cmd.Flags().MarkHidden(evconfig.FlagSignerPassphraseFile)
+
 	_ = cobra.MarkFlagRequired(flags, "consensus-grpc")
 	_ = cobra.MarkFlagRequired(flags, "chain-id")
 
 	return cmd
 }
 
-func runBench(parentCtx context.Context, f runFlags) error {
-	// Single root context for everything; SIGINT cancels.
-	ctx, cancel := signal.NotifyContext(parentCtx, os.Interrupt, syscall.SIGTERM)
-	defer cancel()
-
+func runBench(cobraCmd *cobra.Command, f runFlags) error {
 	logger := setupLogger(f.logLevel)
 
 	if !f.keepHome {
@@ -119,9 +118,9 @@ func runBench(parentCtx context.Context, f runFlags) error {
 		return fmt.Errorf("create home %s: %w", f.homeDir, err)
 	}
 
-	// 1) Open the cosmos keyring (must already contain --key-name; we don't
-	// auto-create here so that operator-funded keys aren't accidentally
-	// regenerated when bench runs are re-launched).
+	// 1) Open the cosmos keyring + build the bridge-bypass Fibre
+	// adapter. These are the two genuinely fiber-bench-specific
+	// pieces — neither lives in the production wiring path.
 	kr, err := openKeyring(f.keyringDir)
 	if err != nil {
 		return fmt.Errorf("open keyring at %s: %w", f.keyringDir, err)
@@ -137,9 +136,8 @@ func runBench(parentCtx context.Context, f runFlags) error {
 	}
 	logger.Info().Str("address", addr.String()).Str("key", f.keyName).Msg("loaded fibre signing key")
 
-	// 2) Build the bridge-bypass Fibre adapter.
 	logger.Info().Str("grpc", f.consensusGRPC).Msg("dialing consensus gRPC")
-	innerFiberClient, fiberClose, err := buildFibreAdapter(ctx, f.consensusGRPC, f.keyName, kr)
+	innerFiberClient, fiberClose, err := buildFibreAdapter(cobraCmd.Context(), f.consensusGRPC, f.keyName, kr)
 	if err != nil {
 		return fmt.Errorf("build fibre adapter: %w", err)
 	}
@@ -149,47 +147,53 @@ func runBench(parentCtx context.Context, f runFlags) error {
 		}
 	}()
 	// Wrap in a latency-recording proxy so the stats printer can show
-	// per-Upload p50/p99 — without this we can't tell whether the
-	// production-vs-DA-settlement gap comes from ev-node's submitter
-	// serialization (one header + one data Upload in flight at a time)
-	// or from actual Fibre Upload latency.
+	// per-Upload p50/p99.
 	fiberClient := newInstrumentedAdapter(innerFiberClient)
 
-	// 3) Build the ev-node file signer (separate key — block signing, not
-	// fibre payments). Created in the home dir if missing.
+	// 2) ev-node block-signing key. Created in the home dir if missing.
 	signerDir := filepath.Join(f.homeDir, "signer")
 	if err := os.MkdirAll(signerDir, 0o750); err != nil {
 		return fmt.Errorf("create signer dir: %w", err)
 	}
 	signerFile := filepath.Join(signerDir, "signer.json")
-	var signer pkgsigner.Signer
-	if _, err := os.Stat(signerFile); os.IsNotExist(err) {
+	if _, statErr := os.Stat(signerFile); os.IsNotExist(statErr) {
 		s, err := file.CreateFileSystemSigner(signerDir, []byte(f.signerPassphrase))
 		if err != nil {
 			return fmt.Errorf("create file signer: %w", err)
 		}
-		signer = s
-	} else {
-		s, err := file.LoadFileSystemSigner(signerDir, []byte(f.signerPassphrase))
-		if err != nil {
-			return fmt.Errorf("load file signer: %w", err)
+		if _, err := s.GetAddress(); err != nil {
+			return fmt.Errorf("signer address: %w", err)
 		}
-		signer = s
 	}
-	signerAddr, err := signer.GetAddress()
+	// cmd.StartNode reads the passphrase from a file path stored in
+	// FlagSignerPassphraseFile; write the in-memory string out so
+	// the canonical signer-loading path works without a separate
+	// passphrase-flag flow.
+	passphraseFile := filepath.Join(f.homeDir, "passphrase.txt")
+	if err := os.WriteFile(passphraseFile, []byte(f.signerPassphrase), 0o600); err != nil {
+		return fmt.Errorf("write passphrase file: %w", err)
+	}
+	if err := cobraCmd.Flags().Set(evconfig.FlagSignerPassphraseFile, passphraseFile); err != nil {
+		return fmt.Errorf("set passphrase flag: %w", err)
+	}
+
+	// Reload the signer to derive the genesis proposer address.
+	loaded, err := file.LoadFileSystemSigner(signerDir, []byte(f.signerPassphrase))
+	if err != nil {
+		return fmt.Errorf("load file signer: %w", err)
+	}
+	signerAddr, err := loaded.GetAddress()
 	if err != nil {
 		return fmt.Errorf("signer address: %w", err)
 	}
 
-	// 4) Genesis. Single proposer = our signer.
+	// 3) Genesis. Single proposer = our signer.
 	gen := genesis.NewGenesis(f.chainID, 1, time.Now().UTC(), signerAddr)
 	if err := gen.Validate(); err != nil {
 		return fmt.Errorf("invalid genesis: %w", err)
 	}
 
-	// 5) ev-node config. P2P listen on a random port; ev-node disables p2p
-	// outbound when fiber is enabled, but the libp2p host is still
-	// constructed, so we still need a port.
+	// 4) ev-node config.
 	cfg := evconfig.DefaultConfig()
 	cfg.RootDir = f.homeDir
 	cfg.DBPath = "data"
@@ -224,79 +228,44 @@ func runBench(parentCtx context.Context, f runFlags) error {
 	cfg.Signer.SignerType = "file"
 	cfg.Signer.SignerPath = signerDir
 
-	// Validate fiber config the way ev-node would.
 	if err := cfg.DA.Fiber.Validate(); err != nil {
 		return fmt.Errorf("fiber config: %w", err)
 	}
 
-	// 6) Datastore for ev-node's internal state. Uses the standard
-	// constructor; the in-memory swap for benchmarking lives in
-	// pkg/store/kv.go::NewDefaultKVStore (see HACK there).
+	// 5) Datastore + node-key + executor + sequencer. The first three
+	// look identical to what testapp/cmd/run.go does; the executor
+	// is the bench-specific in-memory variant (constant state root,
+	// see executor.go for rationale) and the sequencer is solo (no
+	// based-sequencer / no forced inclusion machinery).
 	ds, err := store.NewDefaultKVStore(f.homeDir, cfg.DBPath, "fiber-bench")
 	if err != nil {
 		return fmt.Errorf("open datastore: %w", err)
 	}
-
-	// 7) Executor + sequencer.
-	exec := newInMemExecutor(f.mempoolSize)
-	seq := solo.NewSoloSequencer(logger, []byte(gen.ChainID), exec)
-
-	// 8) DA client wraps our adapter as the FullDAClient ev-node expects.
-	daClient := block.NewFiberDAClient(fiberClient, cfg, logger, gen.DAStartHeight)
-
-	// 9) p2p client (required by NewNode signature; outbound is disabled
-	// internally when fiber is enabled).
-	nodePrivKey, _, err := crypto.GenerateEd25519Key(rand.Reader)
-	if err != nil {
-		return fmt.Errorf("generate node key: %w", err)
-	}
-	nodeKey := &key.NodeKey{PrivKey: nodePrivKey}
-	p2pClient, err := p2p.NewClient(cfg.P2P, nodeKey.PrivKey, datastore.NewMapDatastore(), gen.ChainID, logger, nil)
+	nodeKey, err := loadOrGenNodeKey(filepath.Join(f.homeDir, "node-key.json"))
 	if err != nil {
-		return fmt.Errorf("create p2p client: %w", err)
-	}
-
-	// 10) Build the node.
-	rollnode, err := node.NewNode(
-		cfg,
-		exec,
-		seq,
-		daClient,
-		signer,
-		p2pClient,
-		gen,
-		ds,
-		node.DefaultMetricsProvider(cfg.Instrumentation),
-		logger,
-		node.NodeOptions{},
-	)
-	if err != nil {
-		return fmt.Errorf("create node: %w", err)
+		return fmt.Errorf("node key: %w", err)
 	}
+	exec := newInMemExecutor(f.mempoolSize)
+	seq := solo.NewSoloSequencer(logger, []byte(gen.ChainID), exec)
 
-	// 11) Start the node.
-	nodeErrCh := make(chan error, 1)
-	var nodeWg sync.WaitGroup
-	nodeWg.Add(1)
-	go func() {
-		defer nodeWg.Done()
-		defer func() {
-			if r := recover(); r != nil {
-				nodeErrCh <- fmt.Errorf("node panicked: %v", r)
-			}
-		}()
-		nodeErrCh <- rollnode.Run(ctx)
-	}()
+	// 6) Spawn loader + stats printer BEFORE cmd.StartNode (which
+	// blocks). They run for the lifetime of the bench. cmd.StartNode
+	// owns its own signal-handling goroutine; we send SIGINT to
+	// ourselves when the duration timer expires so it can exit
+	// through its normal shutdown path.
+	bgCtx, bgCancel := signal.NotifyContext(cobraCmd.Context(), os.Interrupt, syscall.SIGTERM)
+	defer bgCancel()
 
-	// 12) Start the load generator.
-	loaderWg := sync.WaitGroup{}
+	var loaderWg sync.WaitGroup
 	loaderWg.Add(1)
 	go func() {
 		defer loaderWg.Done()
-		newLoader(exec, f.workers, f.txSize).run(ctx)
+		newLoader(exec, f.workers, f.txSize).run(bgCtx)
 	}()
 
-	// 13) Stats printer + duration timer.
+	printer := newStatsPrinter(exec, f.prometheusAddr, f.txSize, fiberClient)
+	printer.start(bgCtx, f.statsInterval)
+
 	logger.Info().
 		Dur("duration", f.duration).
 		Int("workers", f.workers).
@@ -306,40 +275,42 @@ func runBench(parentCtx context.Context, f runFlags) error {
 		Str("batching", f.batchingStrategy).
 		Msg("bench started")
 
-	printer := newStatsPrinter(exec, f.prometheusAddr, f.txSize, fiberClient)
-	printer.start(ctx, f.statsInterval)
-
 	if f.duration > 0 {
-		select {
-		case <-time.After(f.duration):
-			logger.Info().Msg("duration elapsed, stopping")
-		case err := <-nodeErrCh:
-			if err != nil && !errors.Is(err, context.Canceled) {
-				logger.Error().Err(err).Msg("node exited unexpectedly")
-				cancel()
-				return err
+		go func() {
+			select {
+			case <-time.After(f.duration):
+				logger.Info().Msg("duration elapsed, sending SIGINT to trigger shutdown")
+				_ = syscall.Kill(syscall.Getpid(), syscall.SIGINT)
+			case <-bgCtx.Done():
 			}
-		case <-ctx.Done():
-		}
-	} else {
-		select {
-		case err := <-nodeErrCh:
-			if err != nil && !errors.Is(err, context.Canceled) {
-				logger.Error().Err(err).Msg("node exited unexpectedly")
-				cancel()
-				return err
-			}
-		case <-ctx.Done():
-		}
+		}()
 	}
 
-	cancel()
+	// 7) The actual node — let cmd.StartNode do all the wiring
+	// (signer load, DA client, p2p, node.NewNode, run loop with
+	// shutdown). Same call testapp/evm/grpc apps make.
+	startErr := rollcmd.StartNode(
+		logger, cobraCmd, exec, seq, nodeKey, ds, cfg, gen,
+		node.NodeOptions{}, fiberClient,
+	)
+
+	bgCancel()
 	loaderWg.Wait()
-	nodeWg.Wait()
 	printer.printFinalSummary()
+
+	if startErr != nil && !errors.Is(startErr, context.Canceled) {
+		return startErr
+	}
 	return nil
 }
 
+// loadOrGenNodeKey is a tiny shim around pkg/p2p/key.LoadOrGenNodeKey,
+// kept as a package-local helper so the bench can stay decoupled from
+// changes to that helper's import path. The behaviour is identical.
+func loadOrGenNodeKey(path string) (*key.NodeKey, error) {
+	return key.LoadOrGenNodeKey(filepath.Dir(path))
+}
+
 func setupLogger(level string) zerolog.Logger {
 	lvl, err := zerolog.ParseLevel(level)
 	if err != nil {

From a1b2c9dc0e2ed69bd48a562210dde1cb98f04f01 Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Mon, 27 Apr 2026 23:44:25 +0100
Subject: [PATCH 16/18] fix(apps): unblock testapp/evm/grpc compile by passing
 nil fiberClient
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fiberClient parameter was added to pkg/cmd.StartNode in commit
87573ae0 (on this branch's parent julien/fiber) but the three apps
that call it were never updated. Branch HEAD therefore had three
broken compiles — anyone trying to build a testapp / evm / grpc
binary on this branch hit:

  cmd/run.go: not enough arguments in call to cmd.StartNode

Pass nil for the new parameter in each app and document why with a
TODO pointing at tools/celestia-node-fiber. None of the three apps
currently need fiber DA support — they pre-date this branch's fiber
work — and the right way to add it is to construct a
*cnfiber.Adapter from nodeConfig.DA.Fiber and pass it through, the
same pattern fiber-bench's run.go uses (see commit 57fa8596). That
work is out of scope for this commit; this is just the "stop the
bleed" change so the branch builds cleanly.

Three identical comment blocks across the three apps so anyone
landing in any one of them sees the same context.
---
 apps/evm/cmd/run.go     | 5 ++++-
 apps/grpc/cmd/run.go    | 6 ++++--
 apps/testapp/cmd/run.go | 7 ++++++-
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/apps/evm/cmd/run.go b/apps/evm/cmd/run.go
index a88e548d21..7829c2e1b0 100644
--- a/apps/evm/cmd/run.go
+++ b/apps/evm/cmd/run.go
@@ -131,7 +131,10 @@ var RunCmd = &cobra.Command{
 			}()
 		}
 
-		return rollcmd.StartNode(logger, cmd, executor, sequencer, nodeKey, datastore, nodeConfig, genesis, node.NodeOptions{})
+		// nil fiberClient: the EVM app doesn't wire Fibre DA. See
+		// tools/celestia-node-fiber for the adapter; testapp/cmd/run.go
+		// has the same TODO note for matching context.
+		return rollcmd.StartNode(logger, cmd, executor, sequencer, nodeKey, datastore, nodeConfig, genesis, node.NodeOptions{}, nil)
 	},
 }
 
diff --git a/apps/grpc/cmd/run.go b/apps/grpc/cmd/run.go
index 22ca71f587..586a89eddc 100644
--- a/apps/grpc/cmd/run.go
+++ b/apps/grpc/cmd/run.go
@@ -86,8 +86,10 @@ The execution client must implement the Evolve execution gRPC interface.`,
 			return err
 		}
 
-		// Start the node
-		return rollcmd.StartNode(logger, cmd, executor, sequencer, nodeKey, datastore, nodeConfig, genesis, node.NodeOptions{})
+		// Start the node. nil fiberClient: the gRPC app doesn't wire
+		// Fibre DA. See tools/celestia-node-fiber for the adapter;
+		// testapp/cmd/run.go has the same TODO note for context.
+		return rollcmd.StartNode(logger, cmd, executor, sequencer, nodeKey, datastore, nodeConfig, genesis, node.NodeOptions{}, nil)
 	},
 }
 
diff --git a/apps/testapp/cmd/run.go b/apps/testapp/cmd/run.go
index 75e5a49019..05698f1e60 100644
--- a/apps/testapp/cmd/run.go
+++ b/apps/testapp/cmd/run.go
@@ -97,7 +97,12 @@ var RunCmd = &cobra.Command{
 			return err
 		}
 
-		return cmd.StartNode(logger, command, executor, sequencer, nodeKey, datastore, nodeConfig, genesis, node.NodeOptions{})
+		// nil fiberClient: testapp doesn't yet wire Fibre DA. To enable
+		// fiber support here, build a *cnfiber.Adapter from
+		// nodeConfig.DA.Fiber and pass it as the last argument. The
+		// adapter wiring lives in tools/celestia-node-fiber; see the
+		// fiber-bench tool's run.go for a working caller.
+		return cmd.StartNode(logger, command, executor, sequencer, nodeKey, datastore, nodeConfig, genesis, node.NodeOptions{}, nil)
 	},
 }
 

From 5946b089d83c79bf25752e60ebb48e7fe1576a3e Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Tue, 28 Apr 2026 00:05:48 +0100
Subject: [PATCH 17/18] refactor(fiber-bench): reuse canonical config flags via
 rollconf.AddFlags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bench's runFlags struct had grown ~22 cobra flags, ~15 of which
were straight aliases for things rollconf.AddFlags already registers
(--block-time → --evnode.node.block_time, --batching-strategy →
--evnode.da.batching_strategy, --consensus-grpc →
--evnode.da.fiber.consensus_address, etc.). Each alias was its own
maintenance liability — defaults drifted from the canonical defaults,
new ev-node config fields didn't surface here without manual sync, and
operators learned a bench-specific flag dialect that didn't transfer
to testapp/evm/grpc.

Drop the aliases. Run command now calls:

  rollconf.AddGlobalFlags(root, AppName + "/node")  // --home, --evnode.log.*
  rollconf.AddFlags(runCmd)                         // --evnode.node.*, etc.
  rollcmd.ParseConfig(cmd) → rollcmd.SetupLogger(cfg.Log)

…then post-parse forces what the bench requires (Aggregator, Fiber.Enabled,
P2P.ListenAddress, Signer.SignerType, Pprof off, Prometheus on, BridgeAddress
placeholder for FiberDAConfig.Validate) and overrides canonical defaults
that are wrong for a throughput bench (DA block time → 1s, batching →
immediate, scrape interval → 100ms, namespaces → fb-bench-{h,d}). Operator
flags always win — overrides only fire when cobra reports the flag wasn't
Changed.

Bench-local flags that survived: --duration, --workers, --tx-size,
--mempool-size, --stats-interval, --keep-home, --keyring-dir (cosmos
keyring; not the ev-node signer), --signer-passphrase (still writes a
temp file consumed by --evnode.signer.passphrase_file; commit 2 will
replace this with a real init flow).

Default home stays at ~/.fiber-bench/node (passed as
\"fiber-bench/node\" to AddGlobalFlags) so the os.RemoveAll(cfg.RootDir)
on --keep-home=false runs cannot clobber the cosmos keyring at
~/.fiber-bench/keyring. Updated run-bench.sh and README to use the
canonical --evnode.* flag names.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../cmd/fiber-bench/README.md                 |  36 +-
 .../cmd/fiber-bench/main.go                   |  18 +-
 .../cmd/fiber-bench/run-bench.sh              |  10 +-
 .../cmd/fiber-bench/run.go                    | 332 +++++++++---------
 .../cmd/fiber-bench/util.go                   |  15 +-
 5 files changed, 209 insertions(+), 202 deletions(-)

diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/README.md b/tools/celestia-node-fiber/cmd/fiber-bench/README.md
index 463014e80d..d99419f6e8 100644
--- a/tools/celestia-node-fiber/cmd/fiber-bench/README.md
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/README.md
@@ -70,16 +70,21 @@ go build -tags fibre -o bin/fiber-bench ./cmd/fiber-bench/
 
 # 6. Run the bench
 ./bin/fiber-bench run \
-    --consensus-grpc 139.59.229.101:9091 \
-    --chain-id <chain-id-the-server-actually-reports> \
-    --key-name bench \
+    --evnode.da.fiber.consensus_address 139.59.229.101:9091 \
+    --evnode.da.fiber.consensus_chain_id <chain-id-the-server-actually-reports> \
+    --evnode.da.fiber.key_name bench \
     --duration 2m \
     --workers 32 \
     --tx-size 200 \
-    --block-time 1s \
-    --batching-strategy immediate
+    --evnode.node.block_time 1s \
+    --evnode.da.batching_strategy immediate
 ```
 
+The bench reuses canonical ev-node flags (`--evnode.*`) registered by
+`pkg/config.AddFlags` rather than defining bench-specific aliases. See
+`fiber-bench run --help` for the full list — anything you'd configure on
+testapp/evm/grpc apps works here too.
+
 Or use the convenience wrapper:
 
 ```sh
@@ -130,17 +135,28 @@ Txs executed:           XXX (avg N tx/s, peak N tx/s, T tx/blk)
 
 ## Knobs worth flipping while debugging
 
+Bench-local flags:
+
 | Flag                    | Default      | Why                                               |
 |-------------------------|--------------|---------------------------------------------------|
-| `--block-time`          | `1s`         | Drop to e.g. `100ms` to expose per-block overhead |
-| `--batching-strategy`   | `immediate`  | Try `time` / `size` / `adaptive`                  |
-| `--reaper-interval`     | `100ms`      | How often the mempool drain runs                  |
-| `--max-pending`         | `0`          | Cap pending DA blobs to test backpressure         |
 | `--workers`             | `32`         | Tx-injection concurrency                          |
 | `--tx-size`             | `200`        | Bytes per tx (matches user-reported regression)   |
 | `--mempool-size`        | `1_000_000`  | Bench's bounded backpressure boundary             |
 | `--keep-home`           | `false`      | Resume from prior state (defaults to wipe)        |
-| `--log-level`           | `info`       | `debug` to see ev-node block production logs      |
+| `--duration`            | `1m`         | How long to run (0 = until SIGINT)                |
+| `--stats-interval`      | `1s`         | Stats line cadence                                |
+| `--keyring-dir`         | `~/.fiber-bench/keyring` | Cosmos keyring (Fibre payment promises) |
+| `--signer-passphrase`   | `fiber-bench-passphrase` | ev-node block-signing key passphrase  |
+
+Canonical ev-node flags worth flipping (full list: `run --help`):
+
+| Flag                                | Bench default | Why                                  |
+|-------------------------------------|---------------|--------------------------------------|
+| `--evnode.node.block_time`          | `1s`          | Drop to `100ms` to expose per-block overhead |
+| `--evnode.da.batching_strategy`     | `immediate`   | Try `time` / `size` / `adaptive`              |
+| `--evnode.node.scrape_interval`     | `100ms`       | How often the mempool drain runs              |
+| `--evnode.node.max_pending_headers_and_data` | `0`  | Cap pending DA blobs to test backpressure     |
+| `--evnode.log.level`                | `info`        | `debug` to see ev-node block production logs  |
 
 ## ev-node Prometheus
 
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/main.go b/tools/celestia-node-fiber/cmd/fiber-bench/main.go
index 0678cb8e42..1671e4a784 100644
--- a/tools/celestia-node-fiber/cmd/fiber-bench/main.go
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/main.go
@@ -23,14 +23,30 @@ import (
 	// bech32 prefix to "celestia" — must run before any keyring operation
 	// that prints addresses.
 	_ "github.com/celestiaorg/celestia-app/v9/app/params"
+
+	rollconf "github.com/evstack/ev-node/pkg/config"
+)
+
+// AppName names the binary. The home dir intentionally lives one level
+// deeper at ~/.fiber-bench/node so the bench's --keep-home=false default
+// (which os.RemoveAll's cfg.RootDir) cannot wipe the cosmos keyring at
+// ~/.fiber-bench/keyring.
+const (
+	AppName            = "fiber-bench"
+	defaultHomeAppName = AppName + "/node"
 )
 
 func main() {
 	root := &cobra.Command{
-		Use:   "fiber-bench",
+		Use:   AppName,
 		Short: "Single-sequencer ev-node throughput bench against a remote Fibre network",
 	}
 
+	// Register --home, --evnode.log.level, --evnode.log.format,
+	// --evnode.log.trace on the root so every subcommand inherits them
+	// (matches apps/testapp).
+	rollconf.AddGlobalFlags(root, defaultHomeAppName)
+
 	root.AddCommand(
 		keysCmd(),
 		escrowCmd(),
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/run-bench.sh b/tools/celestia-node-fiber/cmd/fiber-bench/run-bench.sh
index 6f8ecb80ef..43ea31d0be 100755
--- a/tools/celestia-node-fiber/cmd/fiber-bench/run-bench.sh
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/run-bench.sh
@@ -61,14 +61,14 @@ echo "==> escrow:"
 
 echo "==> starting bench: duration=$DURATION workers=$WORKERS tx_size=$TX_SIZE block_time=$BLOCK_TIME batching=$BATCHING"
 exec "$BIN" run \
-    --consensus-grpc "$CONSENSUS_GRPC" \
-    --chain-id "$CHAIN_ID" \
+    --evnode.da.fiber.consensus_address "$CONSENSUS_GRPC" \
+    --evnode.da.fiber.consensus_chain_id "$CHAIN_ID" \
+    --evnode.da.fiber.key_name "$KEY_NAME" \
     --keyring-dir "$KEYRING_DIR" \
-    --key-name "$KEY_NAME" \
     --home "$HOME_DIR" \
     --duration "$DURATION" \
     --workers "$WORKERS" \
     --tx-size "$TX_SIZE" \
-    --block-time "$BLOCK_TIME" \
-    --batching-strategy "$BATCHING" \
+    --evnode.node.block_time "$BLOCK_TIME" \
+    --evnode.da.batching_strategy "$BATCHING" \
     ${FIBER_BENCH_ARGS:-}
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/run.go b/tools/celestia-node-fiber/cmd/fiber-bench/run.go
index 929df3ed5d..0666790c26 100644
--- a/tools/celestia-node-fiber/cmd/fiber-bench/run.go
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/run.go
@@ -11,12 +11,11 @@ import (
 	"syscall"
 	"time"
 
-	"github.com/rs/zerolog"
 	"github.com/spf13/cobra"
 
-	rollcmd "github.com/evstack/ev-node/pkg/cmd"
-	evconfig "github.com/evstack/ev-node/pkg/config"
 	"github.com/evstack/ev-node/node"
+	rollcmd "github.com/evstack/ev-node/pkg/cmd"
+	rollconf "github.com/evstack/ev-node/pkg/config"
 	"github.com/evstack/ev-node/pkg/genesis"
 	"github.com/evstack/ev-node/pkg/p2p/key"
 	"github.com/evstack/ev-node/pkg/sequencers/solo"
@@ -24,120 +23,105 @@ import (
 	"github.com/evstack/ev-node/pkg/store"
 )
 
-type runFlags struct {
-	// Fibre
-	consensusGRPC string
-	chainID       string
-	keyringDir    string
-	keyName       string
-	headerNS      string
-	dataNS        string
-
-	// ev-node tuning
-	blockTime        time.Duration
-	daBlockTime      time.Duration
-	batchingStrategy string
-	scrapeInterval   time.Duration
-	maxPending       uint64
-	signerPassphrase string
-
-	// Bench
-	homeDir       string
-	keepHome      bool
-	duration      time.Duration
-	workers       int
-	txSize        int
-	mempoolSize   int
-	statsInterval time.Duration
-
-	// Observability
-	prometheus     bool
-	prometheusAddr string
-	logLevel       string
-}
+// Bench-local flag names. The rest come from rollconf.AddFlags
+// (--evnode.da.fiber.consensus_address, --evnode.da.batching_strategy, …)
+// and rollconf.AddGlobalFlags (--home, --log.level, …).
+const (
+	flagKeyringDir       = "keyring-dir"
+	flagKeepHome         = "keep-home"
+	flagDuration         = "duration"
+	flagWorkers          = "workers"
+	flagTxSize           = "tx-size"
+	flagMempoolSize      = "mempool-size"
+	flagStatsInterval    = "stats-interval"
+	flagSignerPassphrase = "signer-passphrase"
+)
 
 func runCmd() *cobra.Command {
-	f := runFlags{}
 	cmd := &cobra.Command{
 		Use:   "run",
 		Short: "Run the bench: start a single-sequencer ev-node against a Fibre network and pump load",
-		RunE: func(cmd *cobra.Command, args []string) error {
-			return runBench(cmd, f)
-		},
+		RunE:  runBench,
 	}
 
-	flags := cmd.Flags()
-
-	flags.StringVar(&f.consensusGRPC, "consensus-grpc", "", "celestia-app gRPC address (host:port). Required.")
-	flags.StringVar(&f.chainID, "chain-id", "", "celestia-app consensus chain ID. Required.")
-	flags.StringVar(&f.keyringDir, "keyring-dir", defaultKeyringDir(), "directory holding the bench cosmos keyring (test backend)")
-	flags.StringVar(&f.keyName, "key-name", "default", "name of the key in the keyring used to sign Fibre payment promises")
-	flags.StringVar(&f.headerNS, "header-namespace", "fb-bench-h", "namespace string for ev-node block headers (10 bytes after hashing)")
-	flags.StringVar(&f.dataNS, "data-namespace", "fb-bench-d", "namespace string for ev-node block data")
+	// Canonical ev-node flags: --evnode.node.*, --evnode.da.*,
+	// --evnode.da.fiber.*, --evnode.signer.*, --evnode.instrumentation.*,
+	// --evnode.p2p.*, --evnode.signer.passphrase_file, etc. The bench
+	// applies opinionated defaults post-parse for the ones a thoughtful
+	// operator would otherwise have to flip every run (see runBench).
+	rollconf.AddFlags(cmd)
 
-	flags.DurationVar(&f.blockTime, "block-time", time.Second, "ev-node block production interval")
-	flags.DurationVar(&f.daBlockTime, "da-block-time", time.Second, "DA layer block time hint (controls submitter cadence)")
-	flags.StringVar(&f.batchingStrategy, "batching-strategy", "immediate", "ev-node DA batching strategy: immediate|size|time|adaptive")
-	flags.DurationVar(&f.scrapeInterval, "reaper-interval", 100*time.Millisecond, "how often the reaper drains the mempool")
-	flags.Uint64Var(&f.maxPending, "max-pending", 0, "max pending headers/data before block production pauses (0 = unlimited)")
-	flags.StringVar(&f.signerPassphrase, "signer-passphrase", "fiber-bench-passphrase", "passphrase for the ev-node file signer (block-signing key, NOT the cosmos one)")
-
-	flags.StringVar(&f.homeDir, "home", defaultNodeHome(), "ev-node home directory (signer, store)")
-	flags.BoolVar(&f.keepHome, "keep-home", false, "do not wipe the ev-node home before starting (resumes prior state)")
-	flags.DurationVar(&f.duration, "duration", 60*time.Second, "how long to run the bench before stopping (0 = until SIGINT)")
-	flags.IntVar(&f.workers, "workers", 32, "number of concurrent tx-injection goroutines")
-	flags.IntVar(&f.txSize, "tx-size", 200, "size of each generated tx in bytes")
-	flags.IntVar(&f.mempoolSize, "mempool-size", 1_000_000, "size of the in-mem executor's mempool channel (backpressure boundary)")
-	flags.DurationVar(&f.statsInterval, "stats-interval", time.Second, "how often to print a stats line")
+	flags := cmd.Flags()
+	flags.String(flagKeyringDir, defaultKeyringDir(), "directory holding the bench cosmos keyring (test backend) used to sign Fibre payment promises")
+	flags.Bool(flagKeepHome, false, "do not wipe the ev-node home before starting (resumes prior state)")
+	flags.Duration(flagDuration, 60*time.Second, "how long to run the bench before stopping (0 = until SIGINT)")
+	flags.Int(flagWorkers, 32, "number of concurrent tx-injection goroutines")
+	flags.Int(flagTxSize, 200, "size of each generated tx in bytes")
+	flags.Int(flagMempoolSize, 1_000_000, "size of the in-mem executor's mempool channel (backpressure boundary)")
+	flags.Duration(flagStatsInterval, time.Second, "how often to print a stats line")
+	flags.String(flagSignerPassphrase, "fiber-bench-passphrase", "passphrase for the ev-node file signer (block-signing key, NOT the cosmos one). Written to a temp file consumed by --evnode.signer.passphrase_file.")
+
+	// Fibre consensus address/chain ID don't have empty defaults
+	// (DefaultConfig points at 127.0.0.1:9090 / mocha-4), but those
+	// values are sentinels — running the bench against them is never
+	// what the operator wants. Force them through.
+	_ = cobra.MarkFlagRequired(flags, rollconf.FlagDAFiberConsensusAddress)
+	_ = cobra.MarkFlagRequired(flags, rollconf.FlagDAFiberConsensusChainID)
 
-	flags.BoolVar(&f.prometheus, "prometheus", true, "enable ev-node's Prometheus metrics endpoint")
-	flags.StringVar(&f.prometheusAddr, "prometheus-addr", "127.0.0.1:26660", "address for the ev-node Prometheus endpoint")
-	flags.StringVar(&f.logLevel, "log-level", "info", "ev-node log level (debug|info|warn|error)")
+	return cmd
+}
 
-	// FlagSignerPassphraseFile is what cmd.StartNode reads to load the
-	// file-backed signer's passphrase. We define it on the bench's run
-	// command so cmd.StartNode finds it via cmd.Flags().GetString;
-	// runBench writes the operator's --signer-passphrase to a temp
-	// file and sets this flag's value before delegating.
-	flags.String(evconfig.FlagSignerPassphraseFile, "", "(internal) populated by --signer-passphrase before cmd.StartNode runs")
-	_ = cmd.Flags().MarkHidden(evconfig.FlagSignerPassphraseFile)
+func runBench(cobraCmd *cobra.Command, _ []string) error {
+	cfg, err := rollcmd.ParseConfig(cobraCmd)
+	if err != nil {
+		return err
+	}
+	applyBenchDefaults(cobraCmd, &cfg)
 
-	_ = cobra.MarkFlagRequired(flags, "consensus-grpc")
-	_ = cobra.MarkFlagRequired(flags, "chain-id")
+	// Re-validate after the bench's overrides — ParseConfig already ran
+	// once on parse, but we mutated Aggregator/Fiber/etc. afterwards.
+	if err := cfg.Validate(); err != nil {
+		return fmt.Errorf("config invalid after bench overrides: %w", err)
+	}
 
-	return cmd
-}
+	logger := rollcmd.SetupLogger(cfg.Log)
 
-func runBench(cobraCmd *cobra.Command, f runFlags) error {
-	logger := setupLogger(f.logLevel)
+	keyringDir, _ := cobraCmd.Flags().GetString(flagKeyringDir)
+	keepHome, _ := cobraCmd.Flags().GetBool(flagKeepHome)
+	duration, _ := cobraCmd.Flags().GetDuration(flagDuration)
+	workers, _ := cobraCmd.Flags().GetInt(flagWorkers)
+	txSize, _ := cobraCmd.Flags().GetInt(flagTxSize)
+	mempoolSize, _ := cobraCmd.Flags().GetInt(flagMempoolSize)
+	statsInterval, _ := cobraCmd.Flags().GetDuration(flagStatsInterval)
+	signerPassphrase, _ := cobraCmd.Flags().GetString(flagSignerPassphrase)
 
-	if !f.keepHome {
-		_ = os.RemoveAll(f.homeDir)
+	if !keepHome {
+		_ = os.RemoveAll(cfg.RootDir)
 	}
-	if err := os.MkdirAll(f.homeDir, 0o755); err != nil {
-		return fmt.Errorf("create home %s: %w", f.homeDir, err)
+	if err := os.MkdirAll(cfg.RootDir, 0o755); err != nil {
+		return fmt.Errorf("create home %s: %w", cfg.RootDir, err)
 	}
 
-	// 1) Open the cosmos keyring + build the bridge-bypass Fibre
-	// adapter. These are the two genuinely fiber-bench-specific
-	// pieces — neither lives in the production wiring path.
-	kr, err := openKeyring(f.keyringDir)
+	// 1) Cosmos keyring + bridge-bypass Fibre adapter — the two genuinely
+	// fiber-bench-specific pieces. Neither lives in the production wiring
+	// path.
+	kr, err := openKeyring(keyringDir)
 	if err != nil {
-		return fmt.Errorf("open keyring at %s: %w", f.keyringDir, err)
+		return fmt.Errorf("open keyring at %s: %w", keyringDir, err)
 	}
-	rec, err := kr.Key(f.keyName)
+	rec, err := kr.Key(cfg.DA.Fiber.KeyName)
 	if err != nil {
 		return fmt.Errorf("key %q not found in keyring %s — run `fiber-bench keys add %s` first: %w",
-			f.keyName, f.keyringDir, f.keyName, err)
+			cfg.DA.Fiber.KeyName, keyringDir, cfg.DA.Fiber.KeyName, err)
 	}
 	addr, err := rec.GetAddress()
 	if err != nil {
 		return fmt.Errorf("derive key address: %w", err)
 	}
-	logger.Info().Str("address", addr.String()).Str("key", f.keyName).Msg("loaded fibre signing key")
+	logger.Info().Str("address", addr.String()).Str("key", cfg.DA.Fiber.KeyName).Msg("loaded fibre signing key")
 
-	logger.Info().Str("grpc", f.consensusGRPC).Msg("dialing consensus gRPC")
-	innerFiberClient, fiberClose, err := buildFibreAdapter(cobraCmd.Context(), f.consensusGRPC, f.keyName, kr)
+	logger.Info().Str("grpc", cfg.DA.Fiber.ConsensusAddress).Msg("dialing consensus gRPC")
+	innerFiberClient, fiberClose, err := buildFibreAdapter(cobraCmd.Context(), cfg.DA.Fiber.ConsensusAddress, cfg.DA.Fiber.KeyName, kr)
 	if err != nil {
 		return fmt.Errorf("build fibre adapter: %w", err)
 	}
@@ -150,14 +134,26 @@ func runBench(cobraCmd *cobra.Command, f runFlags) error {
 	// per-Upload p50/p99.
 	fiberClient := newInstrumentedAdapter(innerFiberClient)
 
-	// 2) ev-node block-signing key. Created in the home dir if missing.
-	signerDir := filepath.Join(f.homeDir, "signer")
+	// 2) ev-node block-signing key. Created in cfg.Signer.SignerPath if
+	// missing. cmd.StartNode reads the passphrase from the path stored
+	// in --evnode.signer.passphrase_file; we write a temp file from
+	// --signer-passphrase and inject the flag value so the canonical
+	// signer-loading path works without us asking the operator to manage
+	// a passphrase file by hand.
+	signerDir := cfg.Signer.SignerPath
+	if signerDir == "" {
+		signerDir = filepath.Join(cfg.RootDir, "config")
+	}
+	if !filepath.IsAbs(signerDir) {
+		signerDir = filepath.Join(cfg.RootDir, signerDir)
+	}
+	cfg.Signer.SignerPath = signerDir
 	if err := os.MkdirAll(signerDir, 0o750); err != nil {
 		return fmt.Errorf("create signer dir: %w", err)
 	}
 	signerFile := filepath.Join(signerDir, "signer.json")
 	if _, statErr := os.Stat(signerFile); os.IsNotExist(statErr) {
-		s, err := file.CreateFileSystemSigner(signerDir, []byte(f.signerPassphrase))
+		s, err := file.CreateFileSystemSigner(signerDir, []byte(signerPassphrase))
 		if err != nil {
 			return fmt.Errorf("create file signer: %w", err)
 		}
@@ -165,20 +161,16 @@ func runBench(cobraCmd *cobra.Command, f runFlags) error {
 			return fmt.Errorf("signer address: %w", err)
 		}
 	}
-	// cmd.StartNode reads the passphrase from a file path stored in
-	// FlagSignerPassphraseFile; write the in-memory string out so
-	// the canonical signer-loading path works without a separate
-	// passphrase-flag flow.
-	passphraseFile := filepath.Join(f.homeDir, "passphrase.txt")
-	if err := os.WriteFile(passphraseFile, []byte(f.signerPassphrase), 0o600); err != nil {
+	passphraseFile := filepath.Join(cfg.RootDir, "passphrase.txt")
+	if err := os.WriteFile(passphraseFile, []byte(signerPassphrase), 0o600); err != nil {
 		return fmt.Errorf("write passphrase file: %w", err)
 	}
-	if err := cobraCmd.Flags().Set(evconfig.FlagSignerPassphraseFile, passphraseFile); err != nil {
+	if err := cobraCmd.Flags().Set(rollconf.FlagSignerPassphraseFile, passphraseFile); err != nil {
 		return fmt.Errorf("set passphrase flag: %w", err)
 	}
 
 	// Reload the signer to derive the genesis proposer address.
-	loaded, err := file.LoadFileSystemSigner(signerDir, []byte(f.signerPassphrase))
+	loaded, err := file.LoadFileSystemSigner(signerDir, []byte(signerPassphrase))
 	if err != nil {
 		return fmt.Errorf("load file signer: %w", err)
 	}
@@ -188,67 +180,30 @@ func runBench(cobraCmd *cobra.Command, f runFlags) error {
 	}
 
 	// 3) Genesis. Single proposer = our signer.
-	gen := genesis.NewGenesis(f.chainID, 1, time.Now().UTC(), signerAddr)
+	gen := genesis.NewGenesis(cfg.DA.Fiber.ConsensusChainID, 1, time.Now().UTC(), signerAddr)
 	if err := gen.Validate(); err != nil {
 		return fmt.Errorf("invalid genesis: %w", err)
 	}
 
-	// 4) ev-node config.
-	cfg := evconfig.DefaultConfig()
-	cfg.RootDir = f.homeDir
-	cfg.DBPath = "data"
-	cfg.Node.Aggregator = true
-	cfg.Node.BlockTime = evconfig.DurationWrapper{Duration: f.blockTime}
-	cfg.Node.LazyMode = false
-	cfg.Node.MaxPendingHeadersAndData = f.maxPending
-	cfg.Node.ScrapeInterval = evconfig.DurationWrapper{Duration: f.scrapeInterval}
-
-	cfg.DA.BlockTime = evconfig.DurationWrapper{Duration: f.daBlockTime}
-	cfg.DA.Namespace = f.headerNS
-	cfg.DA.DataNamespace = f.dataNS
-	cfg.DA.BatchingStrategy = f.batchingStrategy
-	cfg.DA.RequestTimeout = evconfig.DurationWrapper{Duration: 60 * time.Second}
-	cfg.DA.Fiber.Enabled = true
-	cfg.DA.Fiber.ConsensusAddress = f.consensusGRPC
-	cfg.DA.Fiber.ConsensusChainID = f.chainID
-	// BridgeAddress is required by config validation when fiber enabled,
-	// but we never use it. Set a syntactically-valid placeholder.
-	cfg.DA.Fiber.BridgeAddress = "ws://127.0.0.1:0"
-	cfg.DA.Fiber.KeyName = f.keyName
-
-	cfg.P2P.ListenAddress = "/ip4/127.0.0.1/tcp/0"
-	cfg.P2P.DisableConnectionGater = true
-
-	cfg.Instrumentation.Prometheus = f.prometheus
-	cfg.Instrumentation.PrometheusListenAddr = f.prometheusAddr
-	cfg.Instrumentation.Pprof = false
-
-	cfg.RPC.Address = "127.0.0.1:0"
-	cfg.Log.Level = f.logLevel
-	cfg.Signer.SignerType = "file"
-	cfg.Signer.SignerPath = signerDir
-
-	if err := cfg.DA.Fiber.Validate(); err != nil {
-		return fmt.Errorf("fiber config: %w", err)
-	}
-
-	// 5) Datastore + node-key + executor + sequencer. The first three
+	// 4) Datastore + node-key + executor + sequencer. The first three
 	// look identical to what testapp/cmd/run.go does; the executor
 	// is the bench-specific in-memory variant (constant state root,
 	// see executor.go for rationale) and the sequencer is solo (no
 	// based-sequencer / no forced inclusion machinery).
-	ds, err := store.NewDefaultKVStore(f.homeDir, cfg.DBPath, "fiber-bench")
+	ds, err := store.NewDefaultKVStore(cfg.RootDir, cfg.DBPath, "fiber-bench")
 	if err != nil {
 		return fmt.Errorf("open datastore: %w", err)
 	}
-	nodeKey, err := loadOrGenNodeKey(filepath.Join(f.homeDir, "node-key.json"))
+	// Match canonical layout: node_key.json under <home>/config/, the
+	// same dir testapp/cmd/run.go reads it from.
+	nodeKey, err := key.LoadOrGenNodeKey(filepath.Dir(cfg.ConfigPath()))
 	if err != nil {
 		return fmt.Errorf("node key: %w", err)
 	}
-	exec := newInMemExecutor(f.mempoolSize)
+	exec := newInMemExecutor(mempoolSize)
 	seq := solo.NewSoloSequencer(logger, []byte(gen.ChainID), exec)
 
-	// 6) Spawn loader + stats printer BEFORE cmd.StartNode (which
+	// 5) Spawn loader + stats printer BEFORE cmd.StartNode (which
 	// blocks). They run for the lifetime of the bench. cmd.StartNode
 	// owns its own signal-handling goroutine; we send SIGINT to
 	// ourselves when the duration timer expires so it can exit
@@ -260,25 +215,25 @@ func runBench(cobraCmd *cobra.Command, f runFlags) error {
 	loaderWg.Add(1)
 	go func() {
 		defer loaderWg.Done()
-		newLoader(exec, f.workers, f.txSize).run(bgCtx)
+		newLoader(exec, workers, txSize).run(bgCtx)
 	}()
 
-	printer := newStatsPrinter(exec, f.prometheusAddr, f.txSize, fiberClient)
-	printer.start(bgCtx, f.statsInterval)
+	printer := newStatsPrinter(exec, cfg.Instrumentation.PrometheusListenAddr, txSize, fiberClient)
+	printer.start(bgCtx, statsInterval)
 
 	logger.Info().
-		Dur("duration", f.duration).
-		Int("workers", f.workers).
-		Int("tx_size", f.txSize).
-		Int("mempool", f.mempoolSize).
-		Dur("block_time", f.blockTime).
-		Str("batching", f.batchingStrategy).
+		Dur("duration", duration).
+		Int("workers", workers).
+		Int("tx_size", txSize).
+		Int("mempool", mempoolSize).
+		Dur("block_time", cfg.Node.BlockTime.Duration).
+		Str("batching", cfg.DA.BatchingStrategy).
 		Msg("bench started")
 
-	if f.duration > 0 {
+	if duration > 0 {
 		go func() {
 			select {
-			case <-time.After(f.duration):
+			case <-time.After(duration):
 				logger.Info().Msg("duration elapsed, sending SIGINT to trigger shutdown")
 				_ = syscall.Kill(syscall.Getpid(), syscall.SIGINT)
 			case <-bgCtx.Done():
@@ -286,9 +241,9 @@ func runBench(cobraCmd *cobra.Command, f runFlags) error {
 		}()
 	}
 
-	// 7) The actual node — let cmd.StartNode do all the wiring
-	// (signer load, DA client, p2p, node.NewNode, run loop with
-	// shutdown). Same call testapp/evm/grpc apps make.
+	// 6) The actual node — let cmd.StartNode do all the wiring (signer
+	// load, DA client, p2p, node.NewNode, run loop with shutdown). Same
+	// call testapp/evm/grpc apps make.
 	startErr := rollcmd.StartNode(
 		logger, cobraCmd, exec, seq, nodeKey, ds, cfg, gen,
 		node.NodeOptions{}, fiberClient,
@@ -304,19 +259,48 @@ func runBench(cobraCmd *cobra.Command, f runFlags) error {
 	return nil
 }
 
-// loadOrGenNodeKey is a tiny shim around pkg/p2p/key.LoadOrGenNodeKey,
-// kept as a package-local helper so the bench can stay decoupled from
-// changes to that helper's import path. The behaviour is identical.
-func loadOrGenNodeKey(path string) (*key.NodeKey, error) {
-	return key.LoadOrGenNodeKey(filepath.Dir(path))
-}
-
-func setupLogger(level string) zerolog.Logger {
-	lvl, err := zerolog.ParseLevel(level)
-	if err != nil {
-		lvl = zerolog.InfoLevel
+// applyBenchDefaults overrides config fields that the bench needs forced
+// (Aggregator, Fiber.Enabled) and the canonical defaults that are wrong
+// for a throughput bench (DA block time, batching strategy, scrape
+// interval, namespaces). Anything the operator passed on the command line
+// is left untouched — we only override where the flag value still equals
+// its canonical default.
+func applyBenchDefaults(cmd *cobra.Command, cfg *rollconf.Config) {
+	// Forced for the bench: aggregator-only, Fibre DA, no P2P.
+	cfg.Node.Aggregator = true
+	cfg.Node.BasedSequencer = false
+	cfg.DA.Fiber.Enabled = true
+	if cfg.DA.Fiber.BridgeAddress == "" {
+		// FiberDAConfig.Validate requires a ws:// or wss:// address.
+		// Bench never dials it (see fibre.go: noBridgeBlob).
+		cfg.DA.Fiber.BridgeAddress = "ws://127.0.0.1:0"
+	}
+	cfg.P2P.ListenAddress = "/ip4/127.0.0.1/tcp/0"
+	cfg.P2P.DisableConnectionGater = true
+	cfg.RPC.Address = "127.0.0.1:0"
+	cfg.Signer.SignerType = "file"
+	cfg.Instrumentation.Pprof = false
+	// The stats printer scrapes /metrics every tick — keep Prometheus on
+	// even if the operator didn't pass --evnode.instrumentation.prometheus.
+	cfg.Instrumentation.Prometheus = true
+
+	// Operator-overridable bench defaults — applied only if the canonical
+	// flag wasn't passed on the command line.
+	overrideIfUnchanged := func(name string, set func()) {
+		if !cmd.Flags().Changed(name) {
+			set()
+		}
 	}
-	zerolog.SetGlobalLevel(lvl)
-	return zerolog.New(zerolog.ConsoleWriter{Out: os.Stderr}).
-		With().Timestamp().Str("component", "fiber-bench").Logger()
+	overrideIfUnchanged(rollconf.FlagDABlockTime, func() {
+		cfg.DA.BlockTime = rollconf.DurationWrapper{Duration: time.Second}
+	})
+	overrideIfUnchanged(rollconf.FlagDABatchingStrategy, func() { cfg.DA.BatchingStrategy = "immediate" })
+	overrideIfUnchanged(rollconf.FlagScrapeInterval, func() {
+		cfg.Node.ScrapeInterval = rollconf.DurationWrapper{Duration: 100 * time.Millisecond}
+	})
+	overrideIfUnchanged(rollconf.FlagDARequestTimeout, func() {
+		cfg.DA.RequestTimeout = rollconf.DurationWrapper{Duration: 60 * time.Second}
+	})
+	overrideIfUnchanged(rollconf.FlagDANamespace, func() { cfg.DA.Namespace = "fb-bench-h" })
+	overrideIfUnchanged(rollconf.FlagDADataNamespace, func() { cfg.DA.DataNamespace = "fb-bench-d" })
 }
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/util.go b/tools/celestia-node-fiber/cmd/fiber-bench/util.go
index c91354f989..666d00f041 100644
--- a/tools/celestia-node-fiber/cmd/fiber-bench/util.go
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/util.go
@@ -6,21 +6,12 @@ import (
 )
 
 // defaultKeyringDir is where we put the bench's cosmos keyring by default.
-// Lives under the user's home so multiple bench runs share the same key.
+// Sibling of the ev-node home (~/.fiber-bench/node) so --keep-home=false
+// runs cannot wipe it.
 func defaultKeyringDir() string {
 	home, err := os.UserHomeDir()
 	if err != nil {
-		return ".fiber-bench"
+		return ".fiber-bench-keyring"
 	}
 	return filepath.Join(home, ".fiber-bench", "keyring")
 }
-
-// defaultNodeHome is the ev-node working directory (signer, store, config).
-// Cleared on each run by default — see runCmd's --keep-home flag.
-func defaultNodeHome() string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return ".fiber-bench-node"
-	}
-	return filepath.Join(home, ".fiber-bench", "node")
-}

From 9f4fe4cdadc8bfb264cf50121c2db2935682953b Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Tue, 28 Apr 2026 00:24:16 +0100
Subject: [PATCH 18/18] refactor(fiber-bench): inline loader backoff, drop
 yield.go

yield.go was a single-line wrapper around time.Sleep(100us) parked in
its own file with a long explanatory comment. The comment moves up to
the loaderBackoff const in loader.go (the only caller), the file goes
away. No behavioural change.
---
 .../cmd/fiber-bench/loader.go                 | 21 ++++++++++++-------
 .../cmd/fiber-bench/yield.go                  | 17 ---------------
 2 files changed, 14 insertions(+), 24 deletions(-)
 delete mode 100644 tools/celestia-node-fiber/cmd/fiber-bench/yield.go

diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/loader.go b/tools/celestia-node-fiber/cmd/fiber-bench/loader.go
index df03a8ef26..4f527e8dc8 100644
--- a/tools/celestia-node-fiber/cmd/fiber-bench/loader.go
+++ b/tools/celestia-node-fiber/cmd/fiber-bench/loader.go
@@ -5,8 +5,20 @@ import (
 	"encoding/binary"
 	"sync"
 	"sync/atomic"
+	"time"
 )
 
+// loaderBackoff is what each worker waits when InjectTx returns false
+// because the mempool channel is full. A real sleep (rather than
+// runtime.Gosched) caps the per-worker drop rate so allocation
+// pressure scales with actual drain throughput; without it, full-
+// mempool workers spin a tight allocate-then-drop loop at ~200k
+// iter/s/worker — millions of short-lived slices per second across the
+// pool, which drove the OOM kills we hit early in the investigation.
+// 100 µs caps each worker at ~10k drops/s when the mempool is
+// permanently full.
+const loaderBackoff = 100 * time.Microsecond
+
 // loader pumps fixed-size payloads into the in-mem executor as fast as it
 // can. Backpressure comes from the executor's bounded mempool channel:
 // when full, InjectTx returns false and we count it as dropped.
@@ -63,16 +75,11 @@ func (l *loader) run(ctx context.Context) {
 				tx := make([]byte, l.txSize)
 				copy(tx, buf)
 				if !l.exec.InjectTx(tx) {
-					// Mempool full — yield and retry. ev-node's
-					// reaper will drain it on its scrape interval.
+					// Mempool full — back off briefly and retry.
 					select {
 					case <-ctx.Done():
 						return
-					default:
-						// Tight retry: Gosched is enough to let the
-						// reaper goroutine make progress without us
-						// burning a syscall.
-						runtimeYield()
+					case <-time.After(loaderBackoff):
 					}
 				}
 			}
diff --git a/tools/celestia-node-fiber/cmd/fiber-bench/yield.go b/tools/celestia-node-fiber/cmd/fiber-bench/yield.go
deleted file mode 100644
index acd76daabd..0000000000
--- a/tools/celestia-node-fiber/cmd/fiber-bench/yield.go
+++ /dev/null
@@ -1,17 +0,0 @@
-package main
-
-import "time"
-
-// loaderBackoff is what each worker waits when InjectTx returns false
-// because the mempool channel is full. Using a real sleep (rather than
-// runtime.Gosched) caps the per-worker drop rate, which keeps the
-// load generator's allocation pressure proportional to actual drain
-// throughput. Without this, full-mempool workers spin a tight
-// allocate-then-drop loop at ~200k iter/s/worker — millions of
-// short-lived 200 B slices per second across the pool, which drives GC
-// hard and drove the OOM kills observed at sustained load.
-//
-// 100 µs caps a single worker to ~10k drops/s when the mempool is
-// permanently full. Total drop rate scales with --workers and serves
-// as a bounded backpressure signal in the stats line.
-func runtimeYield() { time.Sleep(100 * time.Microsecond) }