diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 543e143c70..c6fe0d119a 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -547,10 +547,82 @@ jobs: path: integration_test/rpc_tests/reports/merged if-no-files-found: ignore + # Autobahn integration suite (Autobahn Basic) — boots its own cluster via + # TestMain (docker-cluster-start / -stop), so it runs in a separate job + # rather than as a matrix entry that would share the integration-tests + # cluster. + autobahn-integration-tests: + name: Integration Test (Autobahn Basic) + runs-on: ubuntu-large + timeout-minutes: 45 + needs: prepare-cluster + permissions: + packages: read + contents: read + env: + GHCR_LOCALNODE: ghcr.io/sei-protocol/sei-chain-integration-test-localnode + GHCR_RPCNODE: ghcr.io/sei-protocol/sei-chain-integration-test-rpcnode + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 + with: + go-version: '1.25.6' + - name: Install jq + run: sudo apt-get install -y jq + - name: Login to Docker Hub + uses: docker/login-action@v3 + if: env.DOCKERHUB_USERNAME != '' + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Download integration CI artifacts + uses: actions/download-artifact@v4 + with: + name: integration-ci-artifacts + - name: Load prebuilt seid and pull Docker images + run: | + tar -xzf integration-build.tar.gz + docker pull "${GHCR_LOCALNODE}:${{ github.run_id }}" + docker pull "${GHCR_RPCNODE}:${{ github.run_id }}" + docker tag "${GHCR_LOCALNODE}:${{ github.run_id }}" sei-chain/localnode + docker tag "${GHCR_RPCNODE}:${{ github.run_id }}" sei-chain/rpcnode + - name: Run autobahn 
integration tests + run: make autobahn-integration-test + - name: Print node logs on failure + if: ${{ failure() }} + run: | + set -euo pipefail + for c in sei-node-0 sei-node-1 sei-node-2 sei-node-3 sei-rpc-node; do + echo "==================== ${c} (docker logs tail) ====================" + docker logs --tail 200 "${c}" || true + done + - name: Collect logs directory + if: ${{ always() }} + run: | + mkdir -p artifacts/sei-autobahn-integration + if [ -d build/generated/logs ]; then + cp -r build/generated/logs artifacts/sei-autobahn-integration/ + fi + - name: Upload logs directory + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: integration-logs-autobahn-integration + path: artifacts/sei-autobahn-integration + if-no-files-found: warn + integration-test-check: name: Integration Test Check runs-on: ubuntu-latest - needs: [prepare-cluster, integration-tests] + needs: [prepare-cluster, integration-tests, autobahn-integration-tests] if: always() steps: - name: Verify prepare and test jobs succeeded @@ -563,4 +635,8 @@ jobs: echo "integration-tests job did not succeed (${{ needs.integration-tests.result }})" exit 1 fi + if [[ "${{ needs.autobahn-integration-tests.result }}" != "success" ]]; then + echo "autobahn-integration-tests job did not succeed (${{ needs.autobahn-integration-tests.result }})" + exit 1 + fi echo "All integration test jobs passed." diff --git a/Makefile b/Makefile index 3302baf5e5..085bf60949 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ # - Prefer tag if bases are equal; otherwise use whichever base is newer. 
BRANCH_NAME := $(shell git rev-parse --abbrev-ref HEAD) BRANCH_VERSION := $(shell echo "$(BRANCH_NAME)" | sed -E -n 's|.*(v[0-9]+\.[0-9]+\.[0-9]+[-A-Za-z0-9._]*).*|\1|p') -TAG_VERSION := $(shell echo $(shell git describe --tags)) +TAG_VERSION := $(shell echo $(shell git describe --tags 2>/dev/null)) VERSION := $(shell \ bv="$(BRANCH_VERSION)"; tv="$(TAG_VERSION)"; \ bb=$$(echo "$$bv" | sed 's/^\(v[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\).*/\1/'); \ @@ -225,13 +225,13 @@ build-docker-node: .PHONY: build-docker-node build-rpc-node: - @cd docker && docker build --tag sei-chain/rpcnode rpcnode --platform linux/x86_64 + @cd docker && docker build --tag sei-chain/rpcnode rpcnode --platform $(DOCKER_PLATFORM) .PHONY: build-rpc-node -# Integration-test CI: verify images loaded from prepare-cluster artifacts. +# Integration-test CI: verify images pulled from GHCR by the matrix job. ensure-integration-ci-images: - @docker image inspect sei-chain/localnode >/dev/null 2>&1 || (echo "sei-chain/localnode image missing; load integration-docker-images.tar.zst from prepare-cluster" && exit 1) - @docker image inspect sei-chain/rpcnode >/dev/null 2>&1 || (echo "sei-chain/rpcnode image missing; load integration-docker-images.tar.zst from prepare-cluster" && exit 1) + @docker image inspect sei-chain/localnode >/dev/null 2>&1 || (echo "sei-chain/localnode image missing; pull from GHCR (see prepare-cluster job)" && exit 1) + @docker image inspect sei-chain/rpcnode >/dev/null 2>&1 || (echo "sei-chain/rpcnode image missing; pull from GHCR (see prepare-cluster job)" && exit 1) .PHONY: ensure-integration-ci-images # Build seid once inside the localnode image (integration-test prepare job). 
@@ -279,7 +279,7 @@ run-local-node: kill-sei-node build-docker-node -v $(PROJECT_HOME):/sei-protocol/sei-chain:Z \ -v $(GO_PKG_PATH)/mod:/root/go/pkg/mod:Z \ -v $(shell go env GOCACHE):/root/.cache/go-build:Z \ - --platform linux/x86_64 \ + --platform $(DOCKER_PLATFORM) \ sei-chain/localnode .PHONY: run-local-node @@ -296,9 +296,11 @@ run-rpc-node: build-rpc-node -v $(GO_PKG_PATH)/mod:/root/go/pkg/mod:Z \ -v $(shell go env GOCACHE):/root/.cache/go-build:Z \ -p 26668-26670:26656-26658 \ - --platform linux/x86_64 \ + --platform $(DOCKER_PLATFORM) \ --env GIGA_STORAGE=${GIGA_STORAGE} \ --env GIGA_FLATKV_ONLY=${GIGA_FLATKV_ONLY} \ + --env AUTOBAHN=${AUTOBAHN} \ + --env CLUSTER_SIZE=${CLUSTER_SIZE} \ --env RECEIPT_BACKEND=${RECEIPT_BACKEND} \ sei-chain/rpcnode .PHONY: run-rpc-node @@ -315,10 +317,12 @@ run-rpc-node-skipbuild: build-rpc-node -v $(GO_PKG_PATH)/mod:/root/go/pkg/mod:Z \ -v $(shell go env GOCACHE):/root/.cache/go-build:Z \ -p 26668-26670:26656-26658 \ - --platform linux/x86_64 \ + --platform $(DOCKER_PLATFORM) \ --env SKIP_BUILD=true \ --env GIGA_STORAGE=${GIGA_STORAGE} \ --env GIGA_FLATKV_ONLY=${GIGA_FLATKV_ONLY} \ + --env AUTOBAHN=${AUTOBAHN} \ + --env CLUSTER_SIZE=${CLUSTER_SIZE} \ --env RECEIPT_BACKEND=${RECEIPT_BACKEND} \ sei-chain/rpcnode .PHONY: run-rpc-node @@ -345,7 +349,7 @@ run-rpc-node-integration-ci: kill-rpc-node ensure-integration-ci-images -v $(GO_PKG_PATH)/mod:/root/go/pkg/mod:Z \ -v $(shell go env GOCACHE):/root/.cache/go-build:Z \ -p 26668-26670:26656-26658 \ - --platform linux/x86_64 \ + --platform $(DOCKER_PLATFORM) \ --env SKIP_BUILD=true \ --env GIGA_STORAGE=${GIGA_STORAGE} \ --env GIGA_FLATKV_ONLY=${GIGA_FLATKV_ONLY} \ diff --git a/docker/rpcnode/scripts/step1_configure_init.sh b/docker/rpcnode/scripts/step1_configure_init.sh index c2be823b6e..34efdcee36 100755 --- a/docker/rpcnode/scripts/step1_configure_init.sh +++ b/docker/rpcnode/scripts/step1_configure_init.sh @@ -1,4 +1,14 @@ #!/usr/bin/env sh +# +# rpcnode init script for the 
integration-test docker setup. NOT a +# production deploy script. Best-effort by design — curl probes against +# the validator RPC are allowed to fail (the script proceeds with +# whatever values came back, the container then converges as the cluster +# stabilises). The few cases where we DO want fail-fast — genesis file +# missing after the bounded wait, autobahn validator dirs missing — are +# guarded by explicit `if [ ! -f ]; exit 1` checks below. Don't add +# `set -e`: it makes those transient curl probes fatal and breaks +# init when the cluster is still warming up. # Set up GO PATH echo "Configure and initialize environment" @@ -7,14 +17,30 @@ echo "Configure and initialize environment" seid version # Uncomment the below line if there are any dependency issues # ldd build/seid -# Initialize validator node +# Initialize validator node. --overwrite so this is safe to re-run inside +# a recycled container; the script writes new configs over whatever was +# already on the previous run. MONIKER="sei-rpc-node" -seid init --chain-id sei "$MONIKER" +seid init --overwrite --chain-id sei "$MONIKER" + +# Wait for the chain genesis.json (validator step3 writes it). The test +# setup may spawn the rpc node in parallel with the cluster, so this can +# still be missing here — poll up to 5 minutes. +GENESIS_SRC="build/generated/genesis.json" +i=0 +while [ ! -f "$GENESIS_SRC" ] && [ "$i" -lt 300 ]; do + sleep 1 + i=$((i + 1)) +done +if [ ! -f "$GENESIS_SRC" ]; then + echo "ERROR: $GENESIS_SRC missing after 5 minutes; aborting." >&2 + exit 1 +fi # Copy configs cp docker/rpcnode/config/app.toml ~/.sei/config/app.toml cp docker/rpcnode/config/config.toml ~/.sei/config/config.toml -cp build/generated/genesis.json ~/.sei/config/genesis.json +cp "$GENESIS_SRC" ~/.sei/config/genesis.json # Apply Giga Storage overrides so the RPC node's app hash matches the validators. 
GIGA_STORAGE=${GIGA_STORAGE:-false} @@ -59,6 +85,55 @@ if [ -n "$RECEIPT_BACKEND" ]; then fi fi +# Generate Autobahn (GigaRouter) config when the validators are running +# Autobahn consensus. The RPC node uses mode = "full" (see config.toml), +# which makes it a fullnode autobahn participant — loads the committee +# for routing only and forwards eth_sendRawTransaction to the shard owner. +# Reuse the validator node directories under build/generated/ (mounted +# into the container) so the committee description matches the cluster. +AUTOBAHN=${AUTOBAHN:-false} +if [ "$AUTOBAHN" = "true" ]; then + echo "Generating Autobahn config for RPC node (fullnode via mode=full)..." + AUTOBAHN_CONFIG="$HOME/.sei/config/autobahn.json" + + # Default to 4 (the docker-compose cluster size) when CLUSTER_SIZE is unset. + CLUSTER_SIZE=${CLUSTER_SIZE:-4} + NODE_DIRS="" + i=0 + while [ "$i" -lt "$CLUSTER_SIZE" ]; do + NODE_DIRS="$NODE_DIRS build/generated/node_${i}" + i=$((i + 1)) + done + + # Wait for each validator dir to be fully populated. gen-autobahn-config + # reads validator_pubkey, node_pubkey, autobahn_address, evmrpc_url; the + # rpc container can be spawned in parallel with the cluster, so any of + # these may not yet exist. Poll up to 5 minutes for evmrpc_url.txt (the + # autobahn-specific file each validator step writes last). + for d in $NODE_DIRS; do + i=0 + while [ ! -f "$d/evmrpc_url.txt" ] && [ "$i" -lt 300 ]; do + sleep 1 + i=$((i + 1)) + done + if [ ! -f "$d/evmrpc_url.txt" ]; then + echo "ERROR: $d/evmrpc_url.txt missing after 5 minutes; aborting." >&2 + exit 1 + fi + done + + seid tendermint gen-autobahn-config $NODE_DIRS --output "$AUTOBAHN_CONFIG" + + # Inject autobahn-config-file as a top-level key in config.toml. It must + # precede any [section] header so the TOML parser sees it at root scope. 
+ if grep -q "autobahn-config-file" ~/.sei/config/config.toml; then + sed -i 's|autobahn-config-file = .*|autobahn-config-file = "'"$AUTOBAHN_CONFIG"'"|' ~/.sei/config/config.toml + else + sed -i '1s|^|autobahn-config-file = "'"$AUTOBAHN_CONFIG"'"\n|' ~/.sei/config/config.toml + fi + echo "Autobahn config written to $AUTOBAHN_CONFIG (fullnode via mode=full)" +fi + # Override state sync configs STATE_SYNC_RPC="192.168.10.10:26657" STATE_SYNC_PEER="2f9846450b7a3dcf4af1ac0082e3279c16744df8@172.31.9.18:26656,ec98c4a28a2023f4f976828c8a8e7127bfef4e1b@172.31.4.96:26656,b03014d67384fb0ef6ad992c77cefe4f9d2c1640@172.31.4.219:26656" diff --git a/evmrpc/block_txcount_parity_test.go b/evmrpc/block_txcount_parity_test.go index c420350995..c09ef88ad6 100644 --- a/evmrpc/block_txcount_parity_test.go +++ b/evmrpc/block_txcount_parity_test.go @@ -17,6 +17,7 @@ import ( "github.com/sei-protocol/sei-chain/sei-cosmos/client" sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types" tmbytes "github.com/sei-protocol/sei-chain/sei-tendermint/libs/bytes" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" tmmock "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/client/mock" "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/coretypes" tmtypes "github.com/sei-protocol/sei-chain/sei-tendermint/types" @@ -40,8 +41,8 @@ func (*parityTxCountTMClient) EvmTxByHash(common.Hash) (tmtypes.Tx, bool) { return nil, false } -func (*parityTxCountTMClient) EvmProxy(common.Address) (*url.URL, bool) { - return nil, false +func (*parityTxCountTMClient) EvmProxy(common.Address) utils.Option[*url.URL] { + return utils.None[*url.URL]() } func (c *parityTxCountTMClient) Block(_ context.Context, h *int64) (*coretypes.ResultBlock, error) { diff --git a/evmrpc/height_availability_test.go b/evmrpc/height_availability_test.go index 3fdd730b6f..931e4cc652 100644 --- a/evmrpc/height_availability_test.go +++ b/evmrpc/height_availability_test.go @@ -13,6 +13,7 @@ import ( 
"github.com/sei-protocol/sei-chain/sei-cosmos/client" sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types" "github.com/sei-protocol/sei-chain/sei-tendermint/libs/bytes" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/client/mock" "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/coretypes" tmtypes "github.com/sei-protocol/sei-chain/sei-tendermint/types" @@ -38,8 +39,8 @@ func (*heightTestClient) EvmTxByHash(common.Hash) (tmtypes.Tx, bool) { return nil, false } -func (*heightTestClient) EvmProxy(common.Address) (*url.URL, bool) { - return nil, false +func (*heightTestClient) EvmProxy(common.Address) utils.Option[*url.URL] { + return utils.None[*url.URL]() } func newHeightTestClient(highHeight, earliest, latest int64) *heightTestClient { diff --git a/evmrpc/send.go b/evmrpc/send.go index 45906c743d..60ac98d90d 100644 --- a/evmrpc/send.go +++ b/evmrpc/send.go @@ -92,7 +92,7 @@ func (s *SendAPI) SendRawTransaction(ctx context.Context, input hexutil.Bytes) ( // but we still need to handle it. sender, senderErr := getSender(tx, s.keeper.ChainID(s.ctxProvider(LatestCtxHeight))) if senderErr == nil { - if url, ok := s.tmClient.EvmProxy(sender); ok { + if url, ok := s.tmClient.EvmProxy(sender).Get(); ok { recordRedirectedRequest(ctx, "eth_sendRawTransaction", string(s.connectionType)) // HTTP transport pooling already happens globally underneath net/http, so // creating a fresh RPC client per proxied request is fine here. 
If we diff --git a/evmrpc/send_test.go b/evmrpc/send_test.go index d564c023af..49fbda5247 100644 --- a/evmrpc/send_test.go +++ b/evmrpc/send_test.go @@ -30,11 +30,11 @@ type sendProxyClient struct { proxyURL *url.URL } -func (c *sendProxyClient) EvmProxy(common.Address) (*url.URL, bool) { +func (c *sendProxyClient) EvmProxy(common.Address) utils.Option[*url.URL] { if c.proxyURL == nil { - return nil, false + return utils.None[*url.URL]() } - return c.proxyURL, true + return utils.Some(c.proxyURL) } func TestMnemonicToPrivateKey(t *testing.T) { diff --git a/evmrpc/setup_test.go b/evmrpc/setup_test.go index 1f8922dc1e..7385c12f67 100644 --- a/evmrpc/setup_test.go +++ b/evmrpc/setup_test.go @@ -31,6 +31,7 @@ import ( sdkerrors "github.com/sei-protocol/sei-chain/sei-cosmos/types/errors" abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" "github.com/sei-protocol/sei-chain/sei-tendermint/libs/bytes" + tmutils "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" types2 "github.com/sei-protocol/sei-chain/sei-tendermint/proto/tendermint/types" "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/client/mock" "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/coretypes" @@ -158,8 +159,8 @@ func (*MockClient) EvmTxByHash(hash common.Hash) (tmtypes.Tx, bool) { return tx, true } -func (*MockClient) EvmProxy(common.Address) (*url.URL, bool) { - return nil, false +func (*MockClient) EvmProxy(common.Address) tmutils.Option[*url.URL] { + return tmutils.None[*url.URL]() } func NewMockClientWithLatest(latest int64) *MockClient { diff --git a/evmrpc/simulate_test.go b/evmrpc/simulate_test.go index cfda51a7d3..de708ff5ea 100644 --- a/evmrpc/simulate_test.go +++ b/evmrpc/simulate_test.go @@ -30,6 +30,7 @@ import ( receipt "github.com/sei-protocol/sei-chain/sei-db/ledger_db/receipt" abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" "github.com/sei-protocol/sei-chain/sei-tendermint/libs/bytes" + 
"github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" tenderminttypes "github.com/sei-protocol/sei-chain/sei-tendermint/proto/tendermint/types" "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/client/mock" "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/coretypes" @@ -882,8 +883,8 @@ func (c *fixedBlockClient) EvmTxByHash(common.Hash) (tmtypes.Tx, bool) { return nil, false } -func (c *fixedBlockClient) EvmProxy(common.Address) (*url.URL, bool) { - return nil, false +func (c *fixedBlockClient) EvmProxy(common.Address) utils.Option[*url.URL] { + return utils.None[*url.URL]() } func (c *fixedBlockClient) Block(_ context.Context, _ *int64) (*coretypes.ResultBlock, error) { diff --git a/evmrpc/tests/mock_client.go b/evmrpc/tests/mock_client.go index 75d369544f..91a1a67b6c 100644 --- a/evmrpc/tests/mock_client.go +++ b/evmrpc/tests/mock_client.go @@ -16,6 +16,7 @@ import ( "github.com/sei-protocol/sei-chain/evmrpc" abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" tmbytes "github.com/sei-protocol/sei-chain/sei-tendermint/libs/bytes" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" tmproto "github.com/sei-protocol/sei-chain/sei-tendermint/proto/tendermint/types" "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/client/mock" "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/coretypes" @@ -45,8 +46,8 @@ func (*MockClient) EvmTxByHash(common.Hash) (tmtypes.Tx, bool) { return nil, false } -func (c *MockClient) EvmProxy(common.Address) (*url.URL, bool) { - return nil, false +func (c *MockClient) EvmProxy(common.Address) utils.Option[*url.URL] { + return utils.None[*url.URL]() } func (c *MockClient) Block(_ context.Context, h *int64) (*coretypes.ResultBlock, error) { diff --git a/evmrpc/tx.go b/evmrpc/tx.go index c2c6687af5..157324ce87 100644 --- a/evmrpc/tx.go +++ b/evmrpc/tx.go @@ -346,7 +346,7 @@ func (t *TransactionAPI) GetTransactionCount(ctx context.Context, address common }() if blockNrOrHash.BlockHash == nil && 
*blockNrOrHash.BlockNumber == rpc.PendingBlockNumber { - if url, ok := t.tmClient.EvmProxy(address); ok { + if url, ok := t.tmClient.EvmProxy(address).Get(); ok { recordRedirectedRequest(ctx, "eth_getTransactionCount", string(t.connectionType)) // HTTP transport pooling already happens globally underneath net/http, so diff --git a/evmrpc/tx_test.go b/evmrpc/tx_test.go index 4ba29a1d01..5670edaa0d 100644 --- a/evmrpc/tx_test.go +++ b/evmrpc/tx_test.go @@ -58,11 +58,11 @@ func (c *pendingNonceClient) EvmNextPendingNonce(common.Address) uint64 { return c.nextNonce } -func (c *pendingNonceClient) EvmProxy(common.Address) (*url.URL, bool) { +func (c *pendingNonceClient) EvmProxy(common.Address) utils.Option[*url.URL] { if c.proxyURL == nil { - return nil, false + return utils.None[*url.URL]() } - return c.proxyURL, true + return utils.Some(c.proxyURL) } func TestGetTransactionCount(t *testing.T) { @@ -305,7 +305,9 @@ func (c *lowLatestTMClient) EvmNextPendingNonce(common.Address) uint64 { return func (c *lowLatestTMClient) EvmTxByHash(common.Hash) (tmtypes.Tx, bool) { return nil, false } -func (c *lowLatestTMClient) EvmProxy(common.Address) (*url.URL, bool) { return nil, false } +func (c *lowLatestTMClient) EvmProxy(common.Address) utils.Option[*url.URL] { + return utils.None[*url.URL]() +} func (c *lowLatestTMClient) Status(context.Context) (*coretypes.ResultStatus, error) { return &coretypes.ResultStatus{ diff --git a/evmrpc/watermark_manager_test.go b/evmrpc/watermark_manager_test.go index 449c04ad1d..0d6c3b6cf1 100644 --- a/evmrpc/watermark_manager_test.go +++ b/evmrpc/watermark_manager_test.go @@ -20,6 +20,7 @@ import ( "github.com/sei-protocol/sei-chain/sei-db/proto" abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" "github.com/sei-protocol/sei-chain/sei-tendermint/libs/bytes" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/client/mock" 
"github.com/sei-protocol/sei-chain/sei-tendermint/rpc/coretypes" tmtypes "github.com/sei-protocol/sei-chain/sei-tendermint/types" @@ -203,8 +204,8 @@ func (*fakeTMClient) EvmTxByHash(common.Hash) (tmtypes.Tx, bool) { return nil, false } -func (*fakeTMClient) EvmProxy(common.Address) (*url.URL, bool) { - return nil, false +func (*fakeTMClient) EvmProxy(common.Address) utils.Option[*url.URL] { + return utils.None[*url.URL]() } func (f *fakeTMClient) Status(context.Context) (*coretypes.ResultStatus, error) { diff --git a/integration_test/autobahn/autobahn_test.go b/integration_test/autobahn/autobahn_test.go index a91592d42d..218f94c8ec 100644 --- a/integration_test/autobahn/autobahn_test.go +++ b/integration_test/autobahn/autobahn_test.go @@ -29,7 +29,12 @@ import ( ) const ( - tmRPCBase = "http://localhost:26657" + // tmRPCBase points at the fullnode sidecar's CometBFT RPC (port 26657 + // inside the container, host-published at 26669 via the rpc-node's + // docker run port mapping). The whole test suite routes its RPC reads + // through here — matches the production shape where clients talk to + // fullnodes, not validators. + tmRPCBase = "http://localhost:26669" abciInfoURL = tmRPCBase + "/abci_info" heightRetries = 60 heightBackoff = 500 * time.Millisecond @@ -44,6 +49,33 @@ const ( clusterBootTimeout = 5 * time.Minute clusterBootPoll = 5 * time.Second autobahnSettleDelay = 30 * time.Second + + // Fullnode sidecar lifecycle (TestMain). + fullnodeContainer = "sei-rpc-node" + fullnodeBootTimeout = 5 * time.Minute + fullnodeBootPoll = 5 * time.Second + // evmRPCURLOnContainerLocalhost is the EVM RPC address inside the + // rpc-node container — used with `docker exec ... curl` for readiness + // checks (the rpc-node's 8545 isn't host-published). 
+ evmRPCURLOnContainerLocalhost = "http://localhost:8545" + + // heightPoll governs both waitForStableHeight and waitForHeightAdvance: + // the fullnode's read of /abci_info trails the cluster while + // runExecute drains buffered blocks, and a killed-peer failover + // (DialInterval-bounded, ~10s) holds height static for that long. + // Polling lets each test absorb whatever combination of those delays + // actually applies, instead of guessing a sleep duration. + // Bounds are 2× the expected failover+drain so a slow CI runner has + // headroom without giving up another whole minute on every run. + heightPoll = 1 * time.Second + haltStableWindow = 20 * time.Second + // 2m / 90s give headroom for the fullnode catch-up backlog the + // preceding subtest may have left (failover delay during + // LivenessUnderMaxFaults can put the fullnode ~600 blocks behind, + // which takes ~60s to drain on top of the halt-detection window). + // CI runners are slower than local; 1m was tight enough to flake. + haltStableTimeout = 2 * time.Minute + heightAdvanceMax = 90 * time.Second ) var ( @@ -93,6 +125,50 @@ func getHeight(t *testing.T) int64 { return 0 } +// waitForStableHeight polls getHeight every heightPoll. It returns the +// height once the value has stayed constant for at least `window`. Useful +// after killing validators: cluster halt is observable through the +// fullnode's read of /abci_info only once any in-flight blocks have drained +// through runExecute and any block-sync failover has finished — both +// bounded in absolute time but variable per run. Fails the test if no +// stable window appears within `timeout`. 
+func waitForStableHeight(t *testing.T, window, timeout time.Duration) int64 { + t.Helper() + deadline := time.Now().Add(timeout) + h := getHeight(t) + stableSince := time.Now() + for time.Now().Before(deadline) { + if time.Since(stableSince) >= window { + return h + } + time.Sleep(heightPoll) + nh := getHeight(t) + if nh != h { + h = nh + stableSince = time.Now() + } + } + t.Fatalf("height did not stabilize within %s (last seen %d)", timeout, h) + return 0 +} + +// waitForHeightAdvance polls getHeight every heightPoll, returning the +// first observed value strictly greater than `base`. Used by liveness +// tests where progress is expected but may be delayed by the fullnode's +// block-sync failover from a killed peer. Fails the test on timeout. +func waitForHeightAdvance(t *testing.T, base int64, timeout time.Duration) int64 { + t.Helper() + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if h := getHeight(t); h > base { + return h + } + time.Sleep(heightPoll) + } + t.Fatalf("height did not advance past %d within %s", base, timeout) + return 0 +} + func fetchHeight() (int64, error) { resp, err := heightClient.Get(abciInfoURL) if err != nil { @@ -169,6 +245,11 @@ func TestMain(m *testing.M) { teardownCluster() // best-effort os.Exit(1) } + if err := setupFullnodeNode(); err != nil { + fmt.Fprintf(os.Stderr, "fullnode sidecar setup failed: %v\n", err) + teardownCluster() + os.Exit(1) + } code := m.Run() teardownCluster() os.Exit(code) @@ -250,8 +331,112 @@ func countSeiContainers() (int, error) { return len(strings.Fields(strings.TrimSpace(string(out)))), nil } -// teardownCluster runs `make docker-cluster-stop`, ignoring errors. +// setupFullnodeNode boots an autobahn fullnode sidecar alongside the validator +// cluster. Backgrounded via cmd.Start() because `make run-rpc-node-skipbuild` +// uses `docker run --rm` (foreground until the container exits); the actual +// container detaches from this process once it starts. 
+// +// Uses run-rpc-node-skipbuild so the rpc-node reuses the seid binary the +// validator containers already compiled — skips a second multi-minute +// `go install` cycle. The autobahn role itself comes from mode = "full" +// in docker/rpcnode/config/config.toml — setup.go picks the fullnode +// constructor when there's no local validator key. +func setupFullnodeNode() error { + fmt.Println("=== Starting fullnode sidecar ===") + _ = runMake(nil, "kill-rpc-node") // best-effort cleanup + + // Discover the cluster size from docker so the rpc-node's autobahn config + // covers exactly the validators that came up — non-four-node test runs + // would otherwise produce a mismatched committee. + clusterSize, err := countSeiContainers() + if err != nil { + return fmt.Errorf("count cluster containers: %w", err) + } + if clusterSize == 0 { + return fmt.Errorf("no sei-node-* containers found; setupCluster must run first") + } + cmd := exec.Command("make", "run-rpc-node-skipbuild") + cmd.Env = append(os.Environ(), "AUTOBAHN=true", fmt.Sprintf("CLUSTER_SIZE=%d", clusterSize)) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Start(); err != nil { + return fmt.Errorf("start make run-rpc-node-skipbuild: %w", err) + } + // Reap the process when it eventually exits (e.g. on container kill); + // not blocking on Wait here since the container runs for the duration + // of the test suite. 
+ go func() { _ = cmd.Wait() }() + + deadline := time.Now().Add(fullnodeBootTimeout) + for time.Now().Before(deadline) { + if fullnodeRunning() && fullnodeEVMReady() { + fmt.Println("fullnode sidecar is ready") + return nil + } + time.Sleep(fullnodeBootPoll) + } + return fmt.Errorf("fullnode sidecar didn't come up within %s", fullnodeBootTimeout) +} + +func fullnodeRunning() bool { + out, err := exec.Command("docker", "ps", + "--filter", "name="+fullnodeContainer, + "--filter", "status=running", + "--format", "{{.Names}}").Output() + if err != nil { + return false + } + return strings.TrimSpace(string(out)) == fullnodeContainer +} + +func fullnodeEVMReady() bool { + r, err := evmRPCInContainer(fullnodeContainer, "eth_chainId", []any{}) + return err == nil && r.Error == nil && len(r.Result) > 0 +} + +type evmRPCResponse struct { + Result json.RawMessage `json:"result"` + Error *evmRPCError `json:"error,omitempty"` +} + +type evmRPCError struct { + Code int `json:"code"` + Message string `json:"message"` +} + +// evmRPCInContainer POSTs a JSON-RPC call to the given container's +// localhost:8545. The fullnode container's 8545 isn't host-published; this +// is the only way to talk to it without changing the run target. 
+func evmRPCInContainer(container, method string, params any) (*evmRPCResponse, error) { + body, err := json.Marshal(map[string]any{ + "jsonrpc": "2.0", "id": 1, "method": method, "params": params, + }) + if err != nil { + return nil, err + } + out, err := exec.Command("docker", "exec", container, + "curl", "-sf", "-X", "POST", + "-H", "content-type: application/json", + "--data", string(body), + evmRPCURLOnContainerLocalhost).Output() + if err != nil { + return nil, fmt.Errorf("docker exec curl: %v", err) + } + var r evmRPCResponse + if err := json.Unmarshal(out, &r); err != nil { + return nil, fmt.Errorf("decode (body=%s): %w", out, err) + } + return &r, nil +} + +// teardownCluster tears down every container TestMain brought up: first +// the fullnode sidecar (so its run-rpc-node `docker run --rm` process +// exits cleanly), then the validator cluster. Best-effort — errors are +// ignored so a partially-failed setupCluster can still clean up. Adding +// new sidecars later goes here too. func teardownCluster() { + fmt.Println("=== Stopping fullnode sidecar ===") + _ = runMake(nil, "kill-rpc-node") fmt.Println("=== Stopping cluster ===") _ = runMake(nil, "docker-cluster-stop") } @@ -332,10 +517,10 @@ func testRecovery(t *testing.T) { killNode(t, clusterSize-1-i) } - // Let the chain settle into its halted height, then confirm it's halted. - time.Sleep(10 * time.Second) - hBefore := getHeight(t) - time.Sleep(5 * time.Second) + // Wait for the fullnode's view of height to stabilize (cluster halt + + // fullnode drain + any failover from a killed peer). The window inside + // waitForStableHeight already proves the chain isn't advancing. + hBefore := waitForStableHeight(t, haltStableWindow, haltStableTimeout) if h := getHeight(t); h != hBefore { t.Fatalf("expected halted chain after killing %d nodes, but height advanced (%d -> %d)", maxFaults+1, hBefore, h) @@ -347,21 +532,9 @@ func testRecovery(t *testing.T) { restartNode(t, target) // Poll for the chain to advance. 
Give the restarted seid time to init - // and rejoin consensus. - deadline := time.Now().Add(90 * time.Second) - var hAfter int64 - for time.Now().Before(deadline) { - hAfter = getHeight(t) - if hAfter > hBefore { - break - } - time.Sleep(2 * time.Second) - } + // and rejoin consensus, plus any fullnode failover. + hAfter := waitForHeightAdvance(t, hBefore, heightAdvanceMax) t.Logf("height after restart: %d", hAfter) - if hAfter <= hBefore { - t.Fatalf("chain did not resume advancing after restart of node %d (%d -> %d)", - target, hBefore, hAfter) - } // assertAutobahnEnabled greps every running container's log. The restarted // node is among them, and start_sei.sh truncates its log on restart (`>` @@ -545,6 +718,11 @@ func killNode(t *testing.T, i int) { // testLivenessUnderMaxFaults kills f = maxFaults nodes (from the highest index // downward). With clusterSize - f = 2f + 1 honest nodes left, the chain should // still advance. +// +// Polls for height to advance (instead of a fixed sleep): if the fullnode +// happened to be subscribed to the killed peer, its block-sync subscriber +// pauses for DialInterval (~10s) before failing over, so height stays at +// hBefore until then. func testLivenessUnderMaxFaults(t *testing.T) { assertAutobahnEnabled(t) hBefore := getHeight(t) @@ -552,27 +730,29 @@ func testLivenessUnderMaxFaults(t *testing.T) { for i := 0; i < maxFaults; i++ { killNode(t, clusterSize-1-i) } - time.Sleep(10 * time.Second) - hAfter := getHeight(t) + hAfter := waitForHeightAdvance(t, hBefore, heightAdvanceMax) t.Logf("height after: %d", hAfter) - if hAfter <= hBefore { - t.Fatalf("chain should continue with %d/%d validators (%d -> %d)", - clusterSize-maxFaults, clusterSize, hBefore, hAfter) - } } // testHaltsBeyondMaxFaults kills one more node beyond maxFaults (relies on the // prior LivenessUnderMaxFaults having already killed the first maxFaults). The // chain should stop advancing. 
+// +// Reads come through the fullnode sidecar, which lags the cluster while it +// drains buffered blocks through runExecute (and longer when the killed +// peer was the one fullnode was subscribed to — failover sleeps +// DialInterval before retrying). Instead of guessing a fixed settle, we +// poll getHeight and only sample once the value has been stable for a +// short window. func testHaltsBeyondMaxFaults(t *testing.T) { assertAutobahnEnabled(t) killNode(t, clusterSize-1-maxFaults) - time.Sleep(5 * time.Second) - hBefore := getHeight(t) + hBefore := waitForStableHeight(t, haltStableWindow, haltStableTimeout) t.Logf("height: %d (expecting halt)", hBefore) - time.Sleep(15 * time.Second) + // waitForStableHeight already returned only after haltStableWindow of + // no movement; the sample we just took is the halted height. hAfter := getHeight(t) - t.Logf("height after 15s: %d", hAfter) + t.Logf("height after stability: %d", hAfter) if hAfter != hBefore { t.Fatalf("chain should halt with %d/%d validators (height changed: %d -> %d)", clusterSize-maxFaults-1, clusterSize, hBefore, hAfter) diff --git a/sei-cosmos/client/context.go b/sei-cosmos/client/context.go index 1b56cdf7a9..e632c9ad1c 100644 --- a/sei-cosmos/client/context.go +++ b/sei-cosmos/client/context.go @@ -29,7 +29,7 @@ type LocalClient interface { Client EvmNextPendingNonce(addr common.Address) uint64 EvmTxByHash(hash common.Hash) (tmtypes.Tx, bool) - EvmProxy(sender common.Address) (*url.URL, bool) + EvmProxy(sender common.Address) utils.Option[*url.URL] } type Context struct { diff --git a/sei-cosmos/server/rollback_test.go b/sei-cosmos/server/rollback_test.go index 29dd83bd36..cecd249253 100644 --- a/sei-cosmos/server/rollback_test.go +++ b/sei-cosmos/server/rollback_test.go @@ -51,6 +51,10 @@ func (m *mockApplication) GetValidators() []abci.ValidatorUpdate { return nil } +func (m *mockApplication) LastBlockHeight() int64 { + return 0 +} + func (m *mockApplication) Close() error { return 
m.cms.Close() } diff --git a/sei-tendermint/abci/example/kvstore/kvstore.go b/sei-tendermint/abci/example/kvstore/kvstore.go index 983df9e8fb..60f3f20d8b 100644 --- a/sei-tendermint/abci/example/kvstore/kvstore.go +++ b/sei-tendermint/abci/example/kvstore/kvstore.go @@ -117,6 +117,12 @@ func (app *Application) Info(_ context.Context, req *types.RequestInfo) (*types. }, nil } +func (app *Application) LastBlockHeight() int64 { + app.mu.Lock() + defer app.mu.Unlock() + return app.state.Height +} + func (app *Application) GetValidators() []types.ValidatorUpdate { return app.Validators() } diff --git a/sei-tendermint/abci/types/application.go b/sei-tendermint/abci/types/application.go index 9c0f4e73c9..5db2209dd2 100644 --- a/sei-tendermint/abci/types/application.go +++ b/sei-tendermint/abci/types/application.go @@ -16,6 +16,10 @@ type Application interface { Info(context.Context, *RequestInfo) (*ResponseInfo, error) // Return application info Query(context.Context, *RequestQuery) (*ResponseQuery, error) // Query for state GetValidators() []ValidatorUpdate + // LastBlockHeight returns the height of the most recently committed + // block, as maintained by the app. Used by /status — must be a fast + // in-memory read; Info() is too heavy for the hot path. 
+ LastBlockHeight() int64 // Mempool Connection CheckTx(context.Context, *RequestCheckTxV2) *ResponseCheckTxV2 // Validate a tx for the mempool @@ -49,6 +53,7 @@ func (BaseApplication) Info(_ context.Context, req *RequestInfo) (*ResponseInfo, return &ResponseInfo{}, nil } func (BaseApplication) GetValidators() []ValidatorUpdate { return nil } +func (BaseApplication) LastBlockHeight() int64 { return 0 } func (BaseApplication) CheckTx(_ context.Context, req *RequestCheckTxV2) *ResponseCheckTxV2 { return &ResponseCheckTxV2{ResponseCheckTx: &ResponseCheckTx{Code: CodeTypeOK}} diff --git a/sei-tendermint/abci/types/mocks/application.go b/sei-tendermint/abci/types/mocks/application.go index aa0c4bec12..c474b32770 100644 --- a/sei-tendermint/abci/types/mocks/application.go +++ b/sei-tendermint/abci/types/mocks/application.go @@ -277,6 +277,24 @@ func (_m *Application) InitChain(_a0 context.Context, _a1 *types.RequestInitChai return r0, r1 } +// LastBlockHeight provides a mock function with no fields +func (_m *Application) LastBlockHeight() int64 { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for LastBlockHeight") + } + + var r0 int64 + if rf, ok := ret.Get(0).(func() int64); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(int64) + } + + return r0 +} + // ListSnapshots provides a mock function with given fields: _a0, _a1 func (_m *Application) ListSnapshots(_a0 context.Context, _a1 *types.RequestListSnapshots) (*types.ResponseListSnapshots, error) { ret := _m.Called(_a0, _a1) diff --git a/sei-tendermint/cmd/tendermint/commands/gen_autobahn_config.go b/sei-tendermint/cmd/tendermint/commands/gen_autobahn_config.go index 1a11b18868..13736e876d 100644 --- a/sei-tendermint/cmd/tendermint/commands/gen_autobahn_config.go +++ b/sei-tendermint/cmd/tendermint/commands/gen_autobahn_config.go @@ -77,7 +77,7 @@ Output is written to the file specified by --output.`, ValidatorKey: valKey, NodeKey: nodeKey, Address: addr, - EVMRPC: utils.Some(evmRPC), + 
EVMRPC: evmRPC, }) } diff --git a/sei-tendermint/config/autobahn.go b/sei-tendermint/config/autobahn.go index b65ffaeaa2..6c08e0629c 100644 --- a/sei-tendermint/config/autobahn.go +++ b/sei-tendermint/config/autobahn.go @@ -2,6 +2,7 @@ package config import ( "errors" + "fmt" "net/url" atypes "github.com/sei-protocol/sei-chain/sei-tendermint/autobahn/types" @@ -30,14 +31,7 @@ type AutobahnValidator struct { // Each validator is assigned a shard of EVM address space. // Upon receiving an EVM transaction, a node needs to proxy it // to validator owning the shard. - EVMRPC utils.Option[URL] `json:"evmrpc"` -} - -func (av *AutobahnValidator) GetEVMRPC() utils.Option[*url.URL] { - if u, ok := av.EVMRPC.Get(); ok { - return utils.Some(u.URL) - } - return utils.None[*url.URL]() + EVMRPC URL `json:"evmrpc"` } // AutobahnFileConfig is the JSON structure of the autobahn config file. @@ -50,13 +44,30 @@ type AutobahnFileConfig struct { ViewTimeout utils.Duration `json:"view_timeout"` PersistentStateDir utils.Option[string] `json:"persistent_state_dir"` DialInterval utils.Duration `json:"dial_interval"` + // MaxInboundFullnodePeers caps concurrent inbound block-sync from + // non-committee peers, applied on both validators and fullnodes (relay + // fullnodes serving downstream block-sync are subject to the same + // cap). Absent ⇒ DefaultMaxInboundFullnodePeers. Some(0) ⇒ reject all. + MaxInboundFullnodePeers utils.Option[uint64] `json:"max_inbound_fullnode_peers"` } +// DefaultMaxInboundFullnodePeers is the built-in cap used when +// AutobahnFileConfig.MaxInboundFullnodePeers is absent. +// +// TODO(autobahn-trusted-fullnode-peers): add an optional trusted-peer +// list whose keys bypass the cap. +const DefaultMaxInboundFullnodePeers = 10 + // Validate performs basic validation of the autobahn file config. 
func (fc *AutobahnFileConfig) Validate() error { if len(fc.Validators) == 0 { return errors.New("validators must not be empty") } + for _, v := range fc.Validators { + if v.EVMRPC.URL == nil { + return fmt.Errorf("validator %s is missing evmrpc URL", v.ValidatorKey) + } + } if fc.MaxTxsPerBlock == 0 { return errors.New("max_txs_per_block must be > 0") } diff --git a/sei-tendermint/config/autobahn_toml_test.go b/sei-tendermint/config/autobahn_toml_test.go new file mode 100644 index 0000000000..dac569581d --- /dev/null +++ b/sei-tendermint/config/autobahn_toml_test.go @@ -0,0 +1,93 @@ +package config_test + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/spf13/viper" + "github.com/stretchr/testify/require" + + "github.com/sei-protocol/sei-chain/sei-tendermint/cmd/tendermint/commands" + tmconfig "github.com/sei-protocol/sei-chain/sei-tendermint/config" +) + +// These tests mutate the global viper singleton via commands.ParseConfig, +// so they must not run in parallel with other tests in this package. + +// TestAutobahnKeysParseFromTopLevel guards against the trap where TOML keys +// authored after a [section] header get silently nested under that section. +// AutobahnConfigFile is a top-level field on Config +// (mapstructure:"autobahn-config-file"), so it must appear before any +// [section] header in the on-disk file. 
+func TestAutobahnKeysParseFromTopLevel(t *testing.T) { + viper.Reset() + t.Cleanup(viper.Reset) + + const content = ` +autobahn-config-file = "/etc/sei/autobahn.json" + +[rpc] +laddr = "tcp://127.0.0.1:26657" +` + configPath := filepath.Join(t.TempDir(), "config.toml") + require.NoError(t, os.WriteFile(configPath, []byte(content), 0600)) + + viper.SetConfigFile(configPath) + require.NoError(t, viper.ReadInConfig()) + + cfg, err := commands.ParseConfig(tmconfig.DefaultConfig()) + require.NoError(t, err) + require.Equal(t, "/etc/sei/autobahn.json", cfg.AutobahnConfigFile) +} + +// TestAutobahnKeysIgnoredUnderSectionHeader documents what breaks if the +// key is placed after a [section] header: TOML semantics nest it inside +// that section, viper looks for it at the root via mapstructure, and it +// silently parses as empty. +func TestAutobahnKeysIgnoredUnderSectionHeader(t *testing.T) { + viper.Reset() + t.Cleanup(viper.Reset) + + const content = ` +[self-remediation] +autobahn-config-file = "/etc/sei/autobahn.json" +` + configPath := filepath.Join(t.TempDir(), "config.toml") + require.NoError(t, os.WriteFile(configPath, []byte(content), 0600)) + + viper.SetConfigFile(configPath) + require.NoError(t, viper.ReadInConfig()) + + cfg, err := commands.ParseConfig(tmconfig.DefaultConfig()) + require.NoError(t, err) + // The field ends up empty — viper saw self-remediation.autobahn-config-file + // instead of the top-level key mapstructure was looking for. + require.Empty(t, cfg.AutobahnConfigFile) +} + +// TestRenderedTemplateAutobahnKeysAtTopLevel verifies that the freshly +// rendered config template puts autobahn-config-file at top level (i.e. +// above every [section] header). Pure structural check — guards against +// future edits accidentally moving it back under a section. 
+func TestRenderedTemplateAutobahnKeysAtTopLevel(t *testing.T) { + tmpDir := t.TempDir() + tmconfig.EnsureRoot(tmpDir) + require.NoError(t, tmconfig.WriteConfigFile(tmpDir, tmconfig.DefaultConfig())) + + data, err := os.ReadFile(filepath.Join(tmpDir, "config", "config.toml")) + require.NoError(t, err) + rendered := string(data) + + const key = "autobahn-config-file" + keyIdx := strings.Index(rendered, key) + require.NotEqual(t, -1, keyIdx, "key %q must appear in rendered template", key) + // Find the nearest [section] header above keyIdx, if any. + preamble := rendered[:keyIdx] + if lastSection := strings.LastIndex(preamble, "\n["); lastSection != -1 { + // There's a [section] above; reject — TOML would nest the key. + t.Fatalf("key %q follows a [section] header (parsed as nested):\n%s", + key, preamble[lastSection:]) + } +} diff --git a/sei-tendermint/config/config.go b/sei-tendermint/config/config.go index ce47df8c84..71cce25dd9 100644 --- a/sei-tendermint/config/config.go +++ b/sei-tendermint/config/config.go @@ -76,7 +76,11 @@ type Config struct { SelfRemediation *SelfRemediationConfig `mapstructure:"self-remediation"` // AutobahnConfigFile is the path to a JSON file containing the Autobahn (GigaRouter) - // configuration. Leave empty to disable Autobahn. + // configuration. Leave empty to disable Autobahn. The autobahn role + // follows the top-level `mode` field: "validator" runs the validator + // path; any other mode runs as a fullnode (loads the committee as a + // routing table and pulls blocks from committee members). A warning is + // logged at startup if mode disagrees with committee membership. 
AutobahnConfigFile string `mapstructure:"autobahn-config-file"` } diff --git a/sei-tendermint/config/toml.go b/sei-tendermint/config/toml.go index 4a21af33e4..a5951360df 100644 --- a/sei-tendermint/config/toml.go +++ b/sei-tendermint/config/toml.go @@ -141,6 +141,22 @@ genesis-file = "{{ js .BaseConfig.Genesis }}" # Path to the JSON file containing the private key to use for node authentication in the p2p protocol node-key-file = "{{ js .BaseConfig.NodeKey }}" +####################################################################### +### Autobahn Configuration ### +####################################################################### + +# Path to a JSON file containing the Autobahn (GigaRouter) configuration. +# Leave empty to disable Autobahn. The autobahn role follows the top-level +# "mode" field: mode = "validator" runs the validator path; any other mode +# runs as a fullnode that loads the committee for routing only and +# forwards eth_sendRawTransaction to the shard owner. A warning is logged +# at startup if mode disagrees with committee membership. +# +# Placed here (as a top-level key, before any [section] header) so the +# TOML parser sees it at root scope where mapstructure expects it — viper +# would otherwise nest it under the immediately preceding section. +autobahn-config-file = "{{ .AutobahnConfigFile }}" + ####################################################################### ### Advanced Configuration Options ### ####################################################################### @@ -640,10 +656,6 @@ blocks-behind-check-interval = {{ .SelfRemediation.BlocksBehindCheckIntervalSeco # Cooldown between each restart restart-cooldown-seconds = {{ .SelfRemediation.RestartCooldownSeconds }} -# Path to a JSON file containing the Autobahn (GigaRouter) configuration. -# Leave empty to disable Autobahn. 
-autobahn-config-file = "{{ .AutobahnConfigFile }}" - ` // defaultConfigTemplate combines manual and auto-managed templates for backward compatibility diff --git a/sei-tendermint/internal/autobahn/producer/mempool.go b/sei-tendermint/internal/autobahn/producer/mempool.go index 1927ae69b8..f269ec3910 100644 --- a/sei-tendermint/internal/autobahn/producer/mempool.go +++ b/sei-tendermint/internal/autobahn/producer/mempool.go @@ -68,7 +68,7 @@ func (s *State) EvmNextPendingNonce(addr common.Address) uint64 { return nonce } } - return s.cfg.App.EvmNonce(addr) + return s.app.EvmNonce(addr) } func (s *State) EvmTxByHash(hash common.Hash) (tmtypes.Tx, bool) { @@ -104,7 +104,7 @@ func (s *State) pruneMempool(n types.BlockNumber) { if wantNonce == m.evmNonces[addr] { // Happy path: all account's txs got executed. delete(m.evmNonces, addr) - } else if gotNonce := s.cfg.App.EvmNonce(addr); gotNonce < wantNonce { + } else if gotNonce := s.app.EvmNonce(addr); gotNonce < wantNonce { // Some txs have not been executed - reset account tracking. // NOTE: app execution is not synchronized with mempool, so nonce could have already // proceeded past wantNonce and that is expected. 
@@ -144,7 +144,7 @@ func (s *State) insertTx(ctx context.Context, tx tmtypes.Tx, waitIfFull bool) (* if uint64(len(tx)) > types.MaxTxsBytesPerBlock { return nil, errTooLarge } - resp, err := s.cfg.App.CheckTxSafe(ctx, &abci.RequestCheckTxV2{Tx: tx}) + resp, err := s.app.CheckTxSafe(ctx, &abci.RequestCheckTxV2{Tx: tx}) if err != nil { return nil, err } @@ -182,7 +182,7 @@ func (s *State) insertTx(ctx context.Context, tx tmtypes.Tx, waitIfFull bool) (* addr := resp.EVMSenderAddress nonce, ok := m.evmNonces[addr] if !ok { - nonce = s.cfg.App.EvmNonce(addr) + nonce = s.app.EvmNonce(addr) } if nonce != resp.EVMNonce { return nil, fmt.Errorf("%w: got %v, want %v", errBadNonce, resp.EVMNonce, nonce) diff --git a/sei-tendermint/internal/autobahn/producer/mempool_test.go b/sei-tendermint/internal/autobahn/producer/mempool_test.go index f1524c1a4e..3b83fc77f7 100644 --- a/sei-tendermint/internal/autobahn/producer/mempool_test.go +++ b/sei-tendermint/internal/autobahn/producer/mempool_test.go @@ -113,10 +113,13 @@ func (a *testApp) Cfg() *Config { MaxGasEstimatedPerBlock: 1000000, MaxTxsPerBlock: types.MaxTxsPerBlock, BlockInterval: time.Hour, - App: proxy.New(a, proxy.NopMetrics()), } } +func (a *testApp) Proxy() *proxy.Proxy { + return proxy.New(a, proxy.NopMetrics()) +} + func (a *testApp) EvmNonce(addr common.Address) uint64 { for inner := range a.inner.Lock() { return inner.nonces[addr] @@ -165,6 +168,7 @@ type testEnv struct { state *State consensus *consensus.State data *data.State + app *proxy.Proxy inner utils.Mutex[*testEnvInner] } @@ -212,7 +216,7 @@ func (env *testEnv) Run(ctx context.Context) error { // Mark block as executed. 
h := b.Header.Hash() - resp, err := env.state.cfg.App.FinalizeBlock(ctx, &abci.RequestFinalizeBlock{Txs: b.Payload.Txs(), Hash: h[:]}) + resp, err := env.app.FinalizeBlock(ctx, &abci.RequestFinalizeBlock{Txs: b.Payload.Txs(), Hash: h[:]}) if err != nil { return fmt.Errorf("app.FinalizeBlock(): %w", err) } @@ -223,7 +227,7 @@ func (env *testEnv) Run(ctx context.Context) error { })) } -func newTestEnv(rng utils.Rng, cfg *Config) *testEnv { +func newTestEnv(rng utils.Rng, cfg *Config, app *proxy.Proxy) *testEnv { committee, keys := types.GenCommittee(rng, 1) dataState := utils.OrPanic1(data.NewState( &data.Config{Committee: committee}, @@ -237,7 +241,8 @@ func newTestEnv(rng utils.Rng, cfg *Config) *testEnv { return &testEnv{ data: dataState, consensus: consensusState, - state: NewState(cfg, consensusState), + state: NewState(cfg, consensusState, app), + app: app, inner: utils.NewMutex(&testEnvInner{ sequenced: map[common.Address][]*txSpec{}, }), @@ -248,7 +253,7 @@ func TestInsertTx_TooLargeTx(t *testing.T) { ctx := t.Context() rng := utils.TestRng() app := newTestApp() - env := newTestEnv(rng, app.Cfg()) + env := newTestEnv(rng, app.Cfg(), app.Proxy()) // Tx with size exceeding block limit. tx := utils.GenBytes(rng, int(types.MaxTxsBytesPerBlock+1)) // Should be rejected by mempool. 
@@ -262,7 +267,7 @@ func TestInsertTx_GasWantedExceeded(t *testing.T) { rng := utils.TestRng() app := newTestApp() cfg := app.Cfg() - env := newTestEnv(rng, cfg) + env := newTestEnv(rng, cfg, app.Proxy()) // Tx with gas wanted exceeding block limit addr, nonce := app.NewAccount(rng) tx := env.genTx(rng, addr, nonce) @@ -280,7 +285,7 @@ func TestInsertTx_GasEstimatedExceeded(t *testing.T) { cfg := app.Cfg() cfg.MaxGasEstimatedPerBlock = 10000 cfg.MaxGasWantedPerBlock = cfg.MaxGasEstimatedPerBlock * 2 - env := newTestEnv(rng, cfg) + env := newTestEnv(rng, cfg, app.Proxy()) // Tx with gas wanted exceeding block limit addr, nonce := app.NewAccount(rng) tx := env.genTx(rng, addr, nonce) @@ -296,7 +301,7 @@ func TestInsertTx_AppRejectsTx(t *testing.T) { ctx := t.Context() rng := utils.TestRng() app := newTestApp() - env := newTestEnv(rng, app.Cfg()) + env := newTestEnv(rng, app.Cfg(), app.Proxy()) // Construct tx with invalid encoding. tx := utils.GenBytes(rng, 1) _, err := decodeTxSpec(tx) @@ -312,7 +317,7 @@ func TestMempool_BadNonce(t *testing.T) { ctx := t.Context() rng := utils.TestRng() app := newTestApp() - env := newTestEnv(rng, app.Cfg()) + env := newTestEnv(rng, app.Cfg(), app.Proxy()) // Initialize nonce for random account. 
addr := common.Address(utils.GenBytes(rng, len(common.Address{}))) nonce := uint64(rng.Intn(10000)) @@ -358,8 +363,7 @@ func TestMempool_HappyPath(t *testing.T) { cfg.MaxTxsPerBlock = 20 cfg.MaxGasWantedPerBlock = 100 cfg.MaxGasEstimatedPerBlock = 100 - cfg.App = proxy.New(app, proxy.NopMetrics()) - env := newTestEnv(rng, cfg) + env := newTestEnv(rng, cfg, app.Proxy()) want := utils.NewMutex(map[common.Address][]*txSpec{}) require.NoError(t, scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { s.SpawnBgNamed("env", func() error { return env.Run(ctx) }) @@ -441,7 +445,7 @@ func TestMempool_EvmTxByHash(t *testing.T) { app := newTestApp() cfg := app.Cfg() cfg.BlockInterval = time.Millisecond - env := newTestEnv(rng, cfg) + env := newTestEnv(rng, cfg, app.Proxy()) addr, nonce := app.NewAccount(rng) txs := utils.Slice( diff --git a/sei-tendermint/internal/autobahn/producer/state.go b/sei-tendermint/internal/autobahn/producer/state.go index c2b534dc86..650493166f 100644 --- a/sei-tendermint/internal/autobahn/producer/state.go +++ b/sei-tendermint/internal/autobahn/producer/state.go @@ -28,7 +28,6 @@ type Config struct { // benchmarks with stable throughput, in case execution performance degrades // when overloaded. MaxTxsPerSecond utils.Option[uint64] - App *proxy.Proxy } const minTxGas = 21000 @@ -40,6 +39,7 @@ func (c *Config) maxTxsPerBlock() uint64 { // State is the block producer state. type State struct { cfg *Config + app *proxy.Proxy mempool utils.Watch[*mempool] // consensus state to which published blocks will be reported. consensus *consensus.State @@ -47,11 +47,12 @@ type State struct { // NewState constructs a new block producer state. // Returns an error if the current node is NOT a producer. 
-func NewState(cfg *Config, consensus *consensus.State) *State { +func NewState(cfg *Config, consensus *consensus.State, app *proxy.Proxy) *State { lane := consensus.Avail().PublicKey() n := consensus.Avail().NextBlock(lane) return &State{ cfg: cfg, + app: app, mempool: utils.NewWatch(&mempool{ capacity: avail.BlocksPerLane, first: n, diff --git a/sei-tendermint/internal/p2p/giga/avail.go b/sei-tendermint/internal/p2p/giga/avail.go index 955c17eccb..d45b807f86 100644 --- a/sei-tendermint/internal/p2p/giga/avail.go +++ b/sei-tendermint/internal/p2p/giga/avail.go @@ -22,7 +22,7 @@ func (x *Service) serverStreamLaneProposals(ctx context.Context, server rpc.Serv if err != nil { return fmt.Errorf("StreamLaneProposalsReqConv.Decode(): %w", err) } - sub := x.state.Avail().SubscribeLaneProposals(req.FirstBlockNumber) + sub := x.validatorState().Avail().SubscribeLaneProposals(req.FirstBlockNumber) for { p, err := sub.Recv(ctx) if err != nil { @@ -42,7 +42,7 @@ func (x *Service) serverStreamLaneVotes(ctx context.Context, server rpc.Server[A return err } _ = reqRaw - sub := x.state.Avail().SubscribeLaneVotes() + sub := x.validatorState().Avail().SubscribeLaneVotes() for { batch, err := sub.RecvBatch(ctx) if err != nil { @@ -64,7 +64,7 @@ func (x *Service) serverStreamAppVotes(ctx context.Context, server rpc.Server[AP return err } _ = reqRaw - sub := x.state.Avail().SubscribeAppVotes() + sub := x.validatorState().Avail().SubscribeAppVotes() for { vote, err := sub.Recv(ctx) if err != nil { @@ -86,9 +86,9 @@ func (x *Service) serverStreamAppQCs(ctx context.Context, server rpc.Server[API] _ = reqRaw next := types.RoadIndex(0) for { - appQC, commitQC, err := x.state.Avail().WaitForAppQC(ctx, next) + appQC, commitQC, err := x.validatorState().Avail().WaitForAppQC(ctx, next) if err != nil { - return fmt.Errorf("x.state.Avail().WaitForAppQC(): %w", err) + return fmt.Errorf("x.validatorState().Avail().WaitForAppQC(): %w", err) } next = appQC.Next() if err := stream.Send(ctx, 
StreamAppQCsRespConv.Encode(&StreamAppQCsResp{ @@ -105,13 +105,13 @@ func (x *Service) serverStreamCommitQCs(ctx context.Context, server rpc.Server[A return StreamCommitQCs.Serve(ctx, server, func(ctx context.Context, stream rpc.Stream[*apb.CommitQC, *pb.StreamCommitQCsReq]) error { next := types.RoadIndex(0) for { - qc, err := x.state.Avail().CommitQC(ctx, next) + qc, err := x.validatorState().Avail().CommitQC(ctx, next) if err != nil { if errors.Is(err, data.ErrPruned) { - next = x.state.Avail().FirstCommitQC() + next = x.validatorState().Avail().FirstCommitQC() continue } - return fmt.Errorf("x.state.Avail().FirstCommitQC(): %w", err) + return fmt.Errorf("x.validatorState().Avail().FirstCommitQC(): %w", err) } next = qc.Index() + 1 if err := stream.Send(ctx, types.CommitQCConv.Encode(qc)); err != nil { @@ -149,7 +149,7 @@ func (x *Service) clientStreamLaneProposals(ctx context.Context, c rpc.Client[AP /*if got, want := proposal.Msg().Block().Header().Lane(), c.cfg.GetKey(); got != want { return fmt.Errorf("producer = %q, want %q", got, want) }*/ - if err := x.state.Avail().PushBlock(ctx, proposal); err != nil { + if err := x.validatorState().Avail().PushBlock(ctx, proposal); err != nil { return fmt.Errorf("s.PushLaneProposal(): %w", err) } } @@ -173,7 +173,7 @@ func (x *Service) clientStreamLaneVotes(ctx context.Context, c rpc.Client[API]) if err != nil { return fmt.Errorf("LaneVoteConv.Decode(): %w", err) } - if err := x.state.Avail().PushVote(ctx, vote); err != nil { + if err := x.validatorState().Avail().PushVote(ctx, vote); err != nil { return fmt.Errorf("s.PushLaneVote(): %w", err) } } @@ -197,7 +197,7 @@ func (x *Service) clientStreamCommitQCs(ctx context.Context, c rpc.Client[API]) if err != nil { return fmt.Errorf("types.CommitQCConv.Decode(): %w", err) } - if err := x.state.Avail().PushCommitQC(ctx, qc); err != nil { + if err := x.validatorState().Avail().PushCommitQC(ctx, qc); err != nil { return fmt.Errorf("s.PushFirstCommitQC(): %w", err) } } @@ 
-221,7 +221,7 @@ func (x *Service) clientStreamAppVotes(ctx context.Context, c rpc.Client[API]) e if err != nil { return fmt.Errorf("AppVoteConv.Decode(): %w", err) } - if err := x.state.Avail().PushAppVote(ctx, vote); err != nil { + if err := x.validatorState().Avail().PushAppVote(ctx, vote); err != nil { return fmt.Errorf("s.PushLaneVote(): %w", err) } } @@ -245,7 +245,7 @@ func (x *Service) clientStreamAppQCs(ctx context.Context, c rpc.Client[API]) err if err != nil { return fmt.Errorf("StreamAppQCsRespConv.Decode(): %w", err) } - if err := x.state.Avail().PushAppQC(msg.AppQC, msg.CommitQC); err != nil { + if err := x.validatorState().Avail().PushAppQC(msg.AppQC, msg.CommitQC); err != nil { return fmt.Errorf("s.PushFirstCommitQC(): %w", err) } } diff --git a/sei-tendermint/internal/p2p/giga/consensus.go b/sei-tendermint/internal/p2p/giga/consensus.go index b3069c4dd2..5fe5b91842 100644 --- a/sei-tendermint/internal/p2p/giga/consensus.go +++ b/sei-tendermint/internal/p2p/giga/consensus.go @@ -77,11 +77,11 @@ func (x *Service) clientPing(ctx context.Context, client rpc.Client[API]) error func (x *Service) clientConsensus(ctx context.Context, c rpc.Client[API]) error { return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { // Send updates about new consensus messages. 
- s.Spawn(func() error { return sendUpdates(ctx, c, x.state.SubscribeProposal()) }) - s.Spawn(func() error { return sendUpdates(ctx, c, x.state.SubscribePrepareVote()) }) - s.Spawn(func() error { return sendUpdates(ctx, c, x.state.SubscribeCommitVote()) }) - s.Spawn(func() error { return sendUpdates(ctx, c, x.state.SubscribeTimeoutVote()) }) - s.Spawn(func() error { return sendUpdates(ctx, c, x.state.SubscribeTimeoutQC()) }) + s.Spawn(func() error { return sendUpdates(ctx, c, x.validatorState().SubscribeProposal()) }) + s.Spawn(func() error { return sendUpdates(ctx, c, x.validatorState().SubscribePrepareVote()) }) + s.Spawn(func() error { return sendUpdates(ctx, c, x.validatorState().SubscribeCommitVote()) }) + s.Spawn(func() error { return sendUpdates(ctx, c, x.validatorState().SubscribeTimeoutVote()) }) + s.Spawn(func() error { return sendUpdates(ctx, c, x.validatorState().SubscribeTimeoutQC()) }) return nil }) } @@ -114,24 +114,24 @@ func (x *Service) serverConsensus(ctx context.Context, server rpc.Server[API]) e } switch req := req.(type) { case *types.ConsensusReqPrepareVote: - if err := x.state.PushPrepareVote(req.Signed); err != nil { - return fmt.Errorf("x.state.PushPrepareVote(): %w", err) + if err := x.validatorState().PushPrepareVote(req.Signed); err != nil { + return fmt.Errorf("x.validatorState().PushPrepareVote(): %w", err) } case *types.ConsensusReqCommitVote: - if err := x.state.PushCommitVote(req.Signed); err != nil { - return fmt.Errorf("x.state.PushCommitVote(): %w", err) + if err := x.validatorState().PushCommitVote(req.Signed); err != nil { + return fmt.Errorf("x.validatorState().PushCommitVote(): %w", err) } case *types.FullTimeoutVote: - if err := x.state.PushTimeoutVote(req); err != nil { - return fmt.Errorf("x.state.PushTimeoutVote(): %w", err) + if err := x.validatorState().PushTimeoutVote(req); err != nil { + return fmt.Errorf("x.validatorState().PushTimeoutVote(): %w", err) } case *types.FullProposal: - if err := 
x.state.PushProposal(ctx, req); err != nil { - return fmt.Errorf("x.state.PushProposal(): %w", err) + if err := x.validatorState().PushProposal(ctx, req); err != nil { + return fmt.Errorf("x.validatorState().PushProposal(): %w", err) } case *types.TimeoutQC: - if err := x.state.PushTimeoutQC(ctx, req); err != nil { - return fmt.Errorf("x.state.PushTimeoutQC(): %w", err) + if err := x.validatorState().PushTimeoutQC(ctx, req); err != nil { + return fmt.Errorf("x.validatorState().PushTimeoutQC(): %w", err) } default: return fmt.Errorf("unknown consensus request type: %T", req) diff --git a/sei-tendermint/internal/p2p/giga/data.go b/sei-tendermint/internal/p2p/giga/data.go index 3162b26873..7d1b7f4a9f 100644 --- a/sei-tendermint/internal/p2p/giga/data.go +++ b/sei-tendermint/internal/p2p/giga/data.go @@ -22,7 +22,7 @@ func (s *Service) clientStreamFullCommitQCs(ctx context.Context, client rpc.Clie } defer stream.Close() if err := stream.Send(ctx, StreamFullCommitQCsReqConv.Encode(&StreamFullCommitQCsReq{ - NextBlock: s.state.Data().NextBlock(), + NextBlock: s.data.NextBlock(), })); err != nil { return fmt.Errorf("stream.Send(): %w", err) } @@ -36,7 +36,7 @@ func (s *Service) clientStreamFullCommitQCs(ctx context.Context, client rpc.Clie return fmt.Errorf("types.CommitQCConv.Decode(): %w", err) } // TODO: add DoS protection (i.e. that only useful state.Data() has been actually sent). - if err := s.state.Data().PushQC(ctx, qc, nil); err != nil { + if err := s.data.PushQC(ctx, qc, nil); err != nil { return fmt.Errorf("s.PushCommitQC(): %w", err) } } @@ -49,6 +49,12 @@ const MaxConcurrentBlockFetches = 100 // BlockFetchTimeout after which the block fetch RPC is considered failed and needs to be retried. const BlockFetchTimeout = 2 * time.Second +// BlockFetchRetryInterval bounds how often runBlockFetcher resends a +// GetBlock for the same height when the chosen peer doesn't have it yet +// (empty Option response). 
Prevents a tight retry loop when our peer +// happens to be a fullnode that's also catching up. +const BlockFetchRetryInterval = 1 * time.Second + type req struct { n types.GlobalBlockNumber done chan struct{} @@ -84,9 +90,12 @@ func (s *Service) clientGetBlock(ctx context.Context, client rpc.Client[API]) er } b, ok := block.Get() if !ok { + // Peer doesn't have block n yet (e.g. they're a fullnode + // catching up too). runBlockFetcher's outer loop will + // retry after BlockFetchRetryInterval. return nil } - if err := s.state.Data().PushBlock(ctx, req.n, b); err != nil { + if err := s.data.PushBlock(ctx, req.n, b); err != nil { return fmt.Errorf("s.PushBlock(): %w", err) } return nil @@ -99,9 +108,9 @@ func (s *Service) clientGetBlock(ctx context.Context, client rpc.Client[API]) er func (x *Service) runBlockFetcher(ctx context.Context) error { sem := utils.NewSemaphore(MaxConcurrentBlockFetches) return scope.Run(ctx, func(ctx context.Context, scope scope.Scope) error { - for n := x.state.Data().NextBlock(); ; n += 1 { + for n := x.data.NextBlock(); ; n += 1 { // Wait for the QC. - if _, err := x.state.Data().QC(ctx, n); err != nil { + if _, err := x.data.QC(ctx, n); err != nil { return err } release, err := sem.Acquire(ctx) @@ -110,10 +119,18 @@ func (x *Service) runBlockFetcher(ctx context.Context) error { } scope.Spawn(func() error { defer release() - for { - if _, err := x.state.Data().TryBlock(n); !errors.Is(err, data.ErrNotFound) { + for first := true; ; first = false { + if _, err := x.data.TryBlock(n); !errors.Is(err, data.ErrNotFound) { return nil } + // Back off between repeated requests for the same block — + // avoids hammering a peer that responded with an empty + // block (doesn't have it yet). 
+ if !first { + if err := utils.Sleep(ctx, BlockFetchRetryInterval); err != nil { + return err + } + } req := req{n: n, done: make(chan struct{})} if err := utils.Send(ctx, x.getBlockReqs, req); err != nil { return err @@ -139,9 +156,9 @@ func (s *Service) serverStreamFullCommitQCs(ctx context.Context, server rpc.Serv } prev := utils.None[*types.FullCommitQC]() for i := req.NextBlock; ; i++ { - qc, err := s.state.Data().QC(ctx, i) + qc, err := s.data.QC(ctx, i) if err != nil { - return fmt.Errorf("s.state.QC(): %w", err) + return fmt.Errorf("s.data.QC(): %w", err) } // Don't send the same QC twice. if types.NextIndexOpt(prev) > qc.Index() { @@ -165,7 +182,7 @@ func (x *Service) serverGetBlock(ctx context.Context, server rpc.Server[API]) er if err != nil { return fmt.Errorf("GetBlockReqConv.Decode(): %w", err) } - block, err := x.state.Data().TryBlock(req.GlobalNumber) + block, err := x.data.TryBlock(req.GlobalNumber) resp := utils.None[*types.Block]() if err == nil { resp = utils.Some(block) diff --git a/sei-tendermint/internal/p2p/giga/service.go b/sei-tendermint/internal/p2p/giga/service.go index 84e1fa48d2..7ce831e877 100644 --- a/sei-tendermint/internal/p2p/giga/service.go +++ b/sei-tendermint/internal/p2p/giga/service.go @@ -2,19 +2,64 @@ package giga import ( "context" + "fmt" "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/consensus" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/data" "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p/rpc" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/scope" ) +// Service serves the giga RPC API. NewService builds a full validator +// service (all streams); NewBlockSyncService builds a block-sync-only +// service (StreamFullCommitQCs + GetBlock) for fullnodes. 
state is None +// on block-sync-only services; the consensus / avail handlers reach it +// via validatorState() and panic if invoked outside RunServer / RunClient. type Service struct { getBlockReqs chan req - state *consensus.State + data *data.State + state utils.Option[*consensus.State] } func NewService(state *consensus.State) *Service { - return &Service{make(chan req), state} + return &Service{ + getBlockReqs: make(chan req), + data: state.Data(), + state: utils.Some(state), + } +} + +// NewBlockSyncService constructs a Service that only serves and consumes +// block-sync streams (no consensus / avail). +func NewBlockSyncService(d *data.State) *Service { + return &Service{ + getBlockReqs: make(chan req), + data: d, + } +} + +// RunInbound dispatches an inbound peer to the right handler set. +// Non-committee peers get the block-sync subset. Committee peers get the +// full RunServer on a validator (state present); on a non-validator the +// connection is refused — committee members shouldn't be dialing +// fullnodes in any healthy configuration, and we don't want a stale +// autobahn.json entry to take down RPC nodes via a reachable panic. +func (x *Service) RunInbound(ctx context.Context, server rpc.Server[API], isCommittee bool) error { + if !isCommittee { + return x.RunBlockSyncServer(ctx, server) + } + if !x.state.IsPresent() { + return fmt.Errorf("committee peer dialed a non-validator service") + } + return x.RunServer(ctx, server) +} + +// validatorState unwraps state for the validator-only handlers. Panics if +// called from a block-sync-only Service — which is structurally impossible +// because those handlers are only spawned by RunServer / RunClient. 
+func (x *Service) validatorState() *consensus.State { + return x.state.OrPanic("Service.state called from block-sync-only mode") } func (x *Service) Run(ctx context.Context) error { @@ -50,3 +95,25 @@ func (x *Service) RunClient(ctx context.Context, client rpc.Client[API]) error { return nil }) } + +// RunBlockSyncServer spawns only the block-sync server handlers. Used on +// both validator and fullnode inbound connections from non-committee peers. +func (x *Service) RunBlockSyncServer(ctx context.Context, server rpc.Server[API]) error { + return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { + s.Spawn(func() error { return x.serverPing(ctx, server) }) + s.Spawn(func() error { return x.serverStreamFullCommitQCs(ctx, server) }) + s.Spawn(func() error { return x.serverGetBlock(ctx, server) }) + return nil + }) +} + +// RunBlockSyncClient spawns only the block-sync client handlers. Used by +// fullnodes dialing committee members. +func (x *Service) RunBlockSyncClient(ctx context.Context, client rpc.Client[API]) error { + return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { + s.Spawn(func() error { return x.clientPing(ctx, client) }) + s.Spawn(func() error { return x.clientStreamFullCommitQCs(ctx, client) }) + s.Spawn(func() error { return x.clientGetBlock(ctx, client) }) + return nil + }) +} diff --git a/sei-tendermint/internal/p2p/giga_router.go b/sei-tendermint/internal/p2p/giga_router.go index 66b94d7096..a66536fc55 100644 --- a/sei-tendermint/internal/p2p/giga_router.go +++ b/sei-tendermint/internal/p2p/giga_router.go @@ -2,458 +2,67 @@ package p2p import ( "context" - "errors" "fmt" - "maps" "net/url" - "slices" "time" "github.com/ethereum/go-ethereum/common" - abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" atypes "github.com/sei-protocol/sei-chain/sei-tendermint/autobahn/types" - "github.com/sei-protocol/sei-chain/sei-tendermint/crypto" - 
"github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/consensus" - "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/data" "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/producer" - "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p/giga" - "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p/rpc" - tmbytes "github.com/sei-protocol/sei-chain/sei-tendermint/libs/bytes" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/proxy" "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" - "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/scope" "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/tcp" "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/coretypes" "github.com/sei-protocol/sei-chain/sei-tendermint/types" - "github.com/sei-protocol/sei-chain/sei-tendermint/version" ) type GigaNodeAddr struct { Key NodePublicKey HostPort tcp.HostPort - EVMRPC utils.Option[*url.URL] + EVMRPC *url.URL } func (a GigaNodeAddr) String() string { return fmt.Sprintf("%v@%v", a.Key, a.HostPort) } -type GigaRouterConfig struct { +// GigaRouterCommonConfig is the slice of giga config shared by both +// validator and fullnode constructors. +type GigaRouterCommonConfig struct { DialInterval time.Duration ValidatorAddrs map[atypes.PublicKey]GigaNodeAddr - Consensus *consensus.Config - Producer *producer.Config GenDoc *types.GenesisDoc -} - -type GigaRouter struct { - cfg *GigaRouterConfig - key NodeSecretKey - data *data.State - producer *producer.State - consensus *consensus.State - service *giga.Service - poolIn *giga.Pool[NodePublicKey, rpc.Server[giga.API]] - poolOut *giga.Pool[NodePublicKey, rpc.Client[giga.API]] - - // lastCommitQCRecv is subscribed once at construction and reused for the - // lifetime of the GigaRouter. Load() is lock-free (a single - // atomic.Pointer.Load). 
- // - // Staleness-safety: the receiver points at the same atomicWatch held inside - // avail.inner.latestCommitQC — a value field on a heap-allocated *inner - // that is never replaced for the lifetime of the State, only Store()d - // into. Every Load therefore observes the most recent Store. A - // reconstructed avail.State (only on process restart) would also - // reconstruct this GigaRouter, so the receiver can't outlive its watch. - lastCommitQCRecv utils.AtomicRecv[utils.Option[*atypes.CommitQC]] -} - -func NewGigaRouter(cfg *GigaRouterConfig, key NodeSecretKey) (*GigaRouter, error) { - if cfg.GenDoc.InitialHeight < 1 { - return nil, fmt.Errorf("GenDoc.InitialHeight = %v, want >=1", cfg.GenDoc.InitialHeight) - } - committee, err := atypes.NewRoundRobinElection( - slices.Collect(maps.Keys(cfg.ValidatorAddrs)), - atypes.GlobalBlockNumber(cfg.GenDoc.InitialHeight), // nolint:gosec // verified to be positive. - cfg.GenDoc.GenesisTime, - ) - if err != nil { - return nil, fmt.Errorf("atypes.NewRoundRobinElection(): %w", err) - } - // Automated pruning is disabled, because it is controlled by the application. - // The data WAL piggybacks on Consensus.PersistentStateDir: the two layers - // share the same on-disk root and write to distinct subdirectories under - // it (inner / blocks / commitqcs for consensus, globalblocks / - // fullcommitqcs for data). - // - // TODO(autobahn): once sei-db/ledger_db/block.BlockDB has a writer wired - // (see BlockByNumber's TODO), the data layer's WAL is redundant — - // BlockDB is the long-term home for the block read path and survives - // process restarts on its own. At that point this NewDataWAL call can - // drop the directory and become a no-op. 
- dataWAL, err := data.NewDataWAL(cfg.Consensus.PersistentStateDir, committee) - if err != nil { - return nil, fmt.Errorf("data.NewDataWAL(): %w", err) - } - dataState, err := data.NewState(&data.Config{Committee: committee}, dataWAL) - if err != nil { - return nil, fmt.Errorf("data.NewState(): %w", err) - } - consensusState, err := consensus.NewState(cfg.Consensus, dataState) - if err != nil { - return nil, fmt.Errorf("consensus.NewState(): %w", err) - } - producerState := producer.NewState(cfg.Producer, consensusState) - logger.Info("GigaRouter initialized", "validators", len(cfg.ValidatorAddrs), "dial_interval", cfg.DialInterval) - return &GigaRouter{ - cfg: cfg, - key: key, - data: dataState, - consensus: consensusState, - producer: producerState, - service: giga.NewService(consensusState), - poolIn: giga.NewPool[NodePublicKey, rpc.Server[giga.API]](), - poolOut: giga.NewPool[NodePublicKey, rpc.Client[giga.API]](), - - // Subscribe once here (takes avail's internal lock once); subsequent - // Load() calls from RPC handlers are lock-free atomic pointer reads. - lastCommitQCRecv: consensusState.Avail().LastCommitQC(), - }, nil -} - -func (r *GigaRouter) InsertTx(ctx context.Context, tx types.Tx) (*abci.ResponseCheckTx, error) { - return r.producer.InsertTx(ctx, tx) -} - -// Mempool exposes Autobahn's producer-backed mempool surface to callers that -// need features not shared with CometBFT's TxMempool. -func (r *GigaRouter) Mempool() *producer.State { - return r.producer -} - -// LastCommittedBlockNumber returns the highest global block number finalized -// by consensus (derived from the latest CommitQC). When no CommitQC has been -// recorded yet, atypes.GlobalRangeOpt returns the committee's empty default -// range {First: FirstBlock, Next: FirstBlock}, so this returns FirstBlock-1. -// Safe for high-frequency callers — uses a cached lock-free receiver; no -// locks taken on this path. 
-func (r *GigaRouter) LastCommittedBlockNumber() int64 { - // GlobalRange is a half-open [First, Next) interval; the highest - // committed block number is Next-1. - gr := atypes.GlobalRangeOpt(r.lastCommitQCRecv.Load(), r.data.Committee()) - return int64(gr.Next) - 1 // nolint:gosec // gr.Next is uint64 but bounded by actual chain height. -} - -// MaxGasEstimatedPerBlock . -func (r *GigaRouter) MaxGasEstimatedPerBlock() uint64 { - return r.cfg.Producer.MaxGasEstimatedPerBlock -} - -// BlockByNumber returns the finalized global block at height n translated -// into the CometBFT coretypes.ResultBlock shape. This lets consumers -// (notably evmrpc, which wraps receipts/logs with block context) keep -// working under Autobahn without CometBFT's BlockStore being populated. -// -// Fields populated when the underlying GlobalBlock is well-formed: -// BlockID.Hash (Autobahn lane-block header hash — the same bytes passed to -// app.FinalizeBlock's Hash param, which the EVM receipt store records as -// blockHash), Block.Header.ChainID/Height/Time, Block.Data.Txs. Other -// fields (AppHash, ProposerAddress, LastCommit, …) stay at zero values — -// evmrpc does not read them on the receipt path. If gb.Header is nil -// BlockID.Hash also stays empty; if gb.Payload is nil Block.Data.Txs -// stays empty (see the malformed-block handling below). -// -// TODO(autobahn): switch this to read from sei-db/ledger_db/block.BlockDB -// once a writer is wired (e.g. from app.FinalizeBlocker or executeBlock). -// Today no production code calls BlockDB.WriteBlock, so Autobahn's in-memory -// data.State is the only place a full block lives — but it's pruned per -// Sei's RetainHeight and exposes only a height index (no GetBlockByHash). -// BlockDB has the right shape (height + hash indexes, async pruning) and -// is the long-term home for this read path. 
-func (r *GigaRouter) BlockByNumber(ctx context.Context, n atypes.GlobalBlockNumber) (*coretypes.ResultBlock, error) { - gb, err := r.data.GlobalBlock(ctx, n) - if err != nil { - // Map Autobahn's pruning sentinel to CometBFT's, so callers - // (env.Block, evmrpc, ops tooling) get the same error type they - // already handle on the CometBFT path. base is None because the - // active lower bound (data.State.inner.first) is internal to - // data.State; both call sites format through the same helper. - if errors.Is(err, data.ErrPruned) { - return nil, coretypes.WrapErrHeightNotAvailable(utils.Clamp[int64](n), utils.None[int64]()) - } - return nil, fmt.Errorf("data.GlobalBlock(%v): %w", n, err) - } - return r.translateGlobalBlock(gb), nil -} - -// BlockByHash returns the finalized global block keyed by Autobahn block- -// header hash, translated into the CometBFT coretypes.ResultBlock shape -// (same translation as BlockByNumber). Matches CometBFT semantics for -// unknown hashes: returns &ResultBlock{Block: nil} with no error. -// -// Lookup-and-construct happens under a single data.State lock acquire, so -// the returned block matches the requested hash atomically. Hashes below -// the pruning watermark are not indexed and read as "unknown". Wrong-size -// inputs are rejected at the call site (env.BlockByHash) so this method -// can stay strongly typed on atypes.BlockHeaderHash. -// -// TODO(autobahn): replace this with a direct read from -// sei-db/ledger_db/block.BlockDB.GetBlockByHash once a writer is wired into -// block execution. The data.State-side index can also go away at that point. 
-func (r *GigaRouter) BlockByHash(ctx context.Context, hash atypes.BlockHeaderHash) (*coretypes.ResultBlock, error) { - opt, err := r.data.GlobalBlockByHash(hash) - if err != nil { - return nil, fmt.Errorf("data.GlobalBlockByHash: %w", err) - } - // Reject the unknown-hash case here so translateGlobalBlock can rely - // on the *GlobalBlock type contract (non-nil, with non-nil Header - // and Payload) — same way executeBlock dereferences b.Header - // without checking. Mirrors CometBFT's BlockStore.LoadBlockByHash - // returning &ResultBlock{Block: nil} for an unknown hash. - gb, ok := opt.Get() - if !ok { - return &coretypes.ResultBlock{}, nil - } - return r.translateGlobalBlock(gb), nil -} - -// translateGlobalBlock converts an Autobahn GlobalBlock to the CometBFT -// coretypes.ResultBlock shape used by env.Block / env.BlockByHash and -// downstream evmrpc consumers. Caller must pass a non-nil *GlobalBlock with -// non-nil Header and Payload — that's the contract data.State guarantees on -// a successful lookup, and matches how executeBlock dereferences b.Header -// without a nil-check on the same type. The "no such block" case is -// rejected at the BlockByHash call site before delegating here. -// -// LastCommit is non-nil with empty Signatures, mirroring executeBlock's -// FinalizeBlock call which passes an empty abci.CommitInfo. Under Autobahn -// the committee is fixed by genesis (no validator-set updates), so the -// application is not in control of jailing — surfacing N "absent sig" -// entries here would make trace replay's BeginBlock bump missed-block -// counters and diverge from production. ToReqBeginBlock skips the per- -// validator loop when Signatures is empty, so empty Votes flow into -// distribution/slashing on both paths. 
-func (r *GigaRouter) translateGlobalBlock(gb *atypes.GlobalBlock) *coretypes.ResultBlock { - srcTxs := gb.Payload.Txs() - tmTxs := make(types.Txs, len(srcTxs)) - for i, tx := range srcTxs { - tmTxs[i] = tx - } - h := gb.Header.Hash() - return &coretypes.ResultBlock{ - BlockID: types.BlockID{Hash: tmbytes.HexBytes(h.Bytes())}, - Block: &types.Block{ - Header: types.Header{ - ChainID: r.cfg.GenDoc.ChainID, - // Clamp accepts any constraints.Integer for From, so - // gb.GlobalNumber (a typed uint64) goes in directly — no - // intermediate uint64() conversion needed. - Height: utils.Clamp[int64](gb.GlobalNumber), - Time: gb.Timestamp, - }, - Data: types.Data{Txs: tmTxs}, - LastCommit: &types.Commit{}, - }, - } -} - -func (r *GigaRouter) executeBlock(ctx context.Context, b *atypes.GlobalBlock) (*abci.ResponseCommit, error) { - app := r.cfg.Producer.App - hash := b.Header.Hash() - var proposerAddress types.Address - if vals := app.GetValidators(); len(vals) > 0 { - // Deterministically select a proposer from the app's validator committee. - // We need it so that app does not emit error logs. - proposer := slices.MinFunc(vals, func(a, b abci.ValidatorUpdate) int { return a.PubKey.Compare(b.PubKey) }) - key, err := crypto.PubKeyFromProto(proposer.PubKey) - if err != nil { - return nil, fmt.Errorf("crypto.PubKeyFromProto(): %w", err) - } - proposerAddress = key.Address() - } - - // TODO: add metrics to understand execution latency. - resp, err := app.FinalizeBlock(ctx, &abci.RequestFinalizeBlock{ - Txs: b.Payload.Txs(), - // Empty DecidedLastCommit does not indicate missing votes. - DecidedLastCommit: abci.CommitInfo{}, - // WARNING: this is a hash of the autobahn block header. - // It is used to identify block processed optimistically - // and is fed as block hash to EVM contracts. 
- Hash: hash[:], - Header: (&types.Header{ - ChainID: r.cfg.GenDoc.ChainID, - Height: int64(b.GlobalNumber), // nolint:gosec // different representations of the same value - Time: b.Timestamp, - // WARNING: the reward distribution has corner cases where it forgets the proposer, - // because reward is distributed with a delay. This is not our problem here though. - ProposerAddress: proposerAddress, - }).ToProto(), - }) - if err != nil { - return nil, fmt.Errorf("r.cfg.App.FinalizeBlock(): %w", err) - } - commitResp, err := app.Commit(ctx) - if err != nil { - return nil, fmt.Errorf("r.cfg.App.Commit(): %w", err) - } - if err := r.data.PushAppHash(ctx, b.GlobalNumber, resp.AppHash); err != nil { - return nil, fmt.Errorf("r.data.PushAppHash(%v): %w", b.GlobalNumber, err) - } - return commitResp, nil -} - -func (r *GigaRouter) runExecute(ctx context.Context) error { - app := r.cfg.Producer.App - - info, err := app.Info(ctx, &version.RequestInfo) - if err != nil { - return fmt.Errorf("App.Info(): %w", err) - } - last, ok := utils.SafeCast[atypes.GlobalBlockNumber](info.LastBlockHeight) - if !ok { - return fmt.Errorf("invalid info.LastBlockHeight = %v", info.LastBlockHeight) - } - next := last + 1 - if last == 0 { - // Fresh start: the CometBFT handshaker is skipped in giga mode - // (see node.go: shouldHandshake = !stateSync && !gigaEnabled), so - // nobody has called InitChain yet. Call it here ourselves; this sets - // up the app's deliverState (matching real SDK: InitChain leaves - // deliverState populated with no intermediate Commit, so the first - // FinalizeBlock below runs against it). - // - // On restart (last > 0, below), InitChain must NOT be called again; - // the app's committed CMS already holds the latest state, and - // BaseApp.FinalizeBlock rebuilds deliverState from it via its - // nil-check fallback. 
- // - // Note: if a process crashed after InitChain but before the first - // Commit, LastBlockHeight is still 0 and we enter this branch again - // on restart. Re-calling InitChain is safe in that case because - // nothing was committed — it behaves as a fresh init. - if _, err := app.InitChain(ctx, r.cfg.GenDoc.ToRequestInitChain()); err != nil { - return fmt.Errorf("App.InitChain(): %w", err) - } - var ok bool - next, ok = utils.SafeCast[atypes.GlobalBlockNumber](r.cfg.GenDoc.InitialHeight) - if !ok { - return fmt.Errorf("invalid GenDoc.InitialHeight = %v", r.cfg.GenDoc.InitialHeight) - } - } else { - // NOTE that with the current implementation losing prefix of appHashes on crash is fine: - // if everyone votes on apphashes of a suffix of finalized blocks, then AppQC will be reached. - if err := r.data.PushAppHash(ctx, last, info.LastBlockAppHash); err != nil { - return fmt.Errorf("r.data.PushAppHash(): %w", err) - } - } - - for n := next; ; n += 1 { - b, err := r.data.GlobalBlock(ctx, n) - if err != nil { - return fmt.Errorf("r.data.GlobalBlock(%v): %w", n, err) - } - commitResp, err := r.executeBlock(ctx, b) - if err != nil { - return fmt.Errorf("r.executeBlock(%v): %w", n, err) - } - pruneBefore, ok := utils.SafeCast[atypes.GlobalBlockNumber](commitResp.RetainHeight) - if !ok { - return fmt.Errorf("invalid commitResp.RetainHeight = %v", commitResp.RetainHeight) - } - if err := r.data.PruneBefore(pruneBefore); err != nil { - return fmt.Errorf("r.data.PruneBefore(%v): %w", pruneBefore, err) - } - } -} - -func (r *GigaRouter) Run(ctx context.Context) error { - return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { - // Spawn outbound connections dialing. 
- for _, addr := range r.cfg.ValidatorAddrs { - s.Spawn(func() error { - for { - err := r.dialAndRunConn(ctx, addr.Key, addr.HostPort) - logger.Info("giga connection failed", "addr", addr, "err", err) - if err := utils.Sleep(ctx, r.cfg.DialInterval); err != nil { - return err - } - } - }) - } - s.SpawnNamed("data", func() error { return r.data.Run(ctx) }) - s.SpawnNamed("consensus", func() error { return r.consensus.Run(ctx) }) - s.SpawnNamed("producer", func() error { return r.producer.Run(ctx) }) - s.SpawnNamed("execute", func() error { return r.runExecute(ctx) }) - s.SpawnNamed("service", func() error { return r.service.Run(ctx) }) - return nil - }) -} - -func (r *GigaRouter) dialAndRunConn(ctx context.Context, key NodePublicKey, hp tcp.HostPort) error { - addrs, err := hp.Resolve(ctx) - if err != nil { - return fmt.Errorf("%v.Resolve(): %w", hp, err) - } - if len(addrs) == 0 { - return fmt.Errorf("%v.Resolve() = []", hp) - } - return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { - tcpConn, err := tcp.Dial(ctx, addrs[0]) - if err != nil { - return fmt.Errorf("tcp.Dial(%v): %w", addrs[0], err) - } - s.SpawnBg(func() error { return tcpConn.Run(ctx) }) - // TODO: handshake needs a timeout. 
- hConn, err := handshake(ctx, tcpConn, r.key, handshakeSpec{SeiGigaConnection: true}) - if err != nil { - return fmt.Errorf("handshake(): %w", err) - } - if !hConn.msg.SeiGigaConnection { - return fmt.Errorf("not a sei giga connection") - } - if got := hConn.msg.NodeAuth.Key(); got != key { - return fmt.Errorf("peer key = %v, want %v", got, key) - } - client := rpc.NewClient[giga.API]() - return r.poolOut.InsertAndRun(ctx, key, client, func(ctx context.Context) error { - return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { - s.Spawn(func() error { return client.Run(ctx, hConn.conn) }) - return r.service.RunClient(ctx, client) - }) - }) - }) -} - -func (r *GigaRouter) RunInboundConn(ctx context.Context, hConn *handshakedConn) error { - if !hConn.msg.SeiGigaConnection { - return fmt.Errorf("not a SeiGiga connection") - } - // Filter unwanded connections. - key := hConn.msg.NodeAuth.Key() - ok := false - for _, addr := range r.cfg.ValidatorAddrs { - if addr.Key == key { - ok = true - break - } - } - if !ok { - return fmt.Errorf("peer not whitelisted") - } - server := rpc.NewServer[giga.API]() - return r.poolIn.InsertAndRun(ctx, key, server, func(ctx context.Context) error { - return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { - s.Spawn(func() error { return server.Run(ctx, hConn.conn) }) - return r.service.RunServer(ctx, server) - }) - }) -} - -func (r *GigaRouter) EvmProxy(sender common.Address) (*url.URL, bool) { - shardValidator := r.data.Committee().EvmShard(sender) - if r.cfg.Consensus.Key.Public() == shardValidator { - return nil, false - } - return r.cfg.ValidatorAddrs[shardValidator].EVMRPC.Get() + // PersistentStateDir is the on-disk root for the data WAL (and the + // validator's consensus persister in a sibling subdir). None ⇒ in-memory. + PersistentStateDir utils.Option[string] + // App is the ABCI proxy executeBlock drives. 
NewGigaValidatorRouter + // also passes it to producer.NewState so the producer's internal + // mempool drives the same proxy. + App *proxy.Proxy + // MaxInboundFullnodePeers caps inbound block-sync from non-committee + // peers. 0 rejects all; positive caps at n, up to maxInboundFullnodePeers. + MaxInboundFullnodePeers int +} + +// GigaValidatorConfig configures a committee-member GigaRouter. +type GigaValidatorConfig struct { + GigaRouterCommonConfig + ValidatorKey atypes.SecretKey + ViewTimeout func(atypes.View) time.Duration + Producer *producer.Config +} + +// GigaRouter is the read-path / Run / EvmProxy surface. Implemented by +// *gigaValidatorRouter and *gigaFullnodeRouter; Mempool returns Some only +// on validators. RunInboundConn is served by both — non-committee peers +// get the block-sync subset only. +type GigaRouter interface { + Run(ctx context.Context) error + RunInboundConn(ctx context.Context, hConn *handshakedConn) error + LastCommittedBlockNumber() int64 + MaxGasEstimatedPerBlock() uint64 + BlockByNumber(ctx context.Context, n atypes.GlobalBlockNumber) (*coretypes.ResultBlock, error) + BlockByHash(ctx context.Context, hash atypes.BlockHeaderHash) (*coretypes.ResultBlock, error) + EvmProxy(sender common.Address) utils.Option[*url.URL] + Mempool() utils.Option[*producer.State] } diff --git a/sei-tendermint/internal/p2p/giga_router_common.go b/sei-tendermint/internal/p2p/giga_router_common.go new file mode 100644 index 0000000000..9ef33db906 --- /dev/null +++ b/sei-tendermint/internal/p2p/giga_router_common.go @@ -0,0 +1,398 @@ +package p2p + +import ( + "context" + "errors" + "fmt" + "maps" + "net/url" + "slices" + "sync/atomic" + + "github.com/ethereum/go-ethereum/common" + abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" + atypes "github.com/sei-protocol/sei-chain/sei-tendermint/autobahn/types" + "github.com/sei-protocol/sei-chain/sei-tendermint/crypto" + 
"github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/data" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p/giga" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p/rpc" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/proxy" + tmbytes "github.com/sei-protocol/sei-chain/sei-tendermint/libs/bytes" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/scope" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/tcp" + "github.com/sei-protocol/sei-chain/sei-tendermint/rpc/coretypes" + "github.com/sei-protocol/sei-chain/sei-tendermint/types" + "github.com/sei-protocol/sei-chain/sei-tendermint/version" +) + +// maxInboundFullnodePeers caps GigaRouterCommonConfig.MaxInboundFullnodePeers. +// Per-peer cost (~50-100 KB resident + ~8 goroutines + 1 fd) and NIC +// bandwidth bind well before this. Shard via an edge-fullnode tier if +// you need more. +const maxInboundFullnodePeers = 10000 + +type gigaRouterCommon struct { + cfg *GigaRouterCommonConfig + key NodeSecretKey + data *data.State + service *giga.Service + poolIn *giga.Pool[NodePublicKey, rpc.Server[giga.API]] + poolOut *giga.Pool[NodePublicKey, rpc.Client[giga.API]] + app *proxy.Proxy + + // inboundFullnodeCount tracks live non-committee inbound block-sync + // connections. Optimistic Add(1) + compare against cap; over-rejects + // by one or two under contention but never over-accepts. + inboundFullnodeCount atomic.Int64 + inboundFullnodeCap int64 +} + +// buildDataState validates the common config and constructs the data +// layer (committee, WAL, State) shared by both giga constructors. +// +// TODO(autobahn): once sei-db/ledger_db/block.BlockDB has a writer wired +// (see BlockByNumber's TODO), the data WAL is redundant. 
+func buildDataState(cfg *GigaRouterCommonConfig) (*data.State, error) { + if cfg.GenDoc.InitialHeight < 1 { + return nil, fmt.Errorf("GenDoc.InitialHeight = %v, want >=1", cfg.GenDoc.InitialHeight) + } + if cfg.DialInterval <= 0 { + return nil, fmt.Errorf("GigaRouterCommonConfig.DialInterval = %v, want > 0", cfg.DialInterval) + } + if cfg.MaxInboundFullnodePeers < 0 || cfg.MaxInboundFullnodePeers > maxInboundFullnodePeers { + return nil, fmt.Errorf("GigaRouterCommonConfig.MaxInboundFullnodePeers = %v, want 0..%v", cfg.MaxInboundFullnodePeers, maxInboundFullnodePeers) + } + committee, err := atypes.NewRoundRobinElection( + slices.Collect(maps.Keys(cfg.ValidatorAddrs)), + atypes.GlobalBlockNumber(cfg.GenDoc.InitialHeight), // nolint:gosec // verified to be positive. + cfg.GenDoc.GenesisTime, + ) + if err != nil { + return nil, fmt.Errorf("atypes.NewRoundRobinElection(): %w", err) + } + dataWAL, err := data.NewDataWAL(cfg.PersistentStateDir, committee) + if err != nil { + return nil, fmt.Errorf("data.NewDataWAL(): %w", err) + } + dataState, err := data.NewState(&data.Config{Committee: committee}, dataWAL) + if err != nil { + return nil, fmt.Errorf("data.NewState(): %w", err) + } + return dataState, nil +} + +func (r *gigaRouterCommon) LastCommittedBlockNumber() int64 { + return r.app.LastBlockHeight() +} + +// MaxGasEstimatedPerBlock reflects the network-wide block gas budget. Both +// roles ultimately resolve to genDoc.ConsensusParams.Block.MaxGas — the +// validator's producer.Config.MaxGasEstimatedPerBlock is also sourced from +// it at setup time, so read directly from genDoc here and skip the cache. +func (r *gigaRouterCommon) MaxGasEstimatedPerBlock() uint64 { + return r.cfg.GenDoc.ConsensusParams.Block.MaxGasUint64() +} + +// BlockByNumber returns the finalized global block at height n translated +// into the CometBFT coretypes.ResultBlock shape. 
This lets consumers +// (notably evmrpc, which wraps receipts/logs with block context) keep +// working under Autobahn without CometBFT's BlockStore being populated. +// +// Fields populated when the underlying GlobalBlock is well-formed: +// BlockID.Hash (Autobahn lane-block header hash — the same bytes passed to +// app.FinalizeBlock's Hash param, which the EVM receipt store records as +// blockHash), Block.Header.ChainID/Height/Time, Block.Data.Txs. Other +// fields (AppHash, ProposerAddress, …) stay at zero values — evmrpc does +// not read them on the receipt path; LastCommit is a non-nil, empty Commit. +// gb.Header and gb.Payload must be non-nil: translateGlobalBlock +// dereferences both without a nil check. +// +// TODO(autobahn): switch this to read from sei-db/ledger_db/block.BlockDB +// once a writer is wired (e.g. from app.FinalizeBlocker or executeBlock). +// Today no production code calls BlockDB.WriteBlock, so Autobahn's in-memory +// data.State is the only place a full block lives — but it's pruned per +// Sei's RetainHeight, so pruned heights surface as a height-not-available error. +// BlockDB has the right shape (height + hash indexes, async pruning) and +// is the long-term home for this read path. +func (r *gigaRouterCommon) BlockByNumber(ctx context.Context, n atypes.GlobalBlockNumber) (*coretypes.ResultBlock, error) { + gb, err := r.data.GlobalBlock(ctx, n) + if err != nil { + // Map Autobahn's pruning sentinel to CometBFT's, so callers + // (env.Block, evmrpc, ops tooling) get the same error type they + // already handle on the CometBFT path. base is None because the + // active lower bound (data.State.inner.first) is internal to + // data.State; both call sites format through the same helper.
+ if errors.Is(err, data.ErrPruned) { + return nil, coretypes.WrapErrHeightNotAvailable(utils.Clamp[int64](n), utils.None[int64]()) + } + return nil, fmt.Errorf("data.GlobalBlock(%v): %w", n, err) + } + return r.translateGlobalBlock(gb), nil +} + +// BlockByHash returns the finalized global block keyed by Autobahn block- +// header hash, translated into the CometBFT coretypes.ResultBlock shape +// (same translation as BlockByNumber). Matches CometBFT semantics for +// unknown hashes: returns &ResultBlock{Block: nil} with no error. +// +// Lookup-and-construct happens under a single data.State lock acquire, so +// the returned block matches the requested hash atomically. Hashes below +// the pruning watermark are not indexed and read as "unknown". Wrong-size +// inputs are rejected at the call site (env.BlockByHash) so this method +// can stay strongly typed on atypes.BlockHeaderHash. +// +// TODO(autobahn): replace this with a direct read from +// sei-db/ledger_db/block.BlockDB.GetBlockByHash once a writer is wired into +// block execution. The data.State-side index can also go away at that point. +func (r *gigaRouterCommon) BlockByHash(ctx context.Context, hash atypes.BlockHeaderHash) (*coretypes.ResultBlock, error) { + opt, err := r.data.GlobalBlockByHash(hash) + if err != nil { + return nil, fmt.Errorf("data.GlobalBlockByHash: %w", err) + } + // Reject the unknown-hash case here so translateGlobalBlock can rely + // on the *GlobalBlock type contract (non-nil, with non-nil Header + // and Payload) — same way executeBlock dereferences b.Header + // without checking. Mirrors CometBFT's BlockStore.LoadBlockByHash + // returning &ResultBlock{Block: nil} for an unknown hash. 
+ gb, ok := opt.Get() + if !ok { + return &coretypes.ResultBlock{}, nil + } + return r.translateGlobalBlock(gb), nil +} + +// translateGlobalBlock converts an Autobahn GlobalBlock to the CometBFT +// coretypes.ResultBlock shape used by env.Block / env.BlockByHash and +// downstream evmrpc consumers. Caller must pass a non-nil *GlobalBlock with +// non-nil Header and Payload — that's the contract data.State guarantees on +// a successful lookup, and matches how executeBlock dereferences b.Header +// without a nil-check on the same type. The "no such block" case is +// rejected at the BlockByHash call site before delegating here. +// +// LastCommit is non-nil with empty Signatures, mirroring executeBlock's +// FinalizeBlock call which passes an empty abci.CommitInfo. Under Autobahn +// the committee is fixed by genesis (no validator-set updates), so the +// application is not in control of jailing — surfacing N "absent sig" +// entries here would make trace replay's BeginBlock bump missed-block +// counters and diverge from production. ToReqBeginBlock skips the per- +// validator loop when Signatures is empty, so empty Votes flow into +// distribution/slashing on both paths. +func (r *gigaRouterCommon) translateGlobalBlock(gb *atypes.GlobalBlock) *coretypes.ResultBlock { + srcTxs := gb.Payload.Txs() + tmTxs := make(types.Txs, len(srcTxs)) + for i, tx := range srcTxs { + tmTxs[i] = tx + } + h := gb.Header.Hash() + return &coretypes.ResultBlock{ + BlockID: types.BlockID{Hash: tmbytes.HexBytes(h.Bytes())}, + Block: &types.Block{ + Header: types.Header{ + ChainID: r.cfg.GenDoc.ChainID, + // Clamp accepts any constraints.Integer for From, so + // gb.GlobalNumber (a typed uint64) goes in directly — no + // intermediate uint64() conversion needed. 
+ Height: utils.Clamp[int64](gb.GlobalNumber), + Time: gb.Timestamp, + }, + Data: types.Data{Txs: tmTxs}, + LastCommit: &types.Commit{}, + }, + } +} + +func (r *gigaRouterCommon) executeBlock(ctx context.Context, b *atypes.GlobalBlock) (*abci.ResponseCommit, error) { + app := r.app + hash := b.Header.Hash() + var proposerAddress types.Address + if vals := app.GetValidators(); len(vals) > 0 { + // Deterministically select a proposer from the app's validator committee. + // We need it so that app does not emit error logs. + proposer := slices.MinFunc(vals, func(a, b abci.ValidatorUpdate) int { return a.PubKey.Compare(b.PubKey) }) + key, err := crypto.PubKeyFromProto(proposer.PubKey) + if err != nil { + return nil, fmt.Errorf("crypto.PubKeyFromProto(): %w", err) + } + proposerAddress = key.Address() + } + + // TODO: add metrics to understand execution latency. + resp, err := app.FinalizeBlock(ctx, &abci.RequestFinalizeBlock{ + Txs: b.Payload.Txs(), + // Empty DecidedLastCommit does not indicate missing votes. + DecidedLastCommit: abci.CommitInfo{}, + // WARNING: this is a hash of the autobahn block header. + // It is used to identify block processed optimistically + // and is fed as block hash to EVM contracts. + Hash: hash[:], + Header: (&types.Header{ + ChainID: r.cfg.GenDoc.ChainID, + Height: int64(b.GlobalNumber), // nolint:gosec // different representations of the same value + Time: b.Timestamp, + // WARNING: the reward distribution has corner cases where it forgets the proposer, + // because reward is distributed with a delay. This is not our problem here though. 
+ ProposerAddress: proposerAddress, + }).ToProto(), + }) + if err != nil { + return nil, fmt.Errorf("app.FinalizeBlock(): %w", err) + } + commitResp, err := app.Commit(ctx) + if err != nil { + return nil, fmt.Errorf("app.Commit(): %w", err) + } + if err := r.data.PushAppHash(ctx, b.GlobalNumber, resp.AppHash); err != nil { + return nil, fmt.Errorf("r.data.PushAppHash(%v): %w", b.GlobalNumber, err) + } + return commitResp, nil +} + +func (r *gigaRouterCommon) runExecute(ctx context.Context) error { + app := r.app + + info, err := app.Info(ctx, &version.RequestInfo) + if err != nil { + return fmt.Errorf("App.Info(): %w", err) + } + last, ok := utils.SafeCast[atypes.GlobalBlockNumber](info.LastBlockHeight) + if !ok { + return fmt.Errorf("invalid info.LastBlockHeight = %v", info.LastBlockHeight) + } + next := last + 1 + if last == 0 { + // Fresh start: CometBFT handshaker is skipped in giga mode (see + // node.go: shouldHandshake = !stateSync && !gigaEnabled), so we + // call InitChain ourselves. It sets up the app's deliverState + // against which the first FinalizeBlock below runs. + // + // Re-entering on restart (crashed after InitChain, before first + // Commit) is safe — nothing was committed, so it behaves as a + // fresh init. + if _, err := app.InitChain(ctx, r.cfg.GenDoc.ToRequestInitChain()); err != nil { + return fmt.Errorf("App.InitChain(): %w", err) + } + var ok bool + next, ok = utils.SafeCast[atypes.GlobalBlockNumber](r.cfg.GenDoc.InitialHeight) + if !ok { + return fmt.Errorf("invalid GenDoc.InitialHeight = %v", r.cfg.GenDoc.InitialHeight) + } + } else { + // Losing a prefix of appHashes on crash is fine: AppQC is reached + // once everyone votes on apphashes of a suffix of finalized blocks. 
+ if err := r.data.PushAppHash(ctx, last, info.LastBlockAppHash); err != nil { + return fmt.Errorf("r.data.PushAppHash(): %w", err) + } + } + + for n := next; ; n += 1 { + b, err := r.data.GlobalBlock(ctx, n) + if err != nil { + return fmt.Errorf("r.data.GlobalBlock(%v): %w", n, err) + } + commitResp, err := r.executeBlock(ctx, b) + if err != nil { + return fmt.Errorf("r.executeBlock(%v): %w", n, err) + } + pruneBefore, ok := utils.SafeCast[atypes.GlobalBlockNumber](commitResp.RetainHeight) + if !ok { + return fmt.Errorf("invalid commitResp.RetainHeight = %v", commitResp.RetainHeight) + } + if err := r.data.PruneBefore(pruneBefore); err != nil { + return fmt.Errorf("r.data.PruneBefore(%v): %w", pruneBefore, err) + } + } +} + +// dialAndRunConn dials a peer, handshakes as a SeiGiga connection, +// registers the rpc client in poolOut, and runs runClient for the +// connection's lifetime. expectedKey is enforced when Some (validator +// dialing a committee member); fullnodes pass None — block-sync data +// is QC-verified, so the peer's identity doesn't need to be checked +// here. +func (r *gigaRouterCommon) dialAndRunConn( + ctx context.Context, + expectedKey utils.Option[NodePublicKey], + hp tcp.HostPort, + runClient func(ctx context.Context, client rpc.Client[giga.API]) error, +) error { + addrs, err := hp.Resolve(ctx) + if err != nil { + return fmt.Errorf("%v.Resolve(): %w", hp, err) + } + if len(addrs) == 0 { + return fmt.Errorf("%v.Resolve() = []", hp) + } + return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { + tcpConn, err := tcp.Dial(ctx, addrs[0]) + if err != nil { + return fmt.Errorf("tcp.Dial(%v): %w", addrs[0], err) + } + s.SpawnBg(func() error { return tcpConn.Run(ctx) }) + // TODO: handshake needs a timeout. 
+ hConn, err := handshake(ctx, tcpConn, r.key, handshakeSpec{SeiGigaConnection: true}) + if err != nil { + return fmt.Errorf("handshake(): %w", err) + } + if !hConn.msg.SeiGigaConnection { + return fmt.Errorf("not a sei giga connection") + } + peerKey := hConn.msg.NodeAuth.Key() + if want, ok := expectedKey.Get(); ok && peerKey != want { + return fmt.Errorf("peer key = %v, want %v", peerKey, want) + } + client := rpc.NewClient[giga.API]() + return r.poolOut.InsertAndRun(ctx, peerKey, client, func(ctx context.Context) error { + return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { + s.Spawn(func() error { return client.Run(ctx, hConn.conn) }) + return runClient(ctx, client) + }) + }) + }) +} + +// RunInboundConn serves an inbound giga connection. Non-committee peers +// get the block-sync subset (StreamFullCommitQCs + GetBlock), capped at +// inboundFullnodeCap. Committee peers get the full RunServer on +// validators; on a fullnode the connection is refused (committee peers +// shouldn't be dialing fullnodes — see Service.RunInbound). +func (r *gigaRouterCommon) RunInboundConn(ctx context.Context, hConn *handshakedConn) error { + if !hConn.msg.SeiGigaConnection { + return fmt.Errorf("not a SeiGiga connection") + } + // Filter unwanded connections. + key := hConn.msg.NodeAuth.Key() + isCommittee := false + for _, addr := range r.cfg.ValidatorAddrs { + if addr.Key == key { + isCommittee = true + break + } + } + if !isCommittee { + // Optimistic acquire: Add(1), compare, Add(-1) on overflow. 
+ if r.inboundFullnodeCount.Add(1) > r.inboundFullnodeCap { + r.inboundFullnodeCount.Add(-1) + return fmt.Errorf("inbound fullnode peer limit (%d) reached", r.inboundFullnodeCap) + } + defer r.inboundFullnodeCount.Add(-1) + } + server := rpc.NewServer[giga.API]() + return r.poolIn.InsertAndRun(ctx, key, server, func(ctx context.Context) error { + return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { + s.Spawn(func() error { return server.Run(ctx, hConn.conn) }) + if err := r.service.RunInbound(ctx, server, isCommittee); err != nil { + return fmt.Errorf("inbound from %v: %w", key, err) + } + return nil + }) + }) +} + +// EvmProxy returns the shard owner's EVMRPC URL for an EVM tx sender, or +// None if the caller should handle it locally. Overridden on +// *gigaValidatorRouter to short-circuit self-shard sends. +func (r *gigaRouterCommon) EvmProxy(sender common.Address) utils.Option[*url.URL] { + shardValidator := r.data.Committee().EvmShard(sender) + return utils.Some(r.cfg.ValidatorAddrs[shardValidator].EVMRPC) +} diff --git a/sei-tendermint/internal/p2p/giga_router_fullnode.go b/sei-tendermint/internal/p2p/giga_router_fullnode.go new file mode 100644 index 0000000000..afd0e1fa3f --- /dev/null +++ b/sei-tendermint/internal/p2p/giga_router_fullnode.go @@ -0,0 +1,82 @@ +package p2p + +import ( + "context" + "maps" + "math/rand/v2" + "slices" + + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/producer" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p/giga" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p/rpc" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/scope" +) + +type gigaFullnodeRouter struct { + *gigaRouterCommon +} + +func NewGigaFullnodeRouter(cfg *GigaRouterCommonConfig, key NodeSecretKey) (*gigaFullnodeRouter, error) { + dataState, err := buildDataState(cfg) + if err != nil { + return nil, err + } + 
logger.Info("GigaRouter initialized (fullnode)", "validators", len(cfg.ValidatorAddrs), "dial_interval", cfg.DialInterval, "inbound_fullnode_cap", cfg.MaxInboundFullnodePeers) + return &gigaFullnodeRouter{ + gigaRouterCommon: &gigaRouterCommon{ + cfg: cfg, + key: key, + data: dataState, + service: giga.NewBlockSyncService(dataState), + poolIn: giga.NewPool[NodePublicKey, rpc.Server[giga.API]](), + poolOut: giga.NewPool[NodePublicKey, rpc.Client[giga.API]](), + app: cfg.App, + inboundFullnodeCap: int64(cfg.MaxInboundFullnodePeers), + }, + }, nil +} + +func (r *gigaFullnodeRouter) Mempool() utils.Option[*producer.State] { + return utils.None[*producer.State]() +} + +func (r *gigaFullnodeRouter) Run(ctx context.Context) error { + return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { + // Single-active subscriber: walk the committee in a stable order, + // move to the next on disconnect. Avoids the N× QC duplication of + // fanning out to every committee member. + // + // TODO(autobahn-fullnode): allow hard-configuring a preferred + // validator (or a subset of trusted validators) instead of walking + // the whole committee. + s.Spawn(func() error { return r.runFullnodeSubscriber(ctx) }) + s.SpawnNamed("data", func() error { return r.data.Run(ctx) }) + s.SpawnNamed("execute", func() error { return r.runExecute(ctx) }) + s.SpawnNamed("service", func() error { return r.service.Run(ctx) }) + return nil + }) +} + +// runFullnodeSubscriber: pick a committee member, dial + block-sync, +// advance on disconnect/reject. Committee list shuffled once at startup +// so multiple fullnodes don't all converge on the same first choice. +// +// TODO(autobahn-state-sync): block sync from a single peer is bounded by +// GetBlock's per-stream rate limit (rpc.Limit{Rate:10, Concurrent:10}) — +// initial catch-up of a fresh node joining an established cluster is +// slow. Long-term fix is autobahn snapshot transfer (CometBFT-style state +// sync). 
This loop is correct for "fresh cluster" and "restart of a +// near-tip node." +func (r *gigaFullnodeRouter) runFullnodeSubscriber(ctx context.Context) error { + addrs := slices.Collect(maps.Values(r.cfg.ValidatorAddrs)) + rand.Shuffle(len(addrs), func(i, j int) { addrs[i], addrs[j] = addrs[j], addrs[i] }) + for i := 0; ; i = (i + 1) % len(addrs) { + addr := addrs[i] + err := r.dialAndRunConn(ctx, utils.None[NodePublicKey](), addr.HostPort, r.service.RunBlockSyncClient) + logger.Info("fullnode giga connection ended; failing over", "addr", addr, "err", err) + if err := utils.Sleep(ctx, r.cfg.DialInterval); err != nil { + return err + } + } +} diff --git a/sei-tendermint/internal/p2p/giga_router_fullnode_test.go b/sei-tendermint/internal/p2p/giga_router_fullnode_test.go new file mode 100644 index 0000000000..ce5a64d778 --- /dev/null +++ b/sei-tendermint/internal/p2p/giga_router_fullnode_test.go @@ -0,0 +1,107 @@ +package p2p + +import ( + "fmt" + "net/url" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + + atypes "github.com/sei-protocol/sei-chain/sei-tendermint/autobahn/types" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/proxy" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/require" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/tcp" + "github.com/sei-protocol/sei-chain/sei-tendermint/types" +) + +// TestGigaRouter_Fullnode covers the construction shape of the non-validator +// (fullnode) GigaRouter: routing always picks a remote shard owner (no +// local short-circuit because there is no validator key), data + service +// are constructed but consensus/producer are not, and the read-path +// passthrough methods source values from the local data.State + genesis +// doc (no errFullnode-style sentinels). 
The end-to-end block-sync / +// executeBlock behaviour is covered by the autobahn integration test +// where a real validator cluster supplies finalized blocks; this unit +// test only verifies the construction surface. +func TestGigaRouter_Fullnode(t *testing.T) { + rng := utils.TestRng() + _, validatorKeys := atypes.GenCommittee(rng, 5) + addrs := map[atypes.PublicKey]GigaNodeAddr{} + urlByValidator := map[atypes.PublicKey]*url.URL{} + for i, validatorKey := range validatorKeys { + nodeKey := makeKey(rng) + // Every committee member needs an EVMRPC URL for fullnode mode — + // NewGigaRouter enforces this at construction so a missing URL + // can't lead to silently-dropped txs. + rpcURL, err := url.Parse(fmt.Sprintf("http://validator-%d.example.com:8545", i)) + require.NoError(t, err) + addrs[validatorKey.Public()] = GigaNodeAddr{ + Key: nodeKey.Public(), + HostPort: tcp.HostPort{Hostname: "127.0.0.1", Port: 26657}, + EVMRPC: rpcURL, + } + urlByValidator[validatorKey.Public()] = rpcURL + } + cp := types.DefaultConsensusParams() + cp.Block.MaxGas = 12345 + genDoc := &types.GenesisDoc{ + ChainID: "giga-router-fullnode-test", + InitialHeight: 1, + AppState: testAppStateJSON(rng), + ConsensusParams: cp, + } + require.NoError(t, genDoc.ValidateAndComplete()) + + app := newTestApp() + proxyApp := proxy.New(app, proxy.NopMetrics()) + + // Fullnodes have no validator key and no Producer config. The data WAL + // reads PersistentStateDir = None (in-memory) for this construction- + // shape check; App is required for executeBlock but isn't exercised by + // this test. + router, err := NewGigaFullnodeRouter(&GigaRouterCommonConfig{ + DialInterval: time.Second, + ValidatorAddrs: addrs, + PersistentStateDir: utils.None[string](), + App: proxyApp, + GenDoc: genDoc, + }, makeKey(rng)) + require.NoError(t, err) + + // EvmProxy: for every sender, the fullnode router resolves to the + // shard owner's URL. 
NewGigaRouter rejects configs where any + // committee member is missing an EVMRPC URL, so the (nil,false) + // branch is unreachable here. Crucially, no sender is ever proxied + // "to ourselves" — that short-circuit doesn't exist in fullnode mode. + expectedRemoteURLs := map[string]struct{}{} + for _, rpcURL := range urlByValidator { + expectedRemoteURLs[rpcURL.String()] = struct{}{} + } + returnedRemoteURLs := map[string]struct{}{} + for range 200 { + sender := common.BytesToAddress(utils.GenBytes(rng, common.AddressLength)) + shardValidator := router.data.Committee().EvmShard(sender) + expectedURL := urlByValidator[shardValidator] + proxyURL, ok := router.EvmProxy(sender).Get() + require.True(t, ok) + require.Equal(t, expectedURL.String(), proxyURL.String()) + returnedRemoteURLs[proxyURL.String()] = struct{}{} + } + // Sanity: with 200 random senders mapped uniformly over 5 shards we + // expect to have hit every shard owner at least once. + require.Equal(t, expectedRemoteURLs, returnedRemoteURLs) + + // Read-path methods source from local data.State + genesis doc — no + // sentinels. Before any block is pushed (and InitChain hasn't run), + // app.LastBlockHeight() is 0, so LastCommittedBlockNumber returns 0. + // MaxGasEstimatedPerBlock reflects the genesis consensus param. + require.Equal(t, int64(0), router.LastCommittedBlockNumber()) + require.Equal(t, uint64(12345), router.MaxGasEstimatedPerBlock()) + // BlockByHash returns &ResultBlock{Block:nil} for an unknown hash, the + // same shape the validator path returns — no sentinel mode-check. 
+ rb, err := router.BlockByHash(t.Context(), atypes.BlockHeaderHash{}) + require.NoError(t, err) + require.Nil(t, rb.Block) +} diff --git a/sei-tendermint/internal/p2p/giga_router_test.go b/sei-tendermint/internal/p2p/giga_router_test.go deleted file mode 100644 index 9d9c47936b..0000000000 --- a/sei-tendermint/internal/p2p/giga_router_test.go +++ /dev/null @@ -1,502 +0,0 @@ -package p2p - -import ( - "context" - "crypto/sha256" - "encoding/json" - "fmt" - "net/netip" - "net/url" - "slices" - "testing" - "time" - - "github.com/ethereum/go-ethereum/common" - dbm "github.com/tendermint/tm-db" - "golang.org/x/time/rate" - - abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" - atypes "github.com/sei-protocol/sei-chain/sei-tendermint/autobahn/types" - "github.com/sei-protocol/sei-chain/sei-tendermint/crypto" - "github.com/sei-protocol/sei-chain/sei-tendermint/crypto/ed25519" - "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/consensus" - "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/producer" - "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p/conn" - "github.com/sei-protocol/sei-chain/sei-tendermint/internal/proxy" - "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" - "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/require" - "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/scope" - "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/tcp" - "github.com/sei-protocol/sei-chain/sei-tendermint/types" -) - -type shaHash = [sha256.Size]byte - -type testAppState struct { - Init utils.Option[*abci.RequestInitChain] - Validators []abci.ValidatorUpdate - Blocks []*abci.RequestFinalizeBlock - Txs map[shaHash]bool - AppHash shaHash - // Committed tracks whether FinalizeBlock is allowed. - // Set to true by InitChain (so FinalizeBlock can follow without Commit, - // matching the CometBFT handshaker flow) and by Commit. - // Cleared by FinalizeBlock. 
- Committed bool -} - -func testAppStateJSON(rng utils.Rng) json.RawMessage { - return utils.OrPanic1(json.Marshal(&abci.ValidatorUpdate{ - PubKey: crypto.PubKeyToProto(ed25519.TestSecretKey(utils.GenBytes(rng, 32)).Public()), - Power: rng.Int63(), - })) -} - -type testApp struct { - abci.BaseApplication - state utils.Watch[*testAppState] -} - -func newTestApp() *testApp { - return &testApp{state: utils.NewWatch(&testAppState{ - Txs: map[shaHash]bool{}, - })} -} - -func (a *testApp) GetValidators() []abci.ValidatorUpdate { - for state := range a.state.Lock() { - return slices.Clone(state.Validators) - } - panic("unreachable") -} - -func (a *testApp) Info(_ context.Context, _ *abci.RequestInfo) (*abci.ResponseInfo, error) { - for state := range a.state.Lock() { - init, ok := state.Init.Get() - if !ok { - return &abci.ResponseInfo{}, nil - } - if len(state.Blocks) == 0 { - // Match the real SDK: InitChain without Commit leaves LastBlockHeight=0. - return &abci.ResponseInfo{ - LastBlockHeight: 0, - LastBlockAppHash: slices.Clone(state.AppHash[:]), - }, nil - } - return &abci.ResponseInfo{ - LastBlockHeight: init.InitialHeight + int64(len(state.Blocks)) - 1, - LastBlockAppHash: slices.Clone(state.AppHash[:]), - }, nil - } - panic("unreachable") -} - -func (a *testApp) CheckTx(context.Context, *abci.RequestCheckTxV2) *abci.ResponseCheckTxV2 { - return &abci.ResponseCheckTxV2{ - ResponseCheckTx: &abci.ResponseCheckTx{ - Code: abci.CodeTypeOK, - GasWanted: 1, - GasEstimated: 1, - }, - } -} - -func (a *testApp) InitChain(_ context.Context, req *abci.RequestInitChain) (*abci.ResponseInitChain, error) { - for state, ctrl := range a.state.Lock() { - if state.Init.IsPresent() { - return nil, fmt.Errorf("chain already initialized") - } - if req.InitialHeight < 1 { - return nil, fmt.Errorf("InitialHeight = %v, want >=1", req.InitialHeight) - } - var val abci.ValidatorUpdate - if err := json.Unmarshal(req.AppStateBytes, &val); err != nil { - return nil, 
fmt.Errorf("proto.Unmarshal(): %w", err) - } - state.Init = utils.Some(req) - state.AppHash = sha256.Sum256(req.AppStateBytes) - state.Validators = utils.Slice(val) - state.Committed = true - ctrl.Updated() - return &abci.ResponseInitChain{ - AppHash: slices.Clone(state.AppHash[:]), - Validators: slices.Clone(state.Validators), - }, nil - } - panic("unreachable") -} - -func (a *testApp) FinalizeBlock(_ context.Context, req *abci.RequestFinalizeBlock) (*abci.ResponseFinalizeBlock, error) { - for state, ctrl := range a.state.Lock() { - if !state.Committed { - return nil, fmt.Errorf("FinalizeBlock before Commit") - } - init, ok := state.Init.Get() - if !ok { - return nil, fmt.Errorf("app not initialized") - } - state.Blocks = append(state.Blocks, req) - state.AppHash = sha256.Sum256(slices.Concat(req.Hash, state.AppHash[:])) - for _, tx := range req.Txs { - state.Txs[sha256.Sum256(tx)] = true - } - logger.Info("FinalizeBlock", "n", req.Header.Height-init.InitialHeight) - state.Committed = false - ctrl.Updated() - return &abci.ResponseFinalizeBlock{ - AppHash: slices.Clone(state.AppHash[:]), - TxResults: slices.Repeat([]*abci.ExecTxResult{{Code: abci.CodeTypeOK}}, len(req.Txs)), - }, nil - } - panic("unreachable") -} - -func (a *testApp) Commit(context.Context) (*abci.ResponseCommit, error) { - for state, ctrl := range a.state.Lock() { - if state.Committed { - return nil, fmt.Errorf("double commit") - } - state.Committed = true - ctrl.Updated() - } - return &abci.ResponseCommit{ - // Don't prune anything. - RetainHeight: 0, - }, nil -} - -func (a *testApp) WaitForTx(ctx context.Context, tx []byte) error { - h := sha256.Sum256(tx) - for state, ctrl := range a.state.Lock() { - return ctrl.WaitUntil(ctx, func() bool { - _, ok := state.Txs[h] - return ok - }) - } - panic("unreachable") -} - -func (a *testApp) Snapshot() testAppState { - for state := range a.state.Lock() { - s := *state - // Txs is derived and Committed is not deterministic. 
- s.Txs = nil - s.Committed = false - return s - } - panic("unreachable") -} - -type testNodeCfg struct { - validatorKey atypes.SecretKey - nodeKey NodeSecretKey - addr netip.AddrPort -} - -func (c *testNodeCfg) GigaNodeAddr() GigaNodeAddr { - return GigaNodeAddr{ - Key: c.nodeKey.Public(), - HostPort: tcp.HostPort{Hostname: c.addr.Addr().String(), Port: c.addr.Port()}, - } -} - -// TestInitChainCommitThenFinalize is a contract test for testApp: it verifies -// that testApp supports the autobahn block execution flow where runExecute -// calls InitChain (no Commit), then FinalizeBlock at InitialHeight using the -// deliverState set up by InitChain, followed by Commit. -func TestInitChainCommitThenFinalize(t *testing.T) { - rng := utils.TestRng() - app := newTestApp() - ctx := t.Context() - - initialHeight := rng.Int63n(100000) + 1 - appState := testAppStateJSON(rng) - - // InitChain - _, err := app.InitChain(ctx, &abci.RequestInitChain{ - InitialHeight: initialHeight, - AppStateBytes: appState, - }) - require.NoError(t, err) - - // No Commit after InitChain — the SDK expects FinalizeBlock at InitialHeight - // using the deliverState set up by InitChain. 
- - // Verify app reports correct height after InitChain (no blocks yet) - info, err := app.Info(ctx, &abci.RequestInfo{}) - require.NoError(t, err) - require.Equal(t, int64(0), info.LastBlockHeight, - "testApp should report 0 after InitChain with no committed blocks (matches real SDK)") - - // FinalizeBlock should succeed — deliverState was set up by InitChain - blockHash := sha256.Sum256([]byte("test-block")) - _, err = app.FinalizeBlock(ctx, &abci.RequestFinalizeBlock{ - Hash: blockHash[:], - Header: (&types.Header{ - Height: initialHeight, - }).ToProto(), - }) - require.NoError(t, err) - - // Second Commit should succeed - _, err = app.Commit(ctx) - require.NoError(t, err) - - // Verify height advanced - info, err = app.Info(ctx, &abci.RequestInfo{}) - require.NoError(t, err) - require.Equal(t, initialHeight, info.LastBlockHeight, - "testApp should report InitialHeight after 1 block") -} - -func TestGigaRouter_FinalizeBlocks(t *testing.T) { - const maxTxsPerBlock = 20 - const blocksPerLane = 5 - const txGasUsed = 21_000 - - ctx := t.Context() - rng := utils.TestRng() - _, keys := atypes.GenCommittee(rng, 4) - var cfgs []*testNodeCfg - for _, key := range keys { - cfgs = append(cfgs, &testNodeCfg{ - validatorKey: key, - nodeKey: makeKey(rng), - addr: tcp.TestReserveAddr(), - }) - } - addrs := map[atypes.PublicKey]GigaNodeAddr{} - for _, cfg := range cfgs { - addrs[cfg.validatorKey.Public()] = cfg.GigaNodeAddr() - } - genDoc := &types.GenesisDoc{ - ChainID: "giga-router-test", - InitialHeight: rng.Int63n(100000) + 1, - AppState: testAppStateJSON(rng), - } - require.NoError(t, genDoc.ValidateAndComplete()) - - err := scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { - var apps []*testApp - var routers []*Router - var allTxs [][]byte - for i, cfg := range cfgs { - nodeInfo := makeInfo(cfg.nodeKey) - nodeInfo.ListenAddr = cfg.addr.String() - nodeInfo.Network = genDoc.ChainID - e := Endpoint{AddrPort: cfg.addr} - app := newTestApp() - proxyApp := 
proxy.New(app, proxy.NopMetrics()) - router, err := NewRouter( - NopMetrics(), - cfg.nodeKey, - func() *types.NodeInfo { return &nodeInfo }, - dbm.NewMemDB(), - &RouterOptions{ - SelfAddress: utils.Some(e.NodeAddress(cfg.nodeKey.Public().NodeID())), - Endpoint: e, - Connection: conn.DefaultMConnConfig(), - IncomingConnectionWindow: utils.Some(time.Duration(0)), - MaxAcceptRate: utils.Some(rate.Inf), - MaxDialRate: utils.Some(rate.Limit(30)), - Giga: utils.Some(&GigaRouterConfig{ - // Aggressive dialing rate to speed up startup. - DialInterval: 100 * time.Millisecond, - ValidatorAddrs: addrs, - Consensus: &consensus.Config{ - Key: cfg.validatorKey, - ViewTimeout: func(atypes.View) time.Duration { return time.Hour }, - PersistentStateDir: utils.None[string](), - }, - Producer: &producer.Config{ - App: proxyApp, - MaxGasWantedPerBlock: txGasUsed * maxTxsPerBlock, - MaxGasEstimatedPerBlock: txGasUsed * maxTxsPerBlock, - MaxTxsPerBlock: maxTxsPerBlock, - MaxTxsPerSecond: utils.None[uint64](), - BlockInterval: 100 * time.Millisecond, - }, - GenDoc: genDoc, - }), - }, - ) - require.NoError(t, err, "NewRouter[%v]", i) - s.SpawnBgNamed(fmt.Sprintf("router[%v]", i), func() error { return utils.IgnoreCancel(router.Run(ctx)) }) - apps = append(apps, app) - routers = append(routers, router) - var txs [][]byte - for range maxTxsPerBlock * blocksPerLane { - tx := utils.GenBytes(rng, 100) - txs = append(txs, tx) - allTxs = append(allTxs, tx) - } - s.SpawnNamed(fmt.Sprintf("producer[%v]", i), func() error { - giga := router.Giga().OrPanic("non-giga router") - for _, tx := range txs { - if _, err := giga.InsertTx(ctx, tx); err != nil { - return fmt.Errorf("txMempool.CheckTx(): %w", err) - } - } - return nil - }) - } - // Each node should finalize all txs locally. - for _, app := range apps { - for _, tx := range allTxs { - require.NoError(t, app.WaitForTx(ctx, tx), "WaitForTx") - } - } - // Nodes should agree on the final state. 
- want := apps[0].Snapshot() - for i, app := range apps { - t.Logf("app[%v]", i) - require.NoError(t, utils.TestDiff(want, app.Snapshot()), "state mismatch app[%v]", i) - } - // Covers Router.Giga() + GigaRouter.LastCommittedBlockNumber() — after - // blocks have been finalized every node should report a non-zero - // consensus-committed height through the new accessors used by /status. - for i, r := range routers { - giga := r.Giga().OrPanic("non-giga router") - committed := giga.LastCommittedBlockNumber() - require.Positive(t, committed, "router[%v].LastCommittedBlockNumber()", i) - // Covers GigaRouter.BlockByNumber — the accessor used by the - // Autobahn branch in env.Block to serve /block and evmrpc block - // lookups. Fetch the last committed block and verify it carries - // the expected height + hash, the right chain id, and that the - // payload Txs round-tripped (we just submitted txs). - rb, err := giga.BlockByNumber(ctx, atypes.GlobalBlockNumber(committed)) //nolint:gosec // committed is positive (validated above) - require.NoError(t, err, "router[%v].BlockByNumber(%v)", i, committed) - require.NotNil(t, rb.Block, "router[%v].BlockByNumber(%v).Block", i, committed) - require.Equal(t, committed, rb.Block.Height, "router[%v].BlockByNumber(%v) height", i, committed) - require.NotEmpty(t, rb.BlockID.Hash, "router[%v].BlockByNumber(%v) block hash", i, committed) - require.Equal(t, genDoc.ChainID, rb.Block.Header.ChainID, "router[%v].BlockByNumber(%v) chain id", i, committed) - // LastCommit is non-nil with empty Signatures — mirrors - // executeBlock's FinalizeBlock(DecidedLastCommit: empty) - // so trace replay and production both see "no votes" on - // the prior block. ToReqBeginBlock skips the per-val loop - // when Signatures is empty, so this is also enough to - // avoid the OOB deref the original PR was guarding against. 
- require.NotNil(t, rb.Block.LastCommit, "router[%v].BlockByNumber(%v) LastCommit", i, committed) - require.Empty(t, rb.Block.LastCommit.Signatures, "router[%v].BlockByNumber(%v) Signatures", i, committed) - // Round-trip the just-fetched block hash back through - // BlockByHash and assert we get the same ResultBlock back. - var hashKey atypes.BlockHeaderHash - copy(hashKey[:], rb.BlockID.Hash) - rbh, err := giga.BlockByHash(ctx, hashKey) - require.NoError(t, err, "router[%v].BlockByHash(%x)", i, rb.BlockID.Hash) - require.Equal(t, rb, rbh, "router[%v].BlockByHash(%x) ≠ BlockByNumber(%v)", i, rb.BlockID.Hash, committed) - } - // Payload.Txs round-trips: for every retained block, the txs the - // data layer holds (GlobalBlock.Payload.Txs) must equal the txs - // surfaced through BlockByNumber. Iterates the full retain window - // rather than a fixed tail so the assertion holds regardless of - // where producers placed the test txs. - giga0, _ := routers[0].Giga().Get() - latest := giga0.LastCommittedBlockNumber() - for h := int64(1); h <= latest; h++ { - gbn := atypes.GlobalBlockNumber(h) //nolint:gosec // h is positive - gb, err := giga0.data.GlobalBlock(ctx, gbn) - if err != nil { - continue // pruned out of the retain window - } - rb, err := giga0.BlockByNumber(ctx, gbn) - require.NoError(t, err, "router[0].BlockByNumber(%v)", h) - // Convert rb.Block.Data.Txs ([]types.Tx) back to [][]byte - // to compare against gb.Payload.Txs() directly. 
- rbBytes := make([][]byte, len(rb.Block.Data.Txs)) - for j, t := range rb.Block.Data.Txs { - rbBytes[j] = t - } - require.Equal(t, gb.Payload.Txs(), rbBytes, "router[0].BlockByNumber(%v).Block.Data.Txs ≠ data.GlobalBlock(%v).Payload.Txs", h, h) - } - return nil - }) - require.NoError(t, err) -} - -func TestGigaRouter_EvmProxy(t *testing.T) { - rng := utils.TestRng() - _, validatorKeys := atypes.GenCommittee(rng, 10) - var nodeKeys []NodeSecretKey - addrs := map[atypes.PublicKey]GigaNodeAddr{} - urlByValidator := map[atypes.PublicKey]*url.URL{} - for i, validatorKey := range validatorKeys { - nodeKey := makeKey(rng) - nodeKeys = append(nodeKeys, nodeKey) - addr := GigaNodeAddr{ - Key: nodeKey.Public(), - HostPort: tcp.HostPort{Hostname: "127.0.0.1", Port: 26657}, - } - if i < 7 { - rpcURL, err := url.Parse(fmt.Sprintf("http://validator-%d.example.com:8545", i)) - require.NoError(t, err) - addr.EVMRPC = utils.Some(rpcURL) - urlByValidator[validatorKey.Public()] = rpcURL - } - addrs[validatorKey.Public()] = addr - } - genDoc := &types.GenesisDoc{ - ChainID: "giga-router-proxy-test", - InitialHeight: 1, - AppState: testAppStateJSON(rng), - } - require.NoError(t, genDoc.ValidateAndComplete()) - - router, err := NewGigaRouter(&GigaRouterConfig{ - DialInterval: time.Second, - ValidatorAddrs: addrs, - Consensus: &consensus.Config{ - Key: validatorKeys[0], - ViewTimeout: func(atypes.View) time.Duration { return time.Second }, - PersistentStateDir: utils.None[string](), - }, - Producer: &producer.Config{ - App: proxy.New(newTestApp(), proxy.NopMetrics()), - MaxGasWantedPerBlock: 1, - MaxGasEstimatedPerBlock: 1, - MaxTxsPerBlock: 1, - MaxTxsPerSecond: utils.None[uint64](), - BlockInterval: time.Second, - }, - GenDoc: genDoc, - }, nodeKeys[0]) - require.NoError(t, err) - - localValidator := validatorKeys[0].Public() - localURL, ok := urlByValidator[localValidator] - require.True(t, ok) - - expectedRemoteURLs := map[string]struct{}{} - for validator, rpcURL := range 
urlByValidator { - if validator == localValidator { - continue - } - expectedRemoteURLs[rpcURL.String()] = struct{}{} - } - returnedRemoteURLs := map[string]struct{}{} - - for range 200 { - sender := common.BytesToAddress(utils.GenBytes(rng, common.AddressLength)) - shardValidator := router.data.Committee().EvmShard(sender) - - proxyURL, ok := router.EvmProxy(sender) - expectedURL, hasURL := urlByValidator[shardValidator] - - switch { - case shardValidator == localValidator: - require.False(t, ok) - require.Nil(t, proxyURL) - case hasURL: - require.True(t, ok) - require.NotNil(t, proxyURL) - require.Equal(t, expectedURL.String(), proxyURL.String()) - require.NotEqual(t, localURL.String(), proxyURL.String()) - returnedRemoteURLs[proxyURL.String()] = struct{}{} - default: - require.False(t, ok) - require.Nil(t, proxyURL) - } - } - - require.Equal(t, expectedRemoteURLs, returnedRemoteURLs) -} diff --git a/sei-tendermint/internal/p2p/giga_router_testhelper_test.go b/sei-tendermint/internal/p2p/giga_router_testhelper_test.go new file mode 100644 index 0000000000..f63de09719 --- /dev/null +++ b/sei-tendermint/internal/p2p/giga_router_testhelper_test.go @@ -0,0 +1,254 @@ +package p2p + +import ( + "context" + "crypto/sha256" + "encoding/json" + "fmt" + "net/netip" + "net/url" + "slices" + "testing" + + abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" + atypes "github.com/sei-protocol/sei-chain/sei-tendermint/autobahn/types" + "github.com/sei-protocol/sei-chain/sei-tendermint/crypto" + "github.com/sei-protocol/sei-chain/sei-tendermint/crypto/ed25519" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/require" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/tcp" + "github.com/sei-protocol/sei-chain/sei-tendermint/types" +) + +type shaHash = [sha256.Size]byte + +type testAppState struct { + Init utils.Option[*abci.RequestInitChain] + Validators []abci.ValidatorUpdate 
+ Blocks []*abci.RequestFinalizeBlock + Txs map[shaHash]bool + AppHash shaHash + // Committed tracks whether FinalizeBlock is allowed. + // Set to true by InitChain (so FinalizeBlock can follow without Commit, + // matching the CometBFT handshaker flow) and by Commit. + // Cleared by FinalizeBlock. + Committed bool +} + +func testAppStateJSON(rng utils.Rng) json.RawMessage { + return utils.OrPanic1(json.Marshal(&abci.ValidatorUpdate{ + PubKey: crypto.PubKeyToProto(ed25519.TestSecretKey(utils.GenBytes(rng, 32)).Public()), + Power: rng.Int63(), + })) +} + +type testApp struct { + abci.BaseApplication + state utils.Watch[*testAppState] +} + +func newTestApp() *testApp { + return &testApp{state: utils.NewWatch(&testAppState{ + Txs: map[shaHash]bool{}, + })} +} + +func (a *testApp) GetValidators() []abci.ValidatorUpdate { + for state := range a.state.Lock() { + return slices.Clone(state.Validators) + } + panic("unreachable") +} + +func (a *testApp) Info(_ context.Context, _ *abci.RequestInfo) (*abci.ResponseInfo, error) { + for state := range a.state.Lock() { + init, ok := state.Init.Get() + if !ok { + return &abci.ResponseInfo{}, nil + } + if len(state.Blocks) == 0 { + // Match the real SDK: InitChain without Commit leaves LastBlockHeight=0. 
+ return &abci.ResponseInfo{ + LastBlockHeight: 0, + LastBlockAppHash: slices.Clone(state.AppHash[:]), + }, nil + } + return &abci.ResponseInfo{ + LastBlockHeight: init.InitialHeight + int64(len(state.Blocks)) - 1, + LastBlockAppHash: slices.Clone(state.AppHash[:]), + }, nil + } + panic("unreachable") +} + +func (a *testApp) LastBlockHeight() int64 { + for state := range a.state.Lock() { + init, ok := state.Init.Get() + if !ok || len(state.Blocks) == 0 { + return 0 + } + return init.InitialHeight + int64(len(state.Blocks)) - 1 + } + panic("unreachable") +} + +func (a *testApp) CheckTx(context.Context, *abci.RequestCheckTxV2) *abci.ResponseCheckTxV2 { + return &abci.ResponseCheckTxV2{ + ResponseCheckTx: &abci.ResponseCheckTx{ + Code: abci.CodeTypeOK, + GasWanted: 1, + GasEstimated: 1, + }, + } +} + +func (a *testApp) InitChain(_ context.Context, req *abci.RequestInitChain) (*abci.ResponseInitChain, error) { + for state, ctrl := range a.state.Lock() { + if state.Init.IsPresent() { + return nil, fmt.Errorf("chain already initialized") + } + if req.InitialHeight < 1 { + return nil, fmt.Errorf("InitialHeight = %v, want >=1", req.InitialHeight) + } + var val abci.ValidatorUpdate + if err := json.Unmarshal(req.AppStateBytes, &val); err != nil { + return nil, fmt.Errorf("json.Unmarshal(): %w", err) + } + state.Init = utils.Some(req) + state.AppHash = sha256.Sum256(req.AppStateBytes) + state.Validators = utils.Slice(val) + state.Committed = true + ctrl.Updated() + return &abci.ResponseInitChain{ + AppHash: slices.Clone(state.AppHash[:]), + Validators: slices.Clone(state.Validators), + }, nil + } + panic("unreachable") +} + +func (a *testApp) FinalizeBlock(_ context.Context, req *abci.RequestFinalizeBlock) (*abci.ResponseFinalizeBlock, error) { + for state, ctrl := range a.state.Lock() { + if !state.Committed { + return nil, fmt.Errorf("FinalizeBlock before Commit") + } + init, ok := state.Init.Get() + if !ok { + return nil, fmt.Errorf("app not initialized") + } +
state.Blocks = append(state.Blocks, req) + state.AppHash = sha256.Sum256(slices.Concat(req.Hash, state.AppHash[:])) + for _, tx := range req.Txs { + state.Txs[sha256.Sum256(tx)] = true + } + logger.Info("FinalizeBlock", "n", req.Header.Height-init.InitialHeight) + state.Committed = false + ctrl.Updated() + return &abci.ResponseFinalizeBlock{ + AppHash: slices.Clone(state.AppHash[:]), + TxResults: slices.Repeat([]*abci.ExecTxResult{{Code: abci.CodeTypeOK}}, len(req.Txs)), + }, nil + } + panic("unreachable") +} + +func (a *testApp) Commit(context.Context) (*abci.ResponseCommit, error) { + for state, ctrl := range a.state.Lock() { + if state.Committed { + return nil, fmt.Errorf("double commit") + } + state.Committed = true + ctrl.Updated() + } + return &abci.ResponseCommit{ + // Don't prune anything. + RetainHeight: 0, + }, nil +} + +func (a *testApp) WaitForTx(ctx context.Context, tx []byte) error { + h := sha256.Sum256(tx) + for state, ctrl := range a.state.Lock() { + return ctrl.WaitUntil(ctx, func() bool { + _, ok := state.Txs[h] + return ok + }) + } + panic("unreachable") +} + +func (a *testApp) Snapshot() testAppState { + for state := range a.state.Lock() { + s := *state + // Txs is derived and Committed is not deterministic. + s.Txs = nil + s.Committed = false + return s + } + panic("unreachable") +} + +type testNodeCfg struct { + validatorKey atypes.SecretKey + nodeKey NodeSecretKey + addr netip.AddrPort +} + +func (c *testNodeCfg) GigaNodeAddr() GigaNodeAddr { + // EVMRPC must be present for NewGigaRouter to accept the config on + // either path; the URL value is unused by the tests in this file. 
+ return GigaNodeAddr{ + Key: c.nodeKey.Public(), + HostPort: tcp.HostPort{Hostname: c.addr.Addr().String(), Port: c.addr.Port()}, + EVMRPC: utils.OrPanic1(url.Parse(fmt.Sprintf("http://%s:8545", c.addr.Addr().String()))), + } +} + +// TestInitChainCommitThenFinalize is a contract test for testApp: it verifies +// that testApp supports the autobahn block execution flow where runExecute +// calls InitChain (no Commit), then FinalizeBlock at InitialHeight using the +// deliverState set up by InitChain, followed by Commit. +func TestInitChainCommitThenFinalize(t *testing.T) { + rng := utils.TestRng() + app := newTestApp() + ctx := t.Context() + + initialHeight := rng.Int63n(100000) + 1 + appState := testAppStateJSON(rng) + + // InitChain + _, err := app.InitChain(ctx, &abci.RequestInitChain{ + InitialHeight: initialHeight, + AppStateBytes: appState, + }) + require.NoError(t, err) + + // No Commit after InitChain — the SDK expects FinalizeBlock at InitialHeight + // using the deliverState set up by InitChain. 
+ + // Verify app reports correct height after InitChain (no blocks yet) + info, err := app.Info(ctx, &abci.RequestInfo{}) + require.NoError(t, err) + require.Equal(t, int64(0), info.LastBlockHeight, + "testApp should report 0 after InitChain with no committed blocks (matches real SDK)") + + // FinalizeBlock should succeed — deliverState was set up by InitChain + blockHash := sha256.Sum256([]byte("test-block")) + _, err = app.FinalizeBlock(ctx, &abci.RequestFinalizeBlock{ + Hash: blockHash[:], + Header: (&types.Header{ + Height: initialHeight, + }).ToProto(), + }) + require.NoError(t, err) + + // Commit after FinalizeBlock should succeed + _, err = app.Commit(ctx) + require.NoError(t, err) + + // Verify height advanced + info, err = app.Info(ctx, &abci.RequestInfo{}) + require.NoError(t, err) + require.Equal(t, initialHeight, info.LastBlockHeight, + "testApp should report InitialHeight after 1 block") +} diff --git a/sei-tendermint/internal/p2p/giga_router_validator.go b/sei-tendermint/internal/p2p/giga_router_validator.go new file mode 100644 index 0000000000..4c65cd1538 --- /dev/null +++ b/sei-tendermint/internal/p2p/giga_router_validator.go @@ -0,0 +1,97 @@ +package p2p + +import ( + "context" + "fmt" + "net/url" + + "github.com/ethereum/go-ethereum/common" + atypes "github.com/sei-protocol/sei-chain/sei-tendermint/autobahn/types" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/consensus" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/producer" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p/giga" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p/rpc" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/scope" +) + +type gigaValidatorRouter struct { + *gigaRouterCommon + + consensus *consensus.State + producer *producer.State + // validatorKey is the cached public form of cfg.ValidatorKey, used by + // EvmProxy to
short-circuit self-shard sends to the local mempool. + validatorKey atypes.PublicKey +} + +func NewGigaValidatorRouter(cfg *GigaValidatorConfig, key NodeSecretKey) (*gigaValidatorRouter, error) { + dataState, err := buildDataState(&cfg.GigaRouterCommonConfig) + if err != nil { + return nil, err + } + consensusState, err := consensus.NewState(&consensus.Config{ + Key: cfg.ValidatorKey, + ViewTimeout: cfg.ViewTimeout, + PersistentStateDir: cfg.PersistentStateDir, + }, dataState) + if err != nil { + return nil, fmt.Errorf("consensus.NewState(): %w", err) + } + producerState := producer.NewState(cfg.Producer, consensusState, cfg.App) + logger.Info("GigaRouter initialized (validator)", "validators", len(cfg.ValidatorAddrs), "dial_interval", cfg.DialInterval, "inbound_fullnode_cap", cfg.MaxInboundFullnodePeers) + return &gigaValidatorRouter{ + gigaRouterCommon: &gigaRouterCommon{ + cfg: &cfg.GigaRouterCommonConfig, + key: key, + data: dataState, + service: giga.NewService(consensusState), + poolIn: giga.NewPool[NodePublicKey, rpc.Server[giga.API]](), + poolOut: giga.NewPool[NodePublicKey, rpc.Client[giga.API]](), + app: cfg.App, + inboundFullnodeCap: int64(cfg.MaxInboundFullnodePeers), + }, + consensus: consensusState, + producer: producerState, + validatorKey: cfg.ValidatorKey.Public(), + }, nil +} + +func (r *gigaValidatorRouter) Mempool() utils.Option[*producer.State] { + return utils.Some(r.producer) +} + +func (r *gigaValidatorRouter) Run(ctx context.Context) error { + return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { + // Validators dial every committee member in parallel — consensus + // voting needs fan-out, not stickiness. Same connections also + // serve block sync between committee peers. 
+ for _, addr := range r.cfg.ValidatorAddrs { + s.Spawn(func() error { + for { + err := r.dialAndRunConn(ctx, utils.Some(addr.Key), addr.HostPort, r.service.RunClient) + logger.Info("giga connection failed", "addr", addr, "err", err) + if err := utils.Sleep(ctx, r.cfg.DialInterval); err != nil { + return err + } + } + }) + } + s.SpawnNamed("consensus", func() error { return r.consensus.Run(ctx) }) + s.SpawnNamed("producer", func() error { return r.producer.Run(ctx) }) + s.SpawnNamed("data", func() error { return r.data.Run(ctx) }) + s.SpawnNamed("execute", func() error { return r.runExecute(ctx) }) + s.SpawnNamed("service", func() error { return r.service.Run(ctx) }) + return nil + }) +} + +// EvmProxy on the validator returns None when the sender's shard owner is +// us (handle locally via mempool, no HTTP round-trip to self). +func (r *gigaValidatorRouter) EvmProxy(sender common.Address) utils.Option[*url.URL] { + shardValidator := r.data.Committee().EvmShard(sender) + if r.validatorKey == shardValidator { + return utils.None[*url.URL]() + } + return utils.Some(r.cfg.ValidatorAddrs[shardValidator].EVMRPC) +} diff --git a/sei-tendermint/internal/p2p/giga_router_validator_test.go b/sei-tendermint/internal/p2p/giga_router_validator_test.go new file mode 100644 index 0000000000..8f971356b4 --- /dev/null +++ b/sei-tendermint/internal/p2p/giga_router_validator_test.go @@ -0,0 +1,278 @@ +package p2p + +import ( + "context" + "fmt" + "net/url" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + dbm "github.com/tendermint/tm-db" + "golang.org/x/time/rate" + + atypes "github.com/sei-protocol/sei-chain/sei-tendermint/autobahn/types" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/producer" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p/conn" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/proxy" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" + 
"github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/require" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/scope" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/tcp" + "github.com/sei-protocol/sei-chain/sei-tendermint/types" +) + +func TestGigaRouter_FinalizeBlocks(t *testing.T) { + const maxTxsPerBlock = 20 + const blocksPerLane = 5 + const txGasUsed = 21_000 + + ctx := t.Context() + rng := utils.TestRng() + _, keys := atypes.GenCommittee(rng, 4) + var cfgs []*testNodeCfg + for _, key := range keys { + cfgs = append(cfgs, &testNodeCfg{ + validatorKey: key, + nodeKey: makeKey(rng), + addr: tcp.TestReserveAddr(), + }) + } + addrs := map[atypes.PublicKey]GigaNodeAddr{} + for _, cfg := range cfgs { + addrs[cfg.validatorKey.Public()] = cfg.GigaNodeAddr() + } + genDoc := &types.GenesisDoc{ + ChainID: "giga-router-test", + InitialHeight: rng.Int63n(100000) + 1, + AppState: testAppStateJSON(rng), + } + require.NoError(t, genDoc.ValidateAndComplete()) + + err := scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { + var apps []*testApp + var gigas []*gigaValidatorRouter + var allTxs [][]byte + for i, cfg := range cfgs { + nodeInfo := makeInfo(cfg.nodeKey) + nodeInfo.ListenAddr = cfg.addr.String() + nodeInfo.Network = genDoc.ChainID + e := Endpoint{AddrPort: cfg.addr} + app := newTestApp() + proxyApp := proxy.New(app, proxy.NopMetrics()) + // In giga mode the CometBFT handshaker is skipped; the router's + // runExecute calls InitChain itself on fresh start. + giga, err := NewGigaValidatorRouter(&GigaValidatorConfig{ + GigaRouterCommonConfig: GigaRouterCommonConfig{ + // Aggressive dialing rate to speed up startup. 
+ DialInterval: 100 * time.Millisecond, + ValidatorAddrs: addrs, + PersistentStateDir: utils.None[string](), + App: proxyApp, + GenDoc: genDoc, + }, + ValidatorKey: cfg.validatorKey, + ViewTimeout: func(atypes.View) time.Duration { return time.Hour }, + Producer: &producer.Config{ + MaxGasWantedPerBlock: txGasUsed * maxTxsPerBlock, + MaxGasEstimatedPerBlock: txGasUsed * maxTxsPerBlock, + MaxTxsPerBlock: maxTxsPerBlock, + MaxTxsPerSecond: utils.None[uint64](), + BlockInterval: 100 * time.Millisecond, + AllowEmptyBlocks: false, + }, + }, cfg.nodeKey) + require.NoError(t, err, "NewGigaValidatorRouter[%v]", i) + router, err := NewRouter( + NopMetrics(), + cfg.nodeKey, + func() *types.NodeInfo { return &nodeInfo }, + dbm.NewMemDB(), + &RouterOptions{ + SelfAddress: utils.Some(e.NodeAddress(cfg.nodeKey.Public().NodeID())), + Endpoint: e, + Connection: conn.DefaultMConnConfig(), + IncomingConnectionWindow: utils.Some(time.Duration(0)), + MaxAcceptRate: utils.Some(rate.Inf), + MaxDialRate: utils.Some(rate.Limit(30)), + Giga: utils.Some[GigaRouter](giga), + }, + ) + require.NoError(t, err, "NewRouter[%v]", i) + s.SpawnBgNamed(fmt.Sprintf("router[%v]", i), func() error { return utils.IgnoreCancel(router.Run(ctx)) }) + s.SpawnBgNamed(fmt.Sprintf("giga[%v]", i), func() error { return utils.IgnoreCancel(giga.Run(ctx)) }) + apps = append(apps, app) + gigas = append(gigas, giga) + var txs [][]byte + for range maxTxsPerBlock * blocksPerLane { + tx := utils.GenBytes(rng, 100) + txs = append(txs, tx) + allTxs = append(allTxs, tx) + } + s.SpawnNamed(fmt.Sprintf("producer[%v]", i), func() error { + v := giga.Mempool().OrPanic("validator giga must have a mempool") + for _, tx := range txs { + if _, err := v.InsertTx(ctx, tx); err != nil { + return fmt.Errorf("producer.InsertTx(): %w", err) + } + } + return nil + }) + } + // Each node should finalize all txs locally. 
+ for _, app := range apps { + for _, tx := range allTxs { + require.NoError(t, app.WaitForTx(ctx, tx), "WaitForTx") + } + } + // Nodes should agree on the final state. + want := apps[0].Snapshot() + for i, app := range apps { + t.Logf("app[%v]", i) + require.NoError(t, utils.TestDiff(want, app.Snapshot()), "state mismatch app[%v]", i) + } + // Covers GigaRouter.LastCommittedBlockNumber() — after blocks have + // been finalized every node should report a non-zero + // consensus-committed height through the accessor used by /status. + for i, giga := range gigas { + committed := giga.LastCommittedBlockNumber() + require.Positive(t, committed, "router[%v].LastCommittedBlockNumber()", i) + // Covers GigaRouter.BlockByNumber — the accessor used by the + // Autobahn branch in env.Block to serve /block and evmrpc block + // lookups. Fetch the last committed block and verify it carries + // the expected height + hash, the right chain id, and that the + // payload Txs round-tripped (we just submitted txs). + rb, err := giga.BlockByNumber(ctx, atypes.GlobalBlockNumber(committed)) //nolint:gosec // committed is positive (validated above) + require.NoError(t, err, "router[%v].BlockByNumber(%v)", i, committed) + require.NotNil(t, rb.Block, "router[%v].BlockByNumber(%v).Block", i, committed) + require.Equal(t, committed, rb.Block.Height, "router[%v].BlockByNumber(%v) height", i, committed) + require.NotEmpty(t, rb.BlockID.Hash, "router[%v].BlockByNumber(%v) block hash", i, committed) + require.Equal(t, genDoc.ChainID, rb.Block.Header.ChainID, "router[%v].BlockByNumber(%v) chain id", i, committed) + // LastCommit is non-nil with empty Signatures — mirrors + // executeBlock's FinalizeBlock(DecidedLastCommit: empty) + // so trace replay and production both see "no votes" on + // the prior block. ToReqBeginBlock skips the per-val loop + // when Signatures is empty, so this is also enough to + // avoid the OOB deref the original PR was guarding against. 
+ require.NotNil(t, rb.Block.LastCommit, "router[%v].BlockByNumber(%v) LastCommit", i, committed) + require.Empty(t, rb.Block.LastCommit.Signatures, "router[%v].BlockByNumber(%v) Signatures", i, committed) + // Round-trip the just-fetched block hash back through + // BlockByHash and assert we get the same ResultBlock back. + var hashKey atypes.BlockHeaderHash + copy(hashKey[:], rb.BlockID.Hash) + rbh, err := giga.BlockByHash(ctx, hashKey) + require.NoError(t, err, "router[%v].BlockByHash(%x)", i, rb.BlockID.Hash) + require.Equal(t, rb, rbh, "router[%v].BlockByHash(%x) ≠ BlockByNumber(%v)", i, rb.BlockID.Hash, committed) + } + // Payload.Txs round-trips: for every retained block, the txs the + // data layer holds (GlobalBlock.Payload.Txs) must equal the txs + // surfaced through BlockByNumber. Iterates the full retain window + // rather than a fixed tail so the assertion holds regardless of + // where producers placed the test txs. Reaches into giga0.data + // directly — internal same-package access. + giga0 := gigas[0] + latest := giga0.LastCommittedBlockNumber() + for h := int64(1); h <= latest; h++ { + gbn := atypes.GlobalBlockNumber(h) //nolint:gosec // h is positive + gb, err := giga0.data.GlobalBlock(ctx, gbn) + if err != nil { + continue // pruned out of the retain window + } + rb, err := giga0.BlockByNumber(ctx, gbn) + require.NoError(t, err, "router[0].BlockByNumber(%v)", h) + // Convert rb.Block.Data.Txs ([]types.Tx) back to [][]byte + // to compare against gb.Payload.Txs() directly. 
+ rbBytes := make([][]byte, len(rb.Block.Data.Txs)) + for j, t := range rb.Block.Data.Txs { + rbBytes[j] = t + } + require.Equal(t, gb.Payload.Txs(), rbBytes, "router[0].BlockByNumber(%v).Block.Data.Txs ≠ data.GlobalBlock(%v).Payload.Txs", h, h) + } + return nil + }) + require.NoError(t, err) +} + +func TestGigaRouter_EvmProxy(t *testing.T) { + rng := utils.TestRng() + _, validatorKeys := atypes.GenCommittee(rng, 10) + var nodeKeys []NodeSecretKey + addrs := map[atypes.PublicKey]GigaNodeAddr{} + urlByValidator := map[atypes.PublicKey]*url.URL{} + // NewGigaRouter requires EVMRPC on every committee member in both + // validator and fullnode modes; the missing-URL branch of EvmProxy is + // unreachable. + for i, validatorKey := range validatorKeys { + nodeKey := makeKey(rng) + nodeKeys = append(nodeKeys, nodeKey) + rpcURL, err := url.Parse(fmt.Sprintf("http://validator-%d.example.com:8545", i)) + require.NoError(t, err) + addrs[validatorKey.Public()] = GigaNodeAddr{ + Key: nodeKey.Public(), + HostPort: tcp.HostPort{Hostname: "127.0.0.1", Port: 26657}, + EVMRPC: rpcURL, + } + urlByValidator[validatorKey.Public()] = rpcURL + } + genDoc := &types.GenesisDoc{ + ChainID: "giga-router-proxy-test", + InitialHeight: 1, + AppState: testAppStateJSON(rng), + } + require.NoError(t, genDoc.ValidateAndComplete()) + + router, err := NewGigaValidatorRouter(&GigaValidatorConfig{ + GigaRouterCommonConfig: GigaRouterCommonConfig{ + DialInterval: time.Second, + ValidatorAddrs: addrs, + PersistentStateDir: utils.None[string](), + App: proxy.New(newTestApp(), proxy.NopMetrics()), + GenDoc: genDoc, + }, + ValidatorKey: validatorKeys[0], + ViewTimeout: func(atypes.View) time.Duration { return time.Second }, + Producer: &producer.Config{ + MaxGasWantedPerBlock: 1, + MaxGasEstimatedPerBlock: 1, + MaxTxsPerBlock: 1, + MaxTxsPerSecond: utils.None[uint64](), + BlockInterval: time.Second, + }, + }, nodeKeys[0]) + require.NoError(t, err) + + localValidator := validatorKeys[0].Public() + localURL, 
ok := urlByValidator[localValidator] + require.True(t, ok) + + expectedRemoteURLs := map[string]struct{}{} + for validator, rpcURL := range urlByValidator { + if validator == localValidator { + continue + } + expectedRemoteURLs[rpcURL.String()] = struct{}{} + } + returnedRemoteURLs := map[string]struct{}{} + + for range 200 { + sender := common.BytesToAddress(utils.GenBytes(rng, common.AddressLength)) + shardValidator := router.data.Committee().EvmShard(sender) + + proxyURL, ok := router.EvmProxy(sender).Get() + expectedURL := urlByValidator[shardValidator] + + if shardValidator == localValidator { + // Self-shard: validator short-circuits to local mempool. + require.False(t, ok) + require.Nil(t, proxyURL) + } else { + require.True(t, ok) + require.NotNil(t, proxyURL) + require.Equal(t, expectedURL.String(), proxyURL.String()) + require.NotEqual(t, localURL.String(), proxyURL.String()) + returnedRemoteURLs[proxyURL.String()] = struct{}{} + } + } + + require.Equal(t, expectedRemoteURLs, returnedRemoteURLs) +} diff --git a/sei-tendermint/internal/p2p/router.go b/sei-tendermint/internal/p2p/router.go index fba795598a..494953f707 100644 --- a/sei-tendermint/internal/p2p/router.go +++ b/sei-tendermint/internal/p2p/router.go @@ -43,7 +43,7 @@ type Router struct { nodeInfoProducer func() *types.NodeInfo channels utils.RWMutex[map[ChannelID]*channel] - giga utils.Option[*GigaRouter] + giga utils.Option[GigaRouter] started chan struct{} } @@ -102,13 +102,10 @@ func NewRouter( peerDB: utils.NewWatch(peerDB), started: make(chan struct{}), } - if gigaCfg, ok := options.Giga.Get(); ok { - gr, err := NewGigaRouter(gigaCfg, privKey) - if err != nil { - return nil, fmt.Errorf("NewGigaRouter(): %w", err) - } - router.giga = utils.Some(gr) - } + // The GigaRouter is constructed by setup-side code (which picks the + // validator vs fullnode constructor based on whether a local validator + // key is present) and passed in via options.Giga. Just attach. 
+ router.giga = options.Giga router.BaseService = service.NewBaseService("router", router) return router, nil } @@ -146,7 +143,7 @@ func (r *Router) AllAddrs() []NodeAddress { return r.peerManager.AllAddrs() } // Giga returns the GigaRouter if Autobahn is enabled, None otherwise. // Consumers (e.g. the /status RPC handler) use this to reach Autobahn-specific // state like the last committed block number. -func (r *Router) Giga() utils.Option[*GigaRouter] { return r.giga } +func (r *Router) Giga() utils.Option[GigaRouter] { return r.giga } // OpenChannel opens a new channel for the given message type. func OpenChannel[T gogoproto.Message](r *Router, chDesc ChannelDescriptor[T]) (*Channel[T], error) { @@ -431,9 +428,6 @@ func (r *Router) Run(ctx context.Context) error { s.SpawnNamed("dialPeers", func() error { return r.dialPeersRoutine(ctx) }) s.SpawnNamed("storePeers", func() error { return r.storePeersRoutine(ctx) }) s.SpawnNamed("metrics", func() error { return r.metricsRoutine(ctx) }) - if giga, ok := r.giga.Get(); ok { - s.SpawnNamed("giga", func() error { return giga.Run(ctx) }) - } return nil }) } diff --git a/sei-tendermint/internal/p2p/router_test.go b/sei-tendermint/internal/p2p/router_test.go index 1638408ab0..d660b4f7a7 100644 --- a/sei-tendermint/internal/p2p/router_test.go +++ b/sei-tendermint/internal/p2p/router_test.go @@ -14,13 +14,7 @@ import ( dbm "github.com/tendermint/tm-db" "golang.org/x/time/rate" - abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" - atypes "github.com/sei-protocol/sei-chain/sei-tendermint/autobahn/types" - "github.com/sei-protocol/sei-chain/sei-tendermint/crypto/ed25519" - "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/consensus" - "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/producer" "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p/conn" - "github.com/sei-protocol/sei-chain/sei-tendermint/internal/proxy" 
"github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/require" "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/scope" @@ -309,77 +303,6 @@ func TestRouter_GigaNotSetByDefault(t *testing.T) { require.False(t, router.giga.IsPresent(), "GigaRouter should not be set with default options") } -func TestRouter_GigaSetWhenConfigured(t *testing.T) { - rng := utils.TestRng() - nodeKey := makeKey(rng) - // Use a separate key for the validator to verify both propagate independently. - valKey := atypes.SecretKeyFromED25519(ed25519.SecretKey(makeKey(rng))) - - validatorAddrs := map[atypes.PublicKey]GigaNodeAddr{ - valKey.Public(): { - Key: nodeKey.Public(), - HostPort: tcp.HostPort{Hostname: "10.0.0.1", Port: 9999}, - }, - } - - // Use intentionally non-default values to ensure config actually propagates. - opts := makeRouterOptions() - proxyApp := proxy.New(abci.BaseApplication{}, proxy.NopMetrics()) - opts.Giga = utils.Some(&GigaRouterConfig{ - DialInterval: 7 * time.Second, - ValidatorAddrs: validatorAddrs, - Consensus: &consensus.Config{ - Key: valKey, - ViewTimeout: func(atypes.View) time.Duration { return 3 * time.Second }, - PersistentStateDir: utils.None[string](), - }, - Producer: &producer.Config{ - App: proxyApp, - MaxGasWantedPerBlock: 77_000_000, - MaxGasEstimatedPerBlock: 76_000_000, - MaxTxsPerBlock: 7_777, - MaxTxsPerSecond: utils.Some(uint64(999)), - BlockInterval: 777 * time.Millisecond, - }, - GenDoc: &types.GenesisDoc{ - ChainID: "giga-e2e-test", - InitialHeight: 42, - GenesisTime: time.Now(), - }, - }) - - router := makeRouterWithOptionsAndKey(opts, nodeKey) - require.True(t, router.giga.IsPresent(), "GigaRouter should be set when Giga config is provided") - - giga, _ := router.giga.Get() - - // Verify non-default config values were propagated. 
- require.Equal(t, 7*time.Second, giga.cfg.DialInterval) - require.Len(t, giga.cfg.ValidatorAddrs, 1) - addr, ok := giga.cfg.ValidatorAddrs[valKey.Public()] - require.True(t, ok, "validator key should be in ValidatorAddrs") - require.Equal(t, nodeKey.Public(), addr.Key, "node key should match") - require.Equal(t, "10.0.0.1", addr.HostPort.Hostname) - require.Equal(t, uint16(9999), addr.HostPort.Port) - - // Verify consensus key is the validator key (distinct from node key). - require.Equal(t, valKey.Public(), giga.cfg.Consensus.Key.Public()) - require.Equal(t, 3*time.Second, giga.cfg.Consensus.ViewTimeout(atypes.View{})) - - // Verify producer config with non-default values. - require.Equal(t, uint64(77_000_000), giga.cfg.Producer.MaxGasWantedPerBlock) - require.Equal(t, uint64(76_000_000), giga.cfg.Producer.MaxGasEstimatedPerBlock) - require.Equal(t, uint64(7_777), giga.cfg.Producer.MaxTxsPerBlock) - maxTps, tpsOk := giga.cfg.Producer.MaxTxsPerSecond.Get() - require.True(t, tpsOk) - require.Equal(t, uint64(999), maxTps) - require.Equal(t, 777*time.Millisecond, giga.cfg.Producer.BlockInterval) - - // Verify genesis doc. - require.Equal(t, "giga-e2e-test", giga.cfg.GenDoc.ChainID) - require.Equal(t, int64(42), giga.cfg.GenDoc.InitialHeight) -} - func blindHandshake(ctx context.Context, c tcp.Conn, key NodeSecretKey, info types.NodeInfo) error { return utils.IgnoreCancel(scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { sc, err := conn.MakeSecretConnection(ctx, c) diff --git a/sei-tendermint/internal/p2p/routeroptions.go b/sei-tendermint/internal/p2p/routeroptions.go index 6f2bb04c86..b7eca55273 100644 --- a/sei-tendermint/internal/p2p/routeroptions.go +++ b/sei-tendermint/internal/p2p/routeroptions.go @@ -46,8 +46,11 @@ type RouterOptions struct { // (it is getting large anyway) but thats a major refactor. PexOnHandshake bool - // Whether sei giga connections should be established. 
- Giga utils.Option[*GigaRouterConfig] + // Giga, if Some, is the already-constructed GigaRouter the Router + // should attach. Setup-side code (node/setup.go) picks the right + // constructor — NewGigaValidatorRouter or NewGigaFullnodeRouter — + // based on whether the node has a local validator key. + Giga utils.Option[GigaRouter] // Local endpoint to listen for p2p connections on. // SelfAddress should point to this endpoint. diff --git a/sei-tendermint/internal/proxy/proxy.go b/sei-tendermint/internal/proxy/proxy.go index de8abac1a9..7c5aa7349d 100644 --- a/sei-tendermint/internal/proxy/proxy.go +++ b/sei-tendermint/internal/proxy/proxy.go @@ -95,6 +95,10 @@ func (app *Proxy) GetValidators() []types.ValidatorUpdate { return app.app.GetValidators() } +func (app *Proxy) LastBlockHeight() int64 { + return app.app.LastBlockHeight() +} + func (app *Proxy) ListSnapshots(ctx context.Context, req *types.RequestListSnapshots) (*types.ResponseListSnapshots, error) { defer addTimeSample(app.metrics.MethodTiming.With("method", "list_snapshots", "type", "sync"))() return app.app.ListSnapshots(ctx, req) diff --git a/sei-tendermint/internal/rpc/core/env.go b/sei-tendermint/internal/rpc/core/env.go index b59cbda690..5fada14787 100644 --- a/sei-tendermint/internal/rpc/core/env.go +++ b/sei-tendermint/internal/rpc/core/env.go @@ -129,9 +129,9 @@ func validatePage(pagePtr *int, perPage, totalCount int) (int, error) { // if r, ok := env.gigaRouter().Get(); ok { // // Autobahn path, r is the router // } -func (env *Environment) gigaRouter() utils.Option[*p2p.GigaRouter] { +func (env *Environment) gigaRouter() utils.Option[p2p.GigaRouter] { if env.Router == nil { // inspect mode - return utils.None[*p2p.GigaRouter]() + return utils.None[p2p.GigaRouter]() } return env.Router.Giga() } diff --git a/sei-tendermint/internal/rpc/core/mempool.go b/sei-tendermint/internal/rpc/core/mempool.go index f1b45d03c0..20235f7256 100644 --- a/sei-tendermint/internal/rpc/core/mempool.go +++ 
b/sei-tendermint/internal/rpc/core/mempool.go @@ -17,18 +17,23 @@ import ( ) // EvmProxy returns the EVM RPC URL of the autobahn validator that owns the -// sender shard. If the sender maps to the local validator, or if no EVM RPC -// endpoint is configured for the shard owner, it returns (nil, false). -func (env *Environment) EvmProxy(sender common.Address) (*url.URL, bool) { +// sender shard, or None if the sender maps to the local validator (handle +// locally) or autobahn isn't configured. +func (env *Environment) EvmProxy(sender common.Address) utils.Option[*url.URL] { if r, ok := env.gigaRouter().Get(); ok { return r.EvmProxy(sender) } - return nil, false + return utils.None[*url.URL]() } func (env *Environment) EvmTxByHash(hash common.Hash) (types.Tx, bool) { if giga, ok := env.gigaRouter().Get(); ok { - return giga.Mempool().EvmTxByHash(hash) + if v, ok := giga.Mempool().Get(); ok { + return v.EvmTxByHash(hash) + } + // Fullnode: no local mempool. The tx (if it exists locally at all) + // would be at the shard owner; we can't easily query it from here. 
+ return nil, false } if mp, ok := env.Mempool.Get(); ok { return mp.EvmTxByHash(hash) @@ -46,7 +51,11 @@ func (env *Environment) EvmTxByHash(hash common.Hash) (types.Tx, bool) { // Deprecated and should be removed in 0.37 func (env *Environment) BroadcastTxAsync(ctx context.Context, req *coretypes.RequestBroadcastTx) (*coretypes.ResultBroadcastTx, error) { if giga, ok := env.gigaRouter().Get(); ok { - go func() { _, _ = giga.Mempool().TryInsertTx(ctx, req.Tx) }() + v, ok := giga.Mempool().Get() + if !ok { + return nil, errors.New("autobahn fullnode has no local mempool; broadcast_tx_* must be sent to a validator") + } + go func() { _, _ = v.TryInsertTx(ctx, req.Tx) }() return &coretypes.ResultBroadcastTx{Hash: req.Tx.Hash().Bytes()}, nil } mp, err := env.requireMempool() @@ -68,7 +77,11 @@ func (env *Environment) BroadcastTxSync(ctx context.Context, req *coretypes.Requ // More: https://docs.tendermint.com/master/rpc/#/Tx/broadcast_tx_sync func (env *Environment) BroadcastTx(ctx context.Context, req *coretypes.RequestBroadcastTx) (*coretypes.ResultBroadcastTx, error) { if giga, ok := env.gigaRouter().Get(); ok { - r, err := giga.Mempool().InsertTx(ctx, req.Tx) + v, ok := giga.Mempool().Get() + if !ok { + return nil, errors.New("autobahn fullnode has no local mempool; broadcast_tx_* must be sent to a validator") + } + r, err := v.InsertTx(ctx, req.Tx) if err != nil { return nil, err } @@ -107,7 +120,11 @@ func (env *Environment) BroadcastTxCommit(ctx context.Context, req *coretypes.Re } if giga, ok := env.gigaRouter().Get(); ok { - r, err := giga.Mempool().InsertTx(ctx, req.Tx) + v, ok := giga.Mempool().Get() + if !ok { + return nil, errors.New("autobahn fullnode has no local mempool; broadcast_tx_* must be sent to a validator") + } + r, err := v.InsertTx(ctx, req.Tx) if err != nil { return nil, err } @@ -189,9 +206,14 @@ func (env *Environment) broadcastTxCommitFromCheckTx(ctx context.Context, req *c // More: 
https://docs.tendermint.com/master/rpc/#/Info/unconfirmed_txs func (env *Environment) UnconfirmedTxs(ctx context.Context, req *coretypes.RequestUnconfirmedTxs) (*coretypes.ResultUnconfirmedTxs, error) { if giga, ok := env.gigaRouter().Get(); ok { + v, ok := giga.Mempool().Get() + if !ok { + // Fullnode: no mempool; return empty (no pending txs locally). + return &coretypes.ResultUnconfirmedTxs{}, nil + } // NOTE: this pagination seems to be useless, given that the mempool content is // constantly changing and we don't have any snapshot marker in the request. - rawTxs := giga.Mempool().UnconfirmedTxs() + rawTxs := v.UnconfirmedTxs() perPage := env.validatePerPage(req.PerPage.IntPtr()) page, err := validatePage(req.Page.IntPtr(), perPage, len(rawTxs)) if err != nil { @@ -244,7 +266,11 @@ func (env *Environment) UnconfirmedTxs(ctx context.Context, req *coretypes.Reque // More: https://docs.tendermint.com/master/rpc/#/Info/num_unconfirmed_txs func (env *Environment) NumUnconfirmedTxs(ctx context.Context) (*coretypes.ResultUnconfirmedTxs, error) { if giga, ok := env.gigaRouter().Get(); ok { - rawTxs := giga.Mempool().UnconfirmedTxs() + v, ok := giga.Mempool().Get() + if !ok { + return &coretypes.ResultUnconfirmedTxs{}, nil + } + rawTxs := v.UnconfirmedTxs() sizeBytes := 0 for _, tx := range rawTxs { sizeBytes += len(tx) diff --git a/sei-tendermint/node/node.go b/sei-tendermint/node/node.go index c834cf8f11..2388d5ffee 100644 --- a/sei-tendermint/node/node.go +++ b/sei-tendermint/node/node.go @@ -58,6 +58,7 @@ type nodeImpl struct { // network router *p2p.Router + giga utils.Option[p2p.GigaRouter] ServiceRestartCh utils.Option[chan []string] nodeInfo types.NodeInfo nodeKey types.NodeKey // our node privkey @@ -192,16 +193,19 @@ func makeNode( }, } - // Autobahn requires a local validator key; remote signers are not supported. 
- if cfg.AutobahnConfigFile != "" && cfg.PrivValidator.ListenAddr != "" { - return nil, fmt.Errorf("autobahn does not support remote validator signers (priv-validator.laddr is set)") - } gigaEnabled := cfg.AutobahnConfigFile != "" + // Pass the local key when autobahn is on; setup.go's + // buildGigaRouter picks validator-vs-fullnode by cfg.Mode and + // uses the key to check that a validator-mode node is in the committee. + gigaValidatorKey := utils.None[atypes.SecretKey]() + if gigaEnabled { + gigaValidatorKey = utils.Some(atypes.SecretKeyFromED25519(filePrivval.Key.PrivKey)) + } router, peerCloser, err := createRouter( nodeMetrics.p2p, node.NodeInfo, nodeKey, - utils.Some(atypes.SecretKeyFromED25519(filePrivval.Key.PrivKey)), + gigaValidatorKey, cfg, utils.Some(proxyApp), genDoc, @@ -212,6 +216,7 @@ func makeNode( return nil, fmt.Errorf("failed to create router: %w", err) } node.router = router + node.giga = router.Giga() node.rpcEnv.Router = router evReactor, evPool, edbCloser, err := createEvidenceReactor(cfg, dbProvider, @@ -521,6 +526,12 @@ func (n *nodeImpl) OnStart(ctx context.Context) error { if m, ok := n.mempool.Get(); ok { n.SpawnCritical("mempool", m.Run) } + // Run the GigaRouter alongside the transport. n.giga is the canonical + // reference; the Router holds a copy only for its own internal use + // (dispatching inbound giga connections). Lifecycle is owned here. 
+ if giga, ok := n.giga.Get(); ok { + n.SpawnCritical("giga", giga.Run) + } for _, reactor := range n.services { if err := reactor.Start(ctx); err != nil { diff --git a/sei-tendermint/node/setup.go b/sei-tendermint/node/setup.go index 2479a698c0..ca5a429317 100644 --- a/sei-tendermint/node/setup.go +++ b/sei-tendermint/node/setup.go @@ -15,7 +15,6 @@ import ( atypes "github.com/sei-protocol/sei-chain/sei-tendermint/autobahn/types" "github.com/sei-protocol/sei-chain/sei-tendermint/config" "github.com/sei-protocol/sei-chain/sei-tendermint/crypto" - autobahnConsensus "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/consensus" "github.com/sei-protocol/sei-chain/sei-tendermint/internal/autobahn/producer" "github.com/sei-protocol/sei-chain/sei-tendermint/internal/blocksync" "github.com/sei-protocol/sei-chain/sei-tendermint/internal/consensus" @@ -190,39 +189,49 @@ func loadAutobahnFileConfig(path string) (*config.AutobahnFileConfig, error) { return &fc, nil } -// buildGigaConfig constructs a GigaRouterConfig from the autobahn config file, node key, and genesis doc. -func buildGigaConfig( - autobahnConfigFile string, - nodeKey types.NodeKey, - validatorKey atypes.SecretKey, - app *proxy.Proxy, - genDoc *types.GenesisDoc, -) (*p2p.GigaRouterConfig, error) { +// loadAutobahnCommittee reads the autobahn config file and builds the +// committee map (validator pubkey → GigaNodeAddr) used by both router +// modes. Rejects duplicate validator/node keys. 
+func loadAutobahnCommittee(autobahnConfigFile string) (*config.AutobahnFileConfig, map[atypes.PublicKey]p2p.GigaNodeAddr, error) { if autobahnConfigFile == "" { - return nil, errors.New("autobahn config file path must not be empty") + return nil, nil, errors.New("autobahn config file path must not be empty") } fc, err := loadAutobahnFileConfig(autobahnConfigFile) if err != nil { - return nil, fmt.Errorf("loading autobahn config from %q: %w", autobahnConfigFile, err) + return nil, nil, fmt.Errorf("loading autobahn config from %q: %w", autobahnConfigFile, err) } - validatorAddrs := map[atypes.PublicKey]p2p.GigaNodeAddr{} seenNodeKeys := map[p2p.NodePublicKey]bool{} - for _, entry := range fc.Validators { if _, exists := validatorAddrs[entry.ValidatorKey]; exists { - return nil, fmt.Errorf("duplicate validator key in autobahn validators: %s", entry.ValidatorKey) + return nil, nil, fmt.Errorf("duplicate validator key in autobahn validators: %s", entry.ValidatorKey) } if seenNodeKeys[entry.NodeKey] { - return nil, fmt.Errorf("duplicate node key in autobahn validators: %s", entry.NodeKey) + return nil, nil, fmt.Errorf("duplicate node key in autobahn validators: %s", entry.NodeKey) } seenNodeKeys[entry.NodeKey] = true validatorAddrs[entry.ValidatorKey] = p2p.GigaNodeAddr{ Key: entry.NodeKey, HostPort: entry.Address, - EVMRPC: entry.GetEVMRPC(), + EVMRPC: entry.EVMRPC.URL, } } + return fc, validatorAddrs, nil +} + +// buildValidatorGigaConfig assembles a GigaValidatorConfig. Errors if +// self isn't in the committee or the node key doesn't match. +func buildValidatorGigaConfig( + autobahnConfigFile string, + nodeKey types.NodeKey, + validatorKey atypes.SecretKey, + app *proxy.Proxy, + genDoc *types.GenesisDoc, +) (*p2p.GigaValidatorConfig, error) { + fc, validatorAddrs, err := loadAutobahnCommittee(autobahnConfigFile) + if err != nil { + return nil, err + } // Verify self is in the validator set. 
selfAddr, ok := validatorAddrs[validatorKey.Public()] @@ -233,25 +242,23 @@ func buildGigaConfig( if selfAddr.Key != selfNodePub { return nil, fmt.Errorf("node key mismatch for own validator entry: config has %s, but node key is %s", selfAddr.Key, selfNodePub) } - - // The producer's max-gas-per-block is the chain's gas-limit consensus - // rule, which lives in genesis (consensus_params.block.max_gas) — the - // same number the EVM runtime reads via ctx.ConsensusParams().Block.MaxGas. - if genDoc.ConsensusParams == nil || genDoc.ConsensusParams.Block.MaxGas <= 0 { - return nil, fmt.Errorf("%w (got %v)", ErrGenesisMaxGasInvalid, genDoc.ConsensusParams) - } - return &p2p.GigaRouterConfig{ - DialInterval: time.Duration(fc.DialInterval), - ValidatorAddrs: validatorAddrs, - Consensus: &autobahnConsensus.Config{ - Key: validatorKey, - ViewTimeout: func(atypes.View) time.Duration { - return time.Duration(fc.ViewTimeout) - }, - PersistentStateDir: fc.PersistentStateDir, + if _, err := genesisMaxGas(genDoc); err != nil { + return nil, err + } + return &p2p.GigaValidatorConfig{ + GigaRouterCommonConfig: p2p.GigaRouterCommonConfig{ + DialInterval: time.Duration(fc.DialInterval), + ValidatorAddrs: validatorAddrs, + PersistentStateDir: fc.PersistentStateDir, + App: app, + GenDoc: genDoc, + MaxInboundFullnodePeers: resolveMaxInboundFullnodePeers(fc.MaxInboundFullnodePeers), + }, + ValidatorKey: validatorKey, + ViewTimeout: func(atypes.View) time.Duration { + return time.Duration(fc.ViewTimeout) }, Producer: &producer.Config{ - App: app, MaxGasWantedPerBlock: genDoc.ConsensusParams.Block.MaxGasWantedUint64(), MaxGasEstimatedPerBlock: genDoc.ConsensusParams.Block.MaxGasUint64(), MaxTxsPerBlock: fc.MaxTxsPerBlock, @@ -259,7 +266,117 @@ func buildGigaConfig( AllowEmptyBlocks: fc.AllowEmptyBlocks, BlockInterval: time.Duration(fc.BlockInterval), }, - GenDoc: genDoc, + }, nil +} + +// buildGigaRouter picks validator-vs-fullnode by cfg.Mode: +// "validator" runs the validator path, any 
other mode runs as a fullnode. +// Mode is the operator's explicit role declaration, kept separate from +// committee membership so a newly-joined committee member can finish +// catch-up as a fullnode before the operator flips to mode = "validator". +// A warning is logged if mode and committee membership disagree so an +// operator misconfiguration is visible at startup. +func buildGigaRouter( + cfg *config.Config, + nodeKey types.NodeKey, + validatorKey utils.Option[atypes.SecretKey], + app *proxy.Proxy, + genDoc *types.GenesisDoc, +) (p2p.GigaRouter, error) { + _, validatorAddrs, err := loadAutobahnCommittee(cfg.AutobahnConfigFile) + if err != nil { + return nil, err + } + if valKey, ok := validatorKey.Get(); ok { + _, inCommittee := validatorAddrs[valKey.Public()] + switch { + case cfg.Mode == config.ModeValidator && !inCommittee: + logger.Warn("Autobahn: mode is \"validator\" but local validator key is not in the committee", "valKey", valKey.Public()) + case cfg.Mode != config.ModeValidator && inCommittee: + logger.Warn("Autobahn: local validator key is in the committee but mode is not \"validator\"; starting as fullnode", "mode", cfg.Mode) + } + } + if cfg.Mode == config.ModeValidator { + valKey, ok := validatorKey.Get() + if !ok { + return nil, fmt.Errorf("autobahn: mode = %q requires a local validator key", cfg.Mode) + } + // Remote signers aren't supported on the validator path — + // autobahn signs in-process. Fullnodes don't sign and aren't + // penalised for having priv-validator.laddr set. 
+ if cfg.PrivValidator.ListenAddr != "" { + return nil, fmt.Errorf("autobahn does not support remote validator signers (priv-validator.laddr is set)") + } + valCfg, err := buildValidatorGigaConfig(cfg.AutobahnConfigFile, nodeKey, valKey, app, genDoc) + if err != nil { + return nil, fmt.Errorf("buildValidatorGigaConfig: %w", err) + } + rootifyPersistentStateDir(cfg.RootDir, &valCfg.GigaRouterCommonConfig) + logger.Info("Autobahn: starting as validator", "validators", len(valCfg.ValidatorAddrs)) + return p2p.NewGigaValidatorRouter(valCfg, p2p.NodeSecretKey(nodeKey)) + } + fnCfg, err := buildFullnodeGigaConfig(cfg.AutobahnConfigFile, app, genDoc) + if err != nil { + return nil, fmt.Errorf("buildFullnodeGigaConfig: %w", err) + } + rootifyPersistentStateDir(cfg.RootDir, fnCfg) + logger.Info("Autobahn: starting as fullnode", "mode", cfg.Mode, "validators", len(validatorAddrs)) + return p2p.NewGigaFullnodeRouter(fnCfg, p2p.NodeSecretKey(nodeKey)) +} + +// rootifyPersistentStateDir resolves a relative PersistentStateDir +// against the node's --home dir (mirrors config.go's rootify). Absolute +// paths pass through; None stays None. +func rootifyPersistentStateDir(rootDir string, c *p2p.GigaRouterCommonConfig) { + if dir, ok := c.PersistentStateDir.Get(); ok && !filepath.IsAbs(dir) { + c.PersistentStateDir = utils.Some(filepath.Join(rootDir, dir)) + } +} + +// resolveMaxInboundFullnodePeers: None ⇒ default, Some(0) ⇒ reject all, +// Some(n) ⇒ n. The default lives in the config package so giga_router +// doesn't carry an operator-facing knob. +func resolveMaxInboundFullnodePeers(o utils.Option[uint64]) int { + if v, ok := o.Get(); ok { + return int(v) //nolint:gosec // bounded by maxInboundFullnodePeers in giga_router_common + } + return config.DefaultMaxInboundFullnodePeers +} + +// genesisMaxGas returns consensus_params.block.max_gas as uint64. Errors +// when missing or <= 0 (CometBFT uses -1 for "unlimited" which neither +// path supports). 
+func genesisMaxGas(genDoc *types.GenesisDoc) (uint64, error) { + if genDoc.ConsensusParams == nil || genDoc.ConsensusParams.Block.MaxGas <= 0 { + return 0, fmt.Errorf("%w (got %v)", ErrGenesisMaxGasInvalid, genDoc.ConsensusParams) + } + return uint64(genDoc.ConsensusParams.Block.MaxGas), nil //nolint:gosec // validated > 0 above +} + +// buildFullnodeGigaConfig assembles the common config for a fullnode +// GigaRouter. No consensus/producer/mempool — fullnodes pull blocks rather +// than producing them and forward every EVM tx to the shard owner. +func buildFullnodeGigaConfig( + autobahnConfigFile string, + app *proxy.Proxy, + genDoc *types.GenesisDoc, +) (*p2p.GigaRouterCommonConfig, error) { + fc, validatorAddrs, err := loadAutobahnCommittee(autobahnConfigFile) + if err != nil { + return nil, err + } + // MaxGasEstimatedPerBlock reads through to genDoc; validate the source + // so a malformed genesis can't silently expose 0 to clients. + if _, err := genesisMaxGas(genDoc); err != nil { + return nil, err + } + return &p2p.GigaRouterCommonConfig{ + DialInterval: time.Duration(fc.DialInterval), + ValidatorAddrs: validatorAddrs, + PersistentStateDir: fc.PersistentStateDir, + App: app, + GenDoc: genDoc, + MaxInboundFullnodePeers: resolveMaxInboundFullnodePeers(fc.MaxInboundFullnodePeers), }, nil } @@ -353,31 +470,19 @@ func createRouter( for _, p := range tmstrings.SplitAndTrimEmpty(cfg.P2P.UnconditionalPeerIDs, ",", " ") { options.UnconditionalPeers = append(options.UnconditionalPeers, types.NodeID(p)) } - // Wire up Autobahn (GigaRouter) if enabled. + // Wire up Autobahn if enabled. Role dispatch (validator vs fullnode) + // happens inside buildGigaRouter based on cfg.Mode. if cfg.AutobahnConfigFile != "" { - logger.Info("Autobahn config enabled", "config_file", cfg.AutobahnConfigFile) - // TODO: add support for autobahn non-validator (observer) nodes that don't need a signing key. 
- valKey, ok := validatorKey.Get() - if !ok { - return nil, closer, fmt.Errorf("autobahn non-validator nodes are not supported yet; a local validator key is required") - } - app, ok := app.Get() + logger.Info("Autobahn config enabled", "config_file", cfg.AutobahnConfigFile, "mode", cfg.Mode) + proxyApp, ok := app.Get() if !ok { return nil, closer, fmt.Errorf("autobahn requires app") } - gigaCfg, err := buildGigaConfig(cfg.AutobahnConfigFile, nodeKey, valKey, app, genDoc) + giga, err := buildGigaRouter(cfg, nodeKey, validatorKey, proxyApp, genDoc) if err != nil { - return nil, closer, fmt.Errorf("buildGigaConfig: %w", err) - } - // Resolve a relative persistent_state_dir against the node's --home dir, - // matching how other paths in the tendermint config are handled - // (config.go's rootify). Absolute paths pass through unchanged. None - // means the operator opted into in-memory-only mode and stays None. - if dir, ok := gigaCfg.Consensus.PersistentStateDir.Get(); ok && !filepath.IsAbs(dir) { - gigaCfg.Consensus.PersistentStateDir = utils.Some(filepath.Join(cfg.RootDir, dir)) + return nil, closer, err } - logger.Info("Autobahn config loaded", "validators", len(gigaCfg.ValidatorAddrs)) - options.Giga = utils.Some(gigaCfg) + options.Giga = utils.Some(giga) } peerDB, err := dbProvider(&config.DBContext{ID: "peerstore", Config: cfg}) diff --git a/sei-tendermint/node/setup_test.go b/sei-tendermint/node/setup_test.go index 97b02ca9ce..375a0098fa 100644 --- a/sei-tendermint/node/setup_test.go +++ b/sei-tendermint/node/setup_test.go @@ -2,6 +2,7 @@ package node import ( "encoding/json" + "net/url" "os" "path/filepath" "testing" @@ -40,6 +41,7 @@ func makeValidator(valSeed, nodeSeed []byte, address string) config.AutobahnVali ValidatorKey: valKey.Public(), NodeKey: nodeKey.Public(), Address: hp, + EVMRPC: config.URL{URL: utils.OrPanic1(url.Parse("http://" + address))}, } } @@ -86,7 +88,7 @@ func TestBuildGigaConfig_EmptyPathErrors(t *testing.T) { nodeKey := 
makeTestNodeKey([]byte("test-node-key")) valKey := makeTestValidatorKey([]byte("val-seed")) txMempool, genDoc := makeTestGigaDeps() - _, err := buildGigaConfig("", nodeKey, valKey, txMempool, genDoc) + _, err := buildValidatorGigaConfig("", nodeKey, valKey, txMempool, genDoc) assert.Error(t, err, "empty path should error") } @@ -112,23 +114,21 @@ func TestBuildGigaConfig_EnabledWithValidators(t *testing.T) { valKey := makeTestValidatorKey([]byte("val1-seed")) txMempool, genDoc := makeTestGigaDeps() - result, err := buildGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) + result, err := buildValidatorGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) require.NoError(t, err) require.NotNil(t, result) assert.Len(t, result.ValidatorAddrs, 3) assert.Equal(t, 5*time.Second, result.DialInterval) - // Consensus config. - require.NotNil(t, result.Consensus) - assert.Equal(t, 3*time.Second, result.Consensus.ViewTimeout(atypes.View{})) - persistDir, ok := result.Consensus.PersistentStateDir.Get() + assert.Equal(t, 3*time.Second, result.ViewTimeout(atypes.View{})) + persistDir, ok := result.PersistentStateDir.Get() require.True(t, ok) assert.Equal(t, "/tmp/autobahn-state", persistDir) - // Verify the consensus key is derived from the validator key, not the node key. + // Verify the validator key is derived from the validator-key seed, not the node key. expectedValPub := makeTestValidatorKey([]byte("val1-seed")).Public() - assert.Equal(t, expectedValPub, result.Consensus.Key.Public()) + assert.Equal(t, expectedValPub, result.ValidatorKey.Public()) // Producer config. 
require.NotNil(t, result.Producer) @@ -151,8 +151,9 @@ func TestBuildGigaConfig_NoneMaxTxsPerSecond(t *testing.T) { valKey := makeTestValidatorKey([]byte("val-seed")) txMempool, genDoc := makeTestGigaDeps() - result, err := buildGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) + result, err := buildValidatorGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) require.NoError(t, err) + require.NotNil(t, result.Producer) assert.False(t, result.Producer.MaxTxsPerSecond.IsPresent()) } @@ -164,9 +165,9 @@ func TestBuildGigaConfig_NonePersistentStateDir(t *testing.T) { valKey := makeTestValidatorKey([]byte("val-seed")) txMempool, genDoc := makeTestGigaDeps() - result, err := buildGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) + result, err := buildValidatorGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) require.NoError(t, err) - assert.False(t, result.Consensus.PersistentStateDir.IsPresent()) + assert.False(t, result.PersistentStateDir.IsPresent()) } func TestBuildGigaConfig_InvalidConfigFile(t *testing.T) { @@ -175,21 +176,21 @@ func TestBuildGigaConfig_InvalidConfigFile(t *testing.T) { txMempool, genDoc := makeTestGigaDeps() t.Run("missing file", func(t *testing.T) { - _, err := buildGigaConfig("/nonexistent/autobahn.json", nodeKey, valKey, txMempool, genDoc) + _, err := buildValidatorGigaConfig("/nonexistent/autobahn.json", nodeKey, valKey, txMempool, genDoc) assert.Error(t, err) }) t.Run("invalid json", func(t *testing.T) { path := filepath.Join(t.TempDir(), "bad.json") require.NoError(t, os.WriteFile(path, []byte("not json"), 0644)) - _, err := buildGigaConfig(path, nodeKey, valKey, txMempool, genDoc) + _, err := buildValidatorGigaConfig(path, nodeKey, valKey, txMempool, genDoc) assert.Error(t, err) }) t.Run("empty validators", func(t *testing.T) { fc := defaultFileConfig([]config.AutobahnValidator{}) cfgFile := writeAutobahnConfig(t, fc) - _, err := buildGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) + _, err := 
buildValidatorGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) assert.Error(t, err) assert.Contains(t, err.Error(), "validators must not be empty") }) @@ -206,21 +207,21 @@ func TestBuildGigaConfig_GenesisMaxGas(t *testing.T) { t.Run("nil ConsensusParams", func(t *testing.T) { txMempool, genDoc := makeTestGigaDeps() genDoc.ConsensusParams = nil - _, err := buildGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) + _, err := buildValidatorGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) assert.ErrorIs(t, err, ErrGenesisMaxGasInvalid) }) t.Run("zero MaxGas", func(t *testing.T) { txMempool, genDoc := makeTestGigaDeps() genDoc.ConsensusParams.Block.MaxGas = 0 - _, err := buildGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) + _, err := buildValidatorGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) assert.ErrorIs(t, err, ErrGenesisMaxGasInvalid) }) t.Run("negative MaxGas", func(t *testing.T) { txMempool, genDoc := makeTestGigaDeps() genDoc.ConsensusParams.Block.MaxGas = -1 - _, err := buildGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) + _, err := buildValidatorGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) assert.ErrorIs(t, err, ErrGenesisMaxGasInvalid) }) } @@ -236,7 +237,7 @@ func TestBuildGigaConfig_DuplicateValidatorKey(t *testing.T) { valKey := makeTestValidatorKey([]byte("val-seed")) txMempool, genDoc := makeTestGigaDeps() - _, err := buildGigaConfig(path, nodeKey, valKey, txMempool, genDoc) + _, err := buildValidatorGigaConfig(path, nodeKey, valKey, txMempool, genDoc) assert.Error(t, err) assert.Contains(t, err.Error(), "duplicate validator key") } @@ -252,7 +253,7 @@ func TestBuildGigaConfig_DuplicateNodeKey(t *testing.T) { valKey := makeTestValidatorKey([]byte("val1")) txMempool, genDoc := makeTestGigaDeps() - _, err := buildGigaConfig(path, nodeKey, valKey, txMempool, genDoc) + _, err := buildValidatorGigaConfig(path, nodeKey, valKey, txMempool, genDoc) assert.Error(t, err) assert.Contains(t, err.Error(), 
"duplicate node key") } @@ -264,7 +265,7 @@ func TestBuildGigaConfig_SelfNotInValidators(t *testing.T) { valKey := makeTestValidatorKey([]byte("my-val")) txMempool, genDoc := makeTestGigaDeps() - _, err := buildGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) + _, err := buildValidatorGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) assert.Error(t, err) assert.Contains(t, err.Error(), "validator key not found") } @@ -277,7 +278,7 @@ func TestBuildGigaConfig_NodeKeyMismatch(t *testing.T) { valKey := makeTestValidatorKey([]byte("my-val")) txMempool, genDoc := makeTestGigaDeps() - _, err := buildGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) + _, err := buildValidatorGigaConfig(cfgFile, nodeKey, valKey, txMempool, genDoc) assert.Error(t, err) assert.Contains(t, err.Error(), "node key mismatch") } diff --git a/sei-tendermint/rpc/client/local/local.go b/sei-tendermint/rpc/client/local/local.go index ec8b43c25a..8f85290b45 100644 --- a/sei-tendermint/rpc/client/local/local.go +++ b/sei-tendermint/rpc/client/local/local.go @@ -107,7 +107,14 @@ func (c *Local) CheckTx(ctx context.Context, tx types.Tx) (*coretypes.ResultChec func (c *Local) EvmNextPendingNonce(addr common.Address) uint64 { if giga, ok := c.Environment.Router.Giga().Get(); ok { - return giga.Mempool().EvmNextPendingNonce(addr) + if v, ok := giga.Mempool().Get(); ok { + return v.EvmNextPendingNonce(addr) + } + // Fullnode: no local mempool; the pending nonce lives on the + // shard owner. Returning 0 here defers to the on-chain confirmed + // nonce; callers that need the pending value should query the + // shard owner's EVM RPC directly via EvmProxy. + return 0 } if mp, ok := c.Mempool.Get(); ok { return mp.EvmNextPendingNonce(addr)