diff --git a/app/abci.go b/app/abci.go index bcc8c3cbb8..af75430e7e 100644 --- a/app/abci.go +++ b/app/abci.go @@ -5,6 +5,9 @@ import ( "crypto/sha256" "time" + "go.opentelemetry.io/otel/attribute" + otelmetrics "go.opentelemetry.io/otel/metric" + "github.com/sei-protocol/sei-chain/app/legacyabci" "github.com/sei-protocol/sei-chain/sei-cosmos/tasks" "github.com/sei-protocol/sei-chain/sei-cosmos/telemetry" @@ -13,7 +16,7 @@ import ( "github.com/sei-protocol/sei-chain/sei-cosmos/types/legacytm" abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" - "github.com/sei-protocol/sei-chain/utils/metrics" + utilmetrics "github.com/sei-protocol/sei-chain/utils/metrics" ) func (app *App) BeginBlock( @@ -27,14 +30,18 @@ func (app *App) BeginBlock( defer beginBlockSpan.End() ctx = ctx.WithTraceSpanContext(spanCtx) ctx = ctx.WithEventManager(sdk.NewEventManager()) - defer telemetry.MeasureSince(time.Now(), "abci", "begin_block") + beginBlockStart := time.Now() + defer func() { + telemetry.MeasureSince(beginBlockStart, "abci", "begin_block") // TODO(PLT-327): remove once app_abci_begin_block_duration_seconds verified + appMetrics.beginBlockDuration.Record(ctx.Context(), time.Since(beginBlockStart).Seconds()) + }() // inline begin block if checkHeight { if err := app.ValidateHeight(height); err != nil { panic(err) } } - metrics.GaugeSeidVersionAndCommit(app.versionInfo.Version, app.versionInfo.GitCommit) + utilmetrics.GaugeSeidVersionAndCommit(app.versionInfo.Version, app.versionInfo.GitCommit) // TODO(PLT-327): remove once app_build_info observable gauge verified // check if we've reached a target height, if so, execute any applicable handlers if app.forkInitializer != nil { app.forkInitializer(ctx) @@ -59,10 +66,18 @@ func (app *App) EndBlock(ctx sdk.Context, height int64, blockGasUsed int64) (res spanCtx, span := app.GetBaseApp().TracingInfo.StartWithContext("EndBlock", ctx.TraceSpanContext()) defer span.End() ctx = ctx.WithTraceSpanContext(spanCtx) - defer telemetry.MeasureSince(time.Now(), "abci", "end_block") + endBlockStart := time.Now() + defer func() { + telemetry.MeasureSince(endBlockStart, "abci", "end_block") // TODO(PLT-327): remove once app_abci_end_block_duration_seconds verified + appMetrics.endBlockDuration.Record(ctx.Context(), time.Since(endBlockStart).Seconds()) + }() ctx = ctx.WithEventManager(sdk.NewEventManager()) - defer telemetry.MeasureSince(time.Now(), "module", "total_end_block") + moduleEndBlockStart := time.Now() + defer func() { + telemetry.MeasureSince(moduleEndBlockStart, "module", "total_end_block") // TODO(PLT-327): remove once app_abci_module_end_block_duration_seconds verified + }() res.ValidatorUpdates = legacyabci.EndBlock(ctx, height, blockGasUsed, app.EndBlockKeepers) + appMetrics.moduleEndBlockDuration.Record(ctx.Context(), time.Since(moduleEndBlockStart).Seconds()) res.Events = sdk.MarkEventsToIndex(ctx.EventManager().ABCIEvents(), app.IndexEvents) if cp := app.GetConsensusParams(ctx); cp != nil { res.ConsensusParamUpdates = legacytm.ABCIToLegacyConsensusParams(cp) @@ -83,7 +98,11 @@ func (app *App) CheckTx(ctx context.Context, req *abci.RequestCheckTxV2) *abci.R } _, span := app.GetBaseApp().TracingInfo.StartWithContext("CheckTx", ctx) defer span.End() - defer telemetry.MeasureSince(time.Now(), "abci", "check_tx") + checkTxStart := time.Now() + defer func() { + telemetry.MeasureSince(checkTxStart, "abci", "check_tx") // TODO(PLT-327): remove once app_abci_check_tx_duration_seconds verified + appMetrics.checkTxDuration.Record(ctx, time.Since(checkTxStart).Seconds()) + }() sdkCtx := app.GetCheckTxContext(req.Tx, req.Type == abci.CheckTxTypeV2Recheck) tx, err := app.txDecoder(req.Tx) if err != nil { @@ -121,22 +140,30 @@ func (app *App) CheckTx(ctx context.Context, req *abci.RequestCheckTxV2) *abci.R } func (app *App) DeliverTx(ctx sdk.Context, req abci.RequestDeliverTxV2, tx sdk.Tx, checksum [32]byte) abci.ResponseDeliverTx { - defer metrics.MeasureDeliverTxDuration(time.Now()) + deliverTxStart := time.Now() // ensure we carry the initial context from tracer here spanCtx, span := app.GetBaseApp().TracingInfo.StartWithContext("DeliverTx", ctx.TraceSpanContext()) defer span.End() // update context with trace span new context ctx = ctx.WithTraceSpanContext(spanCtx) - defer telemetry.MeasureSince(time.Now(), "abci", "deliver_tx") + defer func() { + utilmetrics.MeasureDeliverTxDuration(deliverTxStart) // TODO(PLT-327): remove once app_abci_deliver_tx_duration_seconds verified + telemetry.MeasureSince(deliverTxStart, "abci", "deliver_tx") // TODO(PLT-327): remove once app_abci_deliver_tx_duration_seconds verified + appMetrics.deliverTxDuration.Record(ctx.Context(), time.Since(deliverTxStart).Seconds()) + }() gInfo := sdk.GasInfo{} resultStr := "successful" defer func() { - telemetry.IncrCounter(1, "tx", "count") - telemetry.IncrCounter(1, "tx", resultStr) - telemetry.SetGauge(float32(gInfo.GasUsed), "tx", "gas", "used") - telemetry.SetGauge(float32(gInfo.GasWanted), "tx", "gas", "wanted") + telemetry.IncrCounter(1, "tx", "count") // TODO(PLT-327): remove once app_tx_count_total verified + telemetry.IncrCounter(1, "tx", resultStr) // TODO(PLT-327): remove once app_tx_count_total verified + telemetry.SetGauge(float32(gInfo.GasUsed), "tx", "gas", "used") // TODO(PLT-327): remove once app_tx_gas_used verified + telemetry.SetGauge(float32(gInfo.GasWanted), "tx", "gas", "wanted") // TODO(PLT-327): remove once app_tx_gas_wanted verified + appMetrics.txCount.Add(ctx.Context(), 1) + appMetrics.txCount.Add(ctx.Context(), 1, otelmetrics.WithAttributes(attribute.String("result", resultStr))) + appMetrics.txGasUsed.Record(ctx.Context(), int64(gInfo.GasUsed)) //nolint:gosec + appMetrics.txGasWanted.Record(ctx.Context(), int64(gInfo.GasWanted)) //nolint:gosec }() gInfo, result, anteEvents, resCtx, err := legacyabci.DeliverTx(ctx.WithTxBytes(req.Tx).WithTxSum(checksum), tx, app.GetTxConfig(), &app.DeliverTxKeepers, checksum, func(ctx sdk.Context) (sdk.Context, sdk.CacheMultiStore) { return app.CacheTxContext(ctx, checksum) @@ -177,7 +204,11 @@ func (app *App) DeliverTx(ctx sdk.Context, req abci.RequestDeliverTxV2, tx sdk.T // DeliverTxBatch is not part of the ABCI specification, but this is here for code convention func (app *App) DeliverTxBatch(ctx sdk.Context, req sdk.DeliverTxBatchRequest) (res sdk.DeliverTxBatchResponse) { - defer metrics.MeasureDeliverBatchTxDuration(time.Now()) + deliverBatchStart := time.Now() + defer func() { + utilmetrics.MeasureDeliverBatchTxDuration(deliverBatchStart) // TODO(PLT-327): remove once app_abci_deliver_batch_tx_duration_seconds verified + appMetrics.deliverBatchTxDuration.Record(ctx.Context(), time.Since(deliverBatchStart).Seconds()) + }() spanCtx, span := app.GetBaseApp().TracingInfo.StartWithContext("DeliverTxBatch", ctx.TraceSpanContext()) defer span.End() // update context with trace span new context @@ -207,6 +238,9 @@ func (app *App) Commit(ctx context.Context) (res *abci.ResponseCommit, err error defer span.End() start := time.Now() res, err = app.BaseApp.Commit(ctx) - app.RecordBenchmarkCommitTime(time.Since(start)) + elapsed := time.Since(start) + // legacy: telemetry.MeasureSince in sei-cosmos/baseapp/abci.go TODO(PLT-327) + appMetrics.commitDuration.Record(ctx, elapsed.Seconds()) + app.RecordBenchmarkCommitTime(elapsed) return res, err } diff --git a/app/ante/evm_checktx.go b/app/ante/evm_checktx.go index 02a0e53dc8..960479571c 100644 --- a/app/ante/evm_checktx.go +++ b/app/ante/evm_checktx.go @@ -21,6 +21,9 @@ import ( upgradekeeper "github.com/sei-protocol/sei-chain/sei-cosmos/x/upgrade/keeper" abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" tmtypes "github.com/sei-protocol/sei-chain/sei-tendermint/types" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "github.com/sei-protocol/sei-chain/utils" "github.com/sei-protocol/sei-chain/utils/helpers" "github.com/sei-protocol/sei-chain/utils/metrics" @@ -297,14 +300,16 @@ func CheckNonce(ctx sdk.Context, latestCtxGetter func() sdk.Context, ek *evmkeep ctx = ctx.WithCheckTxCallback(func(priority int64) { txHash := tmtypes.Tx(ctx.TxBytes()).Hash() ek.AddPendingNonce(txHash, evmAddr, etx.Nonce(), priority) - metrics.IncrementPendingNonce("added") + metrics.IncrementPendingNonce("added") // TODO(PLT-327): remove once app_pending_nonce_total verified + appAnteMetrics.pendingNonce.Add(ctx.Context(), 1, otelmetric.WithAttributes(attribute.String("event", "added"))) }) // if the mempool expires a transaction, this handler is invoked ctx = ctx.WithExpireTxHandler(func() { txHash := tmtypes.Tx(ctx.TxBytes()).Hash() ek.RemovePendingNonce(txHash) - metrics.IncrementPendingNonce("expired") + metrics.IncrementPendingNonce("expired") // TODO(PLT-327): remove once app_pending_nonce_total verified + appAnteMetrics.pendingNonce.Add(ctx.Context(), 1, otelmetric.WithAttributes(attribute.String("event", "expired"))) }) if txNonce > nextNonce { @@ -323,7 +328,8 @@ func CheckNonce(ctx sdk.Context, latestCtxGetter func() sdk.Context, ek *evmkeep if txNonce < nextNonceToBeMined { // this nonce has already been mined, we cannot accept it again - metrics.IncrementPendingNonce("rejected") + metrics.IncrementPendingNonce("rejected") // TODO(PLT-327): remove once app_pending_nonce_total verified + appAnteMetrics.pendingNonce.Add(ctx.Context(), 1, otelmetric.WithAttributes(attribute.String("event", "rejected"))) return abci.Rejected } else if txNonce < nextPendingNonce { // check if the sender still has enough funds to pay for gas @@ -335,7 +341,8 @@ func CheckNonce(ctx sdk.Context, latestCtxGetter func() sdk.Context, ek *evmkeep // this nonce is allowed to process as it is part of the // consecutive nonces from nextNonceToBeMined to nextPendingNonce // This logic allows multiple nonces from an account to be processed in a block. - metrics.IncrementPendingNonce("accepted") + metrics.IncrementPendingNonce("accepted") // TODO(PLT-327): remove once app_pending_nonce_total verified + appAnteMetrics.pendingNonce.Add(ctx.Context(), 1, otelmetric.WithAttributes(attribute.String("event", "accepted"))) return abci.Accepted } return abci.Pending diff --git a/app/ante/metrics.go b/app/ante/metrics.go new file mode 100644 index 0000000000..44c1a0edd9 --- /dev/null +++ b/app/ante/metrics.go @@ -0,0 +1,36 @@ +package ante + +import ( + "sync" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" +) + +type anteMetrics struct { + mu sync.Mutex + initialized bool + + pendingNonce metric.Int64Counter +} + +var appAnteMetrics anteMetrics + +// InitAnteMetrics registers all OTel instruments for the ante package. +// Safe to call concurrently; instruments are registered exactly once. +func InitAnteMetrics() { + appAnteMetrics.mu.Lock() + defer appAnteMetrics.mu.Unlock() + if appAnteMetrics.initialized { + return + } + meter := otel.Meter("app_ante") + var err error + if appAnteMetrics.pendingNonce, err = meter.Int64Counter( + "app_pending_nonce_total", + metric.WithDescription("Pending nonce events by type (added, expired, rejected, accepted)"), + ); err != nil { + panic("ante metrics: " + err.Error()) + } + appAnteMetrics.initialized = true +} diff --git a/app/ante/metrics_test.go b/app/ante/metrics_test.go new file mode 100644 index 0000000000..5124033f9a --- /dev/null +++ b/app/ante/metrics_test.go @@ -0,0 +1,25 @@ +package ante + +import ( + "context" + "testing" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/noop" +) + +func TestInitAnteMetricsNoPanic(t *testing.T) { + otel.SetMeterProvider(noop.NewMeterProvider()) + InitAnteMetrics() +} + +func TestAnteMetricsPendingNonceAllEvents(t *testing.T) { + otel.SetMeterProvider(noop.NewMeterProvider()) + InitAnteMetrics() + ctx := context.Background() + for _, event := range []string{"added", "expired", "rejected", "accepted"} { + appAnteMetrics.pendingNonce.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("event", event))) + } +} diff --git a/app/app.go b/app/app.go index 6bcef5afab..633da2e129 100644 --- a/app/app.go +++ b/app/app.go @@ -138,8 +138,9 @@ import ( tmproto "github.com/sei-protocol/sei-chain/sei-tendermint/proto/tendermint/types" tmtypes "github.com/sei-protocol/sei-chain/sei-tendermint/types" wasmkeeper "github.com/sei-protocol/sei-chain/sei-wasmd/x/wasm/keeper" + "github.com/sei-protocol/sei-chain/utils" - "github.com/sei-protocol/sei-chain/utils/metrics" + utilmetrics "github.com/sei-protocol/sei-chain/utils/metrics" "github.com/sei-protocol/sei-chain/wasmbinding" epochmodule "github.com/sei-protocol/sei-chain/x/epoch" epochmodulekeeper "github.com/sei-protocol/sei-chain/x/epoch/keeper" @@ -165,6 +166,7 @@ import ( "github.com/spf13/cast" dbm "github.com/tendermint/tm-db" "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" // this line is used by starport scaffolding # stargate/app/moduleImport @@ -488,9 +490,11 @@ func New( bApp.SetInterfaceRegistry(interfaceRegistry) // Bind OTEL metrics provider once at application construction - if err := metrics.SetupOtelMetricsProvider(); err != nil { + if err := utilmetrics.SetupOtelMetricsProvider(); err != nil { logger.Error(err.Error()) } + initAppMetrics() + appante.InitAnteMetrics() keys := sdk.NewKVStoreKeys( authtypes.StoreKey, authzkeeper.StoreKey, banktypes.StoreKey, stakingtypes.StoreKey, @@ -1217,7 +1221,9 @@ func (app *App) ProcessProposalHandler(ctx sdk.Context, req *abci.RequestProcess // Invariant: at this point nil entries in typedTxs are proto decode failures only. // EVM preprocessing runs later inside ProcessBlock; do not reorder that before this check. if !app.checkTotalBlockGas(checkCtx, typedTxs) { - metrics.IncrFailedTotalGasWantedCheck(string(req.Header.ProposerAddress)) + utilmetrics.IncrFailedTotalGasWantedCheck(string(req.Header.ProposerAddress)) // TODO(PLT-327): remove once app_failed_total_gas_wanted_check_total verified + appMetrics.failedGasWantedCheck.Add(ctx.Context(), 1, + otelmetric.WithAttributes(attribute.String("proposer", string(req.Header.ProposerAddress)))) return &abci.ResponseProcessProposal{ Status: abci.ResponseProcessProposal_REJECT, }, nil @@ -1318,19 +1324,23 @@ func (app *App) FinalizeBlocker(ctx sdk.Context, req *abci.RequestFinalizeBlock) app.optimisticProcessingInfoMutex.RUnlock() if !aborted && bytes.Equal(finalHash, req.Hash) { - metrics.IncrementOptimisticProcessingCounter(true) + utilmetrics.IncrementOptimisticProcessingCounter(true) // TODO(PLT-327): remove once app_optimistic_processing_total verified + appMetrics.optimisticProcessing.Add(ctx.Context(), 1, + otelmetric.WithAttributes(attribute.String("enabled", "true"))) app.SetProcessProposalStateToCommit() if app.EvmKeeper.EthReplayConfig.Enabled || app.EvmKeeper.EthBlockTestConfig.Enabled { return &abci.ResponseFinalizeBlock{}, nil } cms := app.WriteState() - app.LightInvarianceChecks(cms, app.lightInvarianceConfig) + app.LightInvarianceChecks(ctx.Context(), cms, app.lightInvarianceConfig) appHash := app.GetWorkingHash() resp := app.getFinalizeBlockResponse(appHash, events, txRes, endBlockResp) return &resp, nil } } - metrics.IncrementOptimisticProcessingCounter(false) + utilmetrics.IncrementOptimisticProcessingCounter(false) // TODO(PLT-327): remove once app_optimistic_processing_total verified + appMetrics.optimisticProcessing.Add(ctx.Context(), 1, + otelmetric.WithAttributes(attribute.String("enabled", "false"))) logger.Info("optimistic processing ineligible") bpreq := &BlockProcessRequest{ Hash: req.Hash, @@ -1349,7 +1359,7 @@ func (app *App) FinalizeBlocker(ctx sdk.Context, req *abci.RequestFinalizeBlock) return &abci.ResponseFinalizeBlock{}, nil } cms := app.WriteState() - app.LightInvarianceChecks(cms, app.lightInvarianceConfig) + app.LightInvarianceChecks(ctx.Context(), cms, app.lightInvarianceConfig) appHash := app.GetWorkingHash() resp := app.getFinalizeBlockResponse(appHash, events, txResults, endBlockResp) return &resp, nil @@ -1384,8 +1394,12 @@ func (app *App) DeliverTxWithResult(ctx sdk.Context, tx []byte, typedTx sdk.Tx) if !skipMetrics { // Record metrics for non-gasless transactions - metrics.IncrGasCounter("gas_used", deliverTxResp.GasUsed) - metrics.IncrGasCounter("gas_wanted", deliverTxResp.GasWanted) + utilmetrics.IncrGasCounter("gas_used", deliverTxResp.GasUsed) // TODO(PLT-327): remove once app_tx_gas_total verified + utilmetrics.IncrGasCounter("gas_wanted", deliverTxResp.GasWanted) // TODO(PLT-327): remove once app_tx_gas_total verified + appMetrics.txGas.Add(ctx.Context(), deliverTxResp.GasUsed, + otelmetric.WithAttributes(attribute.String("type", "gas_used"))) + appMetrics.txGas.Add(ctx.Context(), deliverTxResp.GasWanted, + otelmetric.WithAttributes(attribute.String("type", "gas_wanted"))) } return &abci.ExecTxResult{ @@ -1402,20 +1416,32 @@ func (app *App) DeliverTxWithResult(ctx sdk.Context, tx []byte, typedTx sdk.Tx) } func (app *App) ProcessTxsSynchronousV2(ctx sdk.Context, txs [][]byte, typedTxs []sdk.Tx) []*abci.ExecTxResult { - defer metrics.BlockProcessLatency(time.Now(), metrics.SYNCHRONOUS) + blockProcessStart := time.Now() + defer func() { + utilmetrics.BlockProcessLatency(blockProcessStart, utilmetrics.SYNCHRONOUS) // TODO(PLT-327): remove once app_block_process_duration_seconds verified + appMetrics.blockProcessDuration.Record(ctx.Context(), time.Since(blockProcessStart).Seconds(), + otelmetric.WithAttributes(attribute.String("type", utilmetrics.SYNCHRONOUS))) + }() txResults := make([]*abci.ExecTxResult, 0, len(txs)) for i, tx := range txs { ctx = ctx.WithTxIndex(i) res := app.DeliverTxWithResult(ctx, tx, typedTxs[i]) txResults = append(txResults, res) - metrics.IncrTxProcessTypeCounter(metrics.SYNCHRONOUS) + utilmetrics.IncrTxProcessTypeCounter(utilmetrics.SYNCHRONOUS) // TODO(PLT-327): remove once app_tx_process_type_total verified + appMetrics.txProcessType.Add(ctx.Context(), 1, + otelmetric.WithAttributes(attribute.String("type", utilmetrics.SYNCHRONOUS))) } return txResults } func (app *App) ProcessTxsSynchronousGiga(ctx sdk.Context, txs [][]byte, typedTxs []sdk.Tx) []*abci.ExecTxResult { - defer metrics.BlockProcessLatency(time.Now(), metrics.SYNCHRONOUS) + blockProcessGigaStart := time.Now() + defer func() { + utilmetrics.BlockProcessLatency(blockProcessGigaStart, utilmetrics.SYNCHRONOUS) // TODO(PLT-327): remove once app_block_process_duration_seconds verified + appMetrics.blockProcessDuration.Record(ctx.Context(), time.Since(blockProcessGigaStart).Seconds(), + otelmetric.WithAttributes(attribute.String("type", utilmetrics.SYNCHRONOUS))) + }() ms := ctx.MultiStore().CacheMultiStore() defer ms.Write() @@ -1500,7 +1526,9 @@ func (app *App) ProcessTxsSynchronousGiga(ctx sdk.Context, txs [][]byte, typedTx txResults[i] = result ctx.GigaMultiStore().WriteGiga() - metrics.IncrTxProcessTypeCounter(metrics.SYNCHRONOUS) + utilmetrics.IncrTxProcessTypeCounter(utilmetrics.SYNCHRONOUS) // TODO(PLT-327): remove once app_tx_process_type_total verified + appMetrics.txProcessType.Add(ctx.Context(), 1, + otelmetric.WithAttributes(attribute.String("type", utilmetrics.SYNCHRONOUS))) } return txResults @@ -1549,7 +1577,9 @@ func (app *App) ProcessTXsWithOCCV2(ctx sdk.Context, txs [][]byte, typedTxs []sd execResults := make([]*abci.ExecTxResult, 0, len(batchResult.Results)) for i, r := range batchResult.Results { - metrics.IncrTxProcessTypeCounter(metrics.OCC_CONCURRENT) + utilmetrics.IncrTxProcessTypeCounter(utilmetrics.OCC_CONCURRENT) // TODO(PLT-327): remove once app_tx_process_type_total verified + appMetrics.txProcessType.Add(ctx.Context(), 1, + otelmetric.WithAttributes(attribute.String("type", utilmetrics.OCC_CONCURRENT))) // Check if transaction is gasless before recording gas metrics var recordGasMetrics = true @@ -1575,8 +1605,12 @@ func (app *App) ProcessTXsWithOCCV2(ctx sdk.Context, txs [][]byte, typedTxs []sd } if recordGasMetrics { - metrics.IncrGasCounter("gas_used", r.Response.GasUsed) - metrics.IncrGasCounter("gas_wanted", r.Response.GasWanted) + utilmetrics.IncrGasCounter("gas_used", r.Response.GasUsed) // TODO(PLT-327): remove once app_tx_gas_total verified + utilmetrics.IncrGasCounter("gas_wanted", r.Response.GasWanted) // TODO(PLT-327): remove once app_tx_gas_total verified + appMetrics.txGas.Add(ctx.Context(), r.Response.GasUsed, + otelmetric.WithAttributes(attribute.String("type", "gas_used"))) + appMetrics.txGas.Add(ctx.Context(), r.Response.GasWanted, + otelmetric.WithAttributes(attribute.String("type", "gas_wanted"))) } execResults = append(execResults, &abci.ExecTxResult{ @@ -1655,7 +1689,8 @@ func (app *App) ProcessTXsWithOCCGiga(ctx sdk.Context, txs [][]byte, typedTxs [] } if fallbackToV2 { - metrics.IncrGigaFallbackToV2Counter() + utilmetrics.IncrGigaFallbackToV2Counter() // TODO(PLT-327): remove once app_giga_fallback_to_v2_total verified + appMetrics.gigaFallback.Add(ctx.Context(), 1) // Discard all EVM changes by skipping cache writes, then re-run all txs via DeliverTx. evmBatchResult = nil v2Entries = make([]*sdk.DeliverTxEntry, len(txs)) diff --git a/app/invariance.go b/app/invariance.go index c8def4baf8..b7ea172465 100644 --- a/app/invariance.go +++ b/app/invariance.go @@ -1,13 +1,16 @@ package app import ( + "context" "fmt" "time" - "github.com/armon/go-metrics" + armonmetrics "github.com/armon/go-metrics" servertypes "github.com/sei-protocol/sei-chain/sei-cosmos/server/types" "github.com/sei-protocol/sei-chain/sei-cosmos/storev2/commitment" "github.com/spf13/cast" + "go.opentelemetry.io/otel/attribute" + otelmetrics "go.opentelemetry.io/otel/metric" sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types" @@ -40,22 +43,23 @@ func ReadLightInvarianceConfig(opts servertypes.AppOptions) (LightInvarianceConf return cfg, nil } -func (app *App) LightInvarianceChecks(cms sdk.CommitMultiStore, config LightInvarianceConfig) { +func (app *App) LightInvarianceChecks(ctx context.Context, cms sdk.CommitMultiStore, config LightInvarianceConfig) { // Skip invariance checks when mock_balances is enabled since we fake balances // without updating the actual store, which would fail the supply check. if MockBalancesEnabled { return } if config.SupplyEnabled { - app.LightInvarianceTotalSupply(cms) + app.LightInvarianceTotalSupply(ctx, cms) } } -func (app *App) LightInvarianceTotalSupply(cms sdk.CommitMultiStore) { - defer metrics.MeasureSince( - []string{"sei", "lightinvariance_supply", "milliseconds"}, - time.Now().UTC(), - ) +func (app *App) LightInvarianceTotalSupply(ctx context.Context, cms sdk.CommitMultiStore) { + invarianceStart := time.Now() + defer func() { + armonmetrics.MeasureSince([]string{"sei", "lightinvariance_supply", "milliseconds"}, invarianceStart.UTC()) // TODO(PLT-327): remove once app_lightinvariance_supply_duration_seconds verified + appMetrics.invarianceDuration.Record(ctx, time.Since(invarianceStart).Seconds()) + }() ckv, ok := cms.GetStore(app.BankKeeper.GetStoreKey()).(*commitment.Store) if !ok { logger.Error("bank store is not a memiavl store; cannot run light invariance check") @@ -67,24 +71,16 @@ func (app *App) LightInvarianceTotalSupply(cms sdk.CommitMultiStore) { for _, p := range balanceChangePairs { if len(p.Key) < 2 { // invalid key; ignore - seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "invalid_changed_key"}, 1, []metrics.Label{ - { - Name: "type", - Value: "sei", - }, - }) + seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "invalid_changed_key"}, 1, []armonmetrics.Label{{Name: "type", Value: "sei"}}) // TODO(PLT-327): remove once app_lightinvariance_supply_invalid_key_total verified + appMetrics.invarianceInvalidKey.Add(ctx, 1, otelmetrics.WithAttributes(attribute.String("type", "sei"))) logger.Error("invalid changed pair key for usei", "key", fmt.Sprintf("%X", p.Key)) continue } addrLen := int(p.Key[1]) if len(p.Key) < addrLen+2 { // invalid key length; ignore - seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "invalid_changed_key"}, 1, []metrics.Label{ - { - Name: "type", - Value: "sei", - }, - }) + seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "invalid_changed_key"}, 1, []armonmetrics.Label{{Name: "type", Value: "sei"}}) // TODO(PLT-327): remove once app_lightinvariance_supply_invalid_key_total verified + appMetrics.invarianceInvalidKey.Add(ctx, 1, otelmetrics.WithAttributes(attribute.String("type", "sei"))) logger.Error("invalid changed pair key for usei", "key", fmt.Sprintf("%X", p.Key)) continue } @@ -96,15 +92,8 @@ func (app *App) LightInvarianceTotalSupply(cms sdk.CommitMultiStore) { if !p.Delete { var balance sdk.Coin if err := balance.Unmarshal(p.Value); err != nil { - seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "unmarshal_failure"}, 1, []metrics.Label{ - { - Name: "type", - Value: "usei", - }, { - Name: "step", - Value: "post_block", - }, - }) + seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "unmarshal_failure"}, 1, []armonmetrics.Label{{Name: "type", Value: "usei"}, {Name: "step", Value: "post_block"}}) // TODO(PLT-327): remove once app_lightinvariance_supply_unmarshal_failure_total verified + appMetrics.invarianceUnmarshalFail.Add(ctx, 1, otelmetrics.WithAttributes(attribute.String("type", "usei"), attribute.String("step", "post_block"))) logger.Error("failed to unmarshal balance", "err", err) continue } @@ -124,15 +113,8 @@ func (app *App) LightInvarianceTotalSupply(cms sdk.CommitMultiStore) { } var balance sdk.Coin if err := balance.Unmarshal(val); err != nil { - seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "unmarshal_failure"}, 1, []metrics.Label{ - { - Name: "type", - Value: "usei", - }, { - Name: "step", - Value: "pre_block", - }, - }) + seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "unmarshal_failure"}, 1, []armonmetrics.Label{{Name: "type", Value: "usei"}, {Name: "step", Value: "pre_block"}}) // TODO(PLT-327): remove once app_lightinvariance_supply_unmarshal_failure_total verified + appMetrics.invarianceUnmarshalFail.Add(ctx, 1, otelmetrics.WithAttributes(attribute.String("type", "usei"), attribute.String("step", "pre_block"))) logger.Error("failed to unmarshal preblock balance", "err", err) continue } @@ -144,26 +126,15 @@ func (app *App) LightInvarianceTotalSupply(cms sdk.CommitMultiStore) { for _, p := range weiChangePairs { var amt sdk.Int if len(p.Key) < 1 { - seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "invalid_changed_key"}, 1, []metrics.Label{ - { - Name: "type", - Value: "wei", - }, - }) + seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "invalid_changed_key"}, 1, []armonmetrics.Label{{Name: "type", Value: "wei"}}) // TODO(PLT-327): remove once app_lightinvariance_supply_invalid_key_total verified + appMetrics.invarianceInvalidKey.Add(ctx, 1, otelmetrics.WithAttributes(attribute.String("type", "wei"))) logger.Error("invalid changed pair key", "key", fmt.Sprintf("%X", p.Key)) continue } if !p.Delete { if err := amt.Unmarshal(p.Value); err != nil { - seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "unmarshal_failure"}, 1, []metrics.Label{ - { - Name: "type", - Value: "wei", - }, { - Name: "step", - Value: "post_block", - }, - }) + seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "unmarshal_failure"}, 1, []armonmetrics.Label{{Name: "type", Value: "wei"}, {Name: "step", Value: "post_block"}}) // TODO(PLT-327): remove once app_lightinvariance_supply_unmarshal_failure_total verified + appMetrics.invarianceUnmarshalFail.Add(ctx, 1, otelmetrics.WithAttributes(attribute.String("type", "wei"), attribute.String("step", "post_block"))) logger.Error("failed to unmarshal wei balance", "err", err) continue } @@ -183,15 +154,8 @@ func (app *App) LightInvarianceTotalSupply(cms sdk.CommitMultiStore) { } var amt sdk.Int if err := amt.Unmarshal(val); err != nil { - seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "unmarshal_failure"}, 1, []metrics.Label{ - { - Name: "type", - Value: "wei", - }, { - Name: "step", - Value: "pre_block", - }, - }) + seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "unmarshal_failure"}, 1, []armonmetrics.Label{{Name: "type", Value: "wei"}, {Name: "step", Value: "pre_block"}}) // TODO(PLT-327): remove once app_lightinvariance_supply_unmarshal_failure_total verified + appMetrics.invarianceUnmarshalFail.Add(ctx, 1, otelmetrics.WithAttributes(attribute.String("type", "wei"), attribute.String("step", "pre_block"))) logger.Error("failed to unmarshal preblock wei balance", "err", err) continue } @@ -203,15 +167,8 @@ func (app *App) LightInvarianceTotalSupply(cms sdk.CommitMultiStore) { if bz := ckv.Get(append(banktypes.SupplyKey, []byte(sdk.MustGetBaseDenom())...)); bz != nil { var amt sdk.Int if err := amt.Unmarshal(bz); err != nil { - seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "unmarshal_failure"}, 1, []metrics.Label{ - { - Name: "type", - Value: "total_supply", - }, { - Name: "step", - Value: "pre_block", - }, - }) + seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "unmarshal_failure"}, 1, []armonmetrics.Label{{Name: "type", Value: "total_supply"}, {Name: "step", Value: "pre_block"}}) // TODO(PLT-327): remove once app_lightinvariance_supply_unmarshal_failure_total verified + appMetrics.invarianceUnmarshalFail.Add(ctx, 1, otelmetrics.WithAttributes(attribute.String("type", "total_supply"), attribute.String("step", "pre_block"))) logger.Error("failed to unmarshal pre total supply", "err", err) return } @@ -224,15 +181,8 @@ func (app *App) LightInvarianceTotalSupply(cms sdk.CommitMultiStore) { } else { var amt sdk.Int if err := amt.Unmarshal(p.Value); err != nil { - seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "unmarshal_failure"}, 1, []metrics.Label{ - { - Name: "type", - Value: "total_supply", - }, { - Name: "step", - Value: "post_block", - }, - }) + seimetrics.SafeMetricsIncrCounterWithLabels([]string{"sei", "lightinvariance_supply", "unmarshal_failure"}, 1, []armonmetrics.Label{{Name: "type", Value: "total_supply"}, {Name: "step", Value: "post_block"}}) // TODO(PLT-327): remove once app_lightinvariance_supply_unmarshal_failure_total verified + appMetrics.invarianceUnmarshalFail.Add(ctx, 1, otelmetrics.WithAttributes(attribute.String("type", "total_supply"), attribute.String("step", "post_block"))) logger.Error("failed to unmarshal total supply", "err", err) } else { supplyChanged = amt.Sub(preTotalSupply) diff --git a/app/invariance_test.go b/app/invariance_test.go index 3886f19bc6..9891ae0302 100644 --- a/app/invariance_test.go +++ b/app/invariance_test.go @@ -172,7 +172,9 @@ func TestLightInvarianceChecks(t *testing.T) { } a.BankKeeper.SetSupply(ctx, useiCoin(tt.postSupply)) a.SetDeliverStateToCommit() - f := func() { a.LightInvarianceChecks(a.WriteState(), app.LightInvarianceConfig{SupplyEnabled: true}) } + f := func() { + a.LightInvarianceChecks(t.Context(), a.WriteState(), app.LightInvarianceConfig{SupplyEnabled: true}) + } if tt.success { require.NotPanics(t, f) } else { diff --git a/app/metrics.go b/app/metrics.go new file mode 100644 index 0000000000..909e4380c3 --- /dev/null +++ b/app/metrics.go @@ -0,0 +1,209 @@ +package app + +import ( + "context" + "sync" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" + + "github.com/sei-protocol/sei-chain/sei-cosmos/version" +) + +const appMeterName = "app" + +// histogramBuckets aligns with block-time SLO thresholds +// (p50 ≤ 500ms, p95 ≤ 1.5s, p99 ≤ 2.5s) expressed in seconds. +var histogramBuckets = metric.WithExplicitBucketBoundaries( + 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 2.5, 5.0, 10.0, +) + +type metrics struct { + mu sync.Mutex + initialized bool + + // ABCI phase durations + beginBlockDuration metric.Float64Histogram + endBlockDuration metric.Float64Histogram + moduleEndBlockDuration metric.Float64Histogram + checkTxDuration metric.Float64Histogram + deliverTxDuration metric.Float64Histogram + deliverBatchTxDuration metric.Float64Histogram + + // Commit duration + commitDuration metric.Float64Histogram + + // Block processing duration by execution type + blockProcessDuration metric.Float64Histogram + + // Per-tx counts and gas + txCount metric.Int64Counter + txProcessType metric.Int64Counter + txGas metric.Int64Counter + txGasUsed metric.Int64Gauge + txGasWanted metric.Int64Gauge + + // App-level flow counters + optimisticProcessing metric.Int64Counter + failedGasWantedCheck metric.Int64Counter + gigaFallback metric.Int64Counter + + // Light invariance check + invarianceDuration metric.Float64Histogram + invarianceInvalidKey metric.Int64Counter + invarianceUnmarshalFail metric.Int64Counter +} + +// appMetrics is the package-level OTel instrument set, initialized once in NewApp. +var appMetrics metrics + +func must[V any](v V, err error) V { + if err != nil { + panic(err) + } + return v +} + +// initAppMetrics registers all OTel instruments for the app package. +// Safe to call concurrently; instruments are registered exactly once. +func initAppMetrics() { + appMetrics.mu.Lock() + defer appMetrics.mu.Unlock() + if appMetrics.initialized { + return + } + meter := otel.Meter(appMeterName) + + appMetrics.beginBlockDuration = must(meter.Float64Histogram( + "app_abci_begin_block_duration_seconds", + metric.WithDescription("Duration of ABCI BeginBlock"), + metric.WithUnit("s"), + histogramBuckets, + )) + + appMetrics.endBlockDuration = must(meter.Float64Histogram( + "app_abci_end_block_duration_seconds", + metric.WithDescription("Duration of ABCI EndBlock"), + metric.WithUnit("s"), + histogramBuckets, + )) + + appMetrics.moduleEndBlockDuration = must(meter.Float64Histogram( + "app_abci_module_end_block_duration_seconds", + metric.WithDescription("Duration of module EndBlock calls within ABCI EndBlock"), + metric.WithUnit("s"), + histogramBuckets, + )) + + appMetrics.checkTxDuration = must(meter.Float64Histogram( + "app_abci_check_tx_duration_seconds", + metric.WithDescription("Duration of ABCI CheckTx"), + metric.WithUnit("s"), + histogramBuckets, + )) + + appMetrics.deliverTxDuration = must(meter.Float64Histogram( + "app_abci_deliver_tx_duration_seconds", + metric.WithDescription("Duration of ABCI DeliverTx"), + metric.WithUnit("s"), + histogramBuckets, + )) + + appMetrics.deliverBatchTxDuration = must(meter.Float64Histogram( + "app_abci_deliver_batch_tx_duration_seconds", + metric.WithDescription("Duration of ABCI DeliverTxBatch"), + metric.WithUnit("s"), + histogramBuckets, + )) + + appMetrics.commitDuration = must(meter.Float64Histogram( + "app_abci_commit_duration_seconds", + metric.WithDescription("Duration of ABCI Commit (state write to disk)"), + metric.WithUnit("s"), + histogramBuckets, + )) + + appMetrics.blockProcessDuration = must(meter.Float64Histogram( + "app_block_process_duration_seconds", + metric.WithDescription("Duration of block tx processing by execution type"), + metric.WithUnit("s"), + histogramBuckets, + )) + + appMetrics.txCount = must(meter.Int64Counter( + "app_tx_count_total", + metric.WithDescription("Number of transactions delivered"), + )) + + appMetrics.txProcessType = must(meter.Int64Counter( + "app_tx_process_type_total", + metric.WithDescription("Transactions processed by execution type"), + )) + + appMetrics.txGas = must(meter.Int64Counter( + "app_tx_gas_total", + metric.WithDescription("Cumulative transaction gas by type (gas_used, gas_wanted)"), + )) + + appMetrics.txGasUsed = must(meter.Int64Gauge( + "app_tx_gas_used", + metric.WithDescription("Gas used by the most recently delivered transaction"), + )) + + appMetrics.txGasWanted = must(meter.Int64Gauge( + "app_tx_gas_wanted", + metric.WithDescription("Gas wanted by the most recently delivered transaction"), + )) + + appMetrics.optimisticProcessing = must(meter.Int64Counter( + "app_optimistic_processing_total", + metric.WithDescription("Optimistic processing attempts; enabled=true means cache hit, false means miss"), + )) + + appMetrics.failedGasWantedCheck = must(meter.Int64Counter( + "app_failed_total_gas_wanted_check_total", + metric.WithDescription("Proposals rejected because total block gas wanted exceeded max"), + )) + + appMetrics.gigaFallback = must(meter.Int64Counter( + "app_giga_fallback_to_v2_total", + metric.WithDescription("Times giga executor fell back to V2 processing"), + )) + + appMetrics.invarianceDuration = must(meter.Float64Histogram( + "app_lightinvariance_supply_duration_seconds", + metric.WithDescription("Duration of light invariance total supply check"), + metric.WithUnit("s"), + histogramBuckets, + )) + + appMetrics.invarianceInvalidKey = must(meter.Int64Counter( + "app_lightinvariance_supply_invalid_key_total", + metric.WithDescription("Invalid changed-pair keys detected during invariance check"), + )) + + appMetrics.invarianceUnmarshalFail = must(meter.Int64Counter( + "app_lightinvariance_supply_unmarshal_failure_total", + metric.WithDescription("Unmarshal failures during invariance supply check"), + )) + + // Build-info observable gauge replaces GaugeSeidVersionAndCommit (called per BeginBlock). + // The callback fires on every Prometheus scrape; no per-block call site is needed. + // TODO(PLT-327): remove metrics.GaugeSeidVersionAndCommit call in abci.go once app_build_info verified + vi := version.NewInfo() + _ = must(meter.Int64ObservableGauge( + "app_build_info", + metric.WithDescription("Running binary build info; value is always 1"), + metric.WithInt64Callback(func(_ context.Context, o metric.Int64Observer) error { + o.Observe(1, + metric.WithAttributes( + attribute.String("seid_version", vi.Version), + attribute.String("commit", vi.GitCommit), + ), + ) + return nil + }), + )) + appMetrics.initialized = true +} diff --git a/app/metrics_test.go b/app/metrics_test.go new file mode 100644 index 0000000000..a1f416cec7 --- /dev/null +++ b/app/metrics_test.go @@ -0,0 +1,48 @@ +package app + +import ( + "testing" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/noop" +) + +func TestInitAppMetricsNoPanic(t *testing.T) { + otel.SetMeterProvider(noop.NewMeterProvider()) + initAppMetrics() +} + +func TestAppMetricsAllInstrumentsUsable(t *testing.T) { + otel.SetMeterProvider(noop.NewMeterProvider()) + initAppMetrics() + + ctx := t.Context() + + appMetrics.beginBlockDuration.Record(ctx, 0.1) + appMetrics.endBlockDuration.Record(ctx, 0.2) + appMetrics.moduleEndBlockDuration.Record(ctx, 0.15) + appMetrics.checkTxDuration.Record(ctx, 0.05) + appMetrics.deliverTxDuration.Record(ctx, 0.01) + appMetrics.deliverBatchTxDuration.Record(ctx, 0.1) + appMetrics.commitDuration.Record(ctx, 0.3) + appMetrics.blockProcessDuration.Record(ctx, 0.5) + + appMetrics.txCount.Add(ctx, 1) + appMetrics.txProcessType.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("type", "sequential"))) + appMetrics.txGas.Add(ctx, 100, otelmetric.WithAttributes(attribute.String("type", "gas_used"))) + appMetrics.txGasUsed.Record(ctx, 1000) + appMetrics.txGasWanted.Record(ctx, 2000) + + appMetrics.optimisticProcessing.Add(ctx, 1, otelmetric.WithAttributes(attribute.Bool("enabled", true))) + appMetrics.failedGasWantedCheck.Add(ctx, 1) + appMetrics.gigaFallback.Add(ctx, 1) + + appMetrics.invarianceDuration.Record(ctx, 0.01) + appMetrics.invarianceInvalidKey.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("type", "sei"))) + appMetrics.invarianceUnmarshalFail.Add(ctx, 1, otelmetric.WithAttributes( + attribute.String("type", "usei"), + attribute.String("step", "post_block"), + )) +}