diff --git a/README-INFRA-METRICS.md b/README-INFRA-METRICS.md new file mode 100644 index 00000000000..ba5d66848cf --- /dev/null +++ b/README-INFRA-METRICS.md @@ -0,0 +1,237 @@ +# TRON Prometheus Infrastructure Metrics + +**PR for [java-tron Issue #6590](https://github.com/tronprotocol/java-tron/issues/6590)** | +**Penn Blockchain Conference Hackathon 2026 — TRON Bounty 2** + +--- + +## What This PR Does + +Implements Prometheus metrics for empty block detection and SR set change monitoring, addressing critical operational blind spots in java-tron's monitoring infrastructure. + +## New Metrics Reference + +| Metric Name | Type | Labels | Description | +|-------------|------|--------|-------------| +| `tron:block_transaction_count` | Histogram | `miner` | Distribution of transaction counts per block | +| `tron:sr_set_change_total` | Counter | `witness`, `change_type` | SR set changes (added/removed) | + +### Empty Blocks via Histogram + +Query empty blocks using the histogram's `le="0.0"` bucket: + +```promql +# Empty blocks count by miner +tron:block_transaction_count_bucket{le="0.0"} + +# Empty block ratio +rate(tron:block_transaction_count_bucket{le="0.0"}[1h]) / ignoring(le) rate(tron:block_transaction_count_count[1h]) +``` + +### Histogram Buckets + +`[0, 10, 50, 100, 200, 500, 1000, 2000, 5000, 10000]` + +### Label Values for `tron:sr_set_change_total` + +| Label | Value | Description | +|-------|-------|-------------| +| `change_type` | `added` | A new SR entered the active set | +| `change_type` | `removed` | An existing SR left the active set | +| `witness` | hex-encoded address | The SR address affected | + +## Setup Instructions + +### Enable Prometheus Metrics + +In your node's `config.conf`: + +```hocon +node { + metricsPrometheusEnable = true +} +``` + +Or via CLI flag: + +```bash +java -jar FullNode.jar --metrics-prometheus-enable +``` + +### Prometheus Endpoint + +When enabled, metrics are available at: + +``` +http://localhost:9527/metrics +``` + +### Prometheus 
Configuration + +Add to your `prometheus.yml`: + +```yaml +scrape_configs: + - job_name: 'tron-node' + static_configs: + - targets: ['localhost:9527'] + metrics_path: '/metrics' +``` + +## PromQL Example Queries + +### Empty Block Rate (per second, 1m window) + +```promql +rate(tron:block_transaction_count_bucket{le="0.0"}[1m]) +``` + +### Empty Block Ratio (last hour) + +```promql +rate(tron:block_transaction_count_bucket{le="0.0"}[1h]) / ignoring(le) rate(tron:block_transaction_count_count[1h]) +``` + +### Total Empty Blocks by Miner + +```promql +tron:block_transaction_count_bucket{le="0.0"} +``` + +### Transaction Count Distribution + +```promql +# Blocks with 1-10 transactions +tron:block_transaction_count_bucket{le="10.0"} - ignoring(le) tron:block_transaction_count_bucket{le="0.0"} + +# Average transactions per block +rate(tron:block_transaction_count_sum[5m]) / rate(tron:block_transaction_count_count[5m]) +``` + +### SR Set Changes Over Time + +```promql +rate(tron:sr_set_change_total[5m]) +``` + +### SRs Added vs Removed + +```promql +# Added +sum by (change_type) (tron:sr_set_change_total{change_type="added"}) + +# Removed +sum by (change_type) (tron:sr_set_change_total{change_type="removed"}) +``` + +### Alert: High Empty Block Rate + +```promql +rate(tron:block_transaction_count_bucket{le="0.0"}[5m]) > 10 +``` + +### Alert: SR Set Changed + +```promql +increase(tron:sr_set_change_total[1h]) > 0 +``` + +## Files Modified + +| File | Change | +|------|--------| +| `common/src/main/java/org/tron/common/prometheus/MetricKeys.java` | Removed `BLOCK_EMPTY`, added `BLOCK_TRANSACTION_COUNT` histogram constant | +| `common/src/main/java/org/tron/common/prometheus/MetricsCounter.java` | Removed `BLOCK_EMPTY` counter registration | +| `common/src/main/java/org/tron/common/prometheus/MetricsHistogram.java` | Added overloaded `init()` for custom buckets, registered `BLOCK_TRANSACTION_COUNT` | +| `framework/src/main/java/org/tron/core/metrics/blockchain/BlockChainMetricManager.java` | Replaced counter with 
`histogramObserve()` for all blocks, kept SR counter | +| `framework/src/test/java/org/tron/core/metrics/prometheus/PrometheusApiServiceTest.java` | Updated tests for histogram bucket queries | + +## Build & Test Commands + +```bash +# Build without tests (fast) +./gradlew clean build -x test + +# Compile modified source +./gradlew :framework:compileJava :common:compileJava + +# Run Prometheus metric tests only +./gradlew :framework:test --tests \ + "org.tron.core.metrics.prometheus.PrometheusApiServiceTest" + +# Run all metrics tests +./gradlew :framework:test --tests "org.tron.core.metrics.*" + +# Full test suite +./gradlew test + +# Coverage report +./gradlew :framework:jacocoTestReport +# Report: framework/build/reports/jacoco/test/html/index.html +``` + +## Implementation Details + +### Block Transaction Count Histogram + +Records transaction count for **all blocks** (including empty blocks): + +```java +int txCount = block.getTransactions().size(); +Metrics.histogramObserve(MetricKeys.Histogram.BLOCK_TRANSACTION_COUNT, txCount, + StringUtil.encode58Check(address)); +``` + +Benefits over simple counter: +- **Rich insights**: Tracks full distribution of tx counts +- **Flexible queries**: Percentiles, trends, specific ranges +- **Empty block detection**: Via `le="0.0"` bucket + +### SR Set Change Detection + +```java +List<ByteString> currentSrList = + chainBaseManager.getWitnessScheduleStore().getActiveWitnesses(); +Set<String> currentSrSet = currentSrList.stream() + .map(bs -> Hex.toHexString(bs.toByteArray())) + .collect(Collectors.toSet()); + +if (!previousSrSet.isEmpty() && !currentSrSet.equals(previousSrSet)) { + for (String sr : Sets.difference(currentSrSet, previousSrSet)) { + Metrics.counterInc(MetricKeys.Counter.SR_SET_CHANGE, 1, + sr, MetricLabels.Counter.SR_ADDED); + } + for (String sr : Sets.difference(previousSrSet, currentSrSet)) { + Metrics.counterInc(MetricKeys.Counter.SR_SET_CHANGE, 1, + sr, MetricLabels.Counter.SR_REMOVED); + } +} +previousSrSet = currentSrSet; 
+``` + +### Code Style + +- Purely additive — zero protocol changes, zero API changes, zero backward compatibility issues +- Uses existing `Metrics.histogramObserve()` pattern for histogram +- Uses existing `Metrics.counterInc()` pattern for counter +- All constants defined in `MetricKeys.java` (no hardcoded strings) +- Java 8 compatible +- No new Gradle dependencies + +## Why Histogram for Empty Blocks? + +The histogram approach (as suggested by Sunny6889) provides richer insights: + +| Approach | Pros | +|----------|------| +| Counter | Simple, single-purpose | +| **Histogram** | Tracks distribution, enables ratio queries, supports percentiles | + +Example queries enabled by histogram: +- Empty block ratio over any time window +- Transaction distribution patterns +- Block capacity utilization + +## Related Issues + +- [java-tron #6590](https://github.com/tronprotocol/java-tron/issues/6590) — Prometheus metrics for empty blocks and SR changes diff --git a/common/src/main/java/org/tron/common/prometheus/MetricKeys.java b/common/src/main/java/org/tron/common/prometheus/MetricKeys.java index 87ab6fae0a3..60a221fbd42 100644 --- a/common/src/main/java/org/tron/common/prometheus/MetricKeys.java +++ b/common/src/main/java/org/tron/common/prometheus/MetricKeys.java @@ -17,6 +17,7 @@ public static class Counter { public static final String P2P_ERROR = "tron:p2p_error"; public static final String P2P_DISCONNECT = "tron:p2p_disconnect"; public static final String INTERNAL_SERVICE_FAIL = "tron:internal_service_fail"; + public static final String SR_SET_CHANGE = "tron:sr_set_change_total"; private Counter() { throw new IllegalStateException("Counter"); @@ -62,6 +63,7 @@ public static class Histogram { public static final String MESSAGE_PROCESS_LATENCY = "tron:message_process_latency_seconds"; public static final String BLOCK_FETCH_LATENCY = "tron:block_fetch_latency_seconds"; public static final String BLOCK_RECEIVE_DELAY = "tron:block_receive_delay_seconds"; + public static 
final String BLOCK_TRANSACTION_COUNT = "tron:block_transaction_count"; private Histogram() { throw new IllegalStateException("Histogram"); diff --git a/common/src/main/java/org/tron/common/prometheus/MetricLabels.java b/common/src/main/java/org/tron/common/prometheus/MetricLabels.java index 2aa3c1e3378..353c8302ad4 100644 --- a/common/src/main/java/org/tron/common/prometheus/MetricLabels.java +++ b/common/src/main/java/org/tron/common/prometheus/MetricLabels.java @@ -31,6 +31,9 @@ public static class Counter { public static final String TXS_FAIL_SIG = "sig"; public static final String TXS_FAIL_TAPOS = "tapos"; public static final String TXS_FAIL_DUP = "dup"; + public static final String BLOCK_EMPTY = "empty"; + public static final String SR_ADDED = "added"; + public static final String SR_REMOVED = "removed"; private Counter() { throw new IllegalStateException("Counter"); diff --git a/common/src/main/java/org/tron/common/prometheus/MetricsCounter.java b/common/src/main/java/org/tron/common/prometheus/MetricsCounter.java index 6acdf23b3bc..862baed8371 100644 --- a/common/src/main/java/org/tron/common/prometheus/MetricsCounter.java +++ b/common/src/main/java/org/tron/common/prometheus/MetricsCounter.java @@ -18,6 +18,9 @@ class MetricsCounter { init(MetricKeys.Counter.P2P_DISCONNECT, "tron p2p disconnect .", "type"); init(MetricKeys.Counter.INTERNAL_SERVICE_FAIL, "internal Service fail.", "class", "method"); + init(MetricKeys.Counter.SR_SET_CHANGE, + "Total SR set changes during maintenance periods.", + "witness", "change_type"); } private MetricsCounter() { diff --git a/common/src/main/java/org/tron/common/prometheus/MetricsHistogram.java b/common/src/main/java/org/tron/common/prometheus/MetricsHistogram.java index 556db10feb5..545ccba0392 100644 --- a/common/src/main/java/org/tron/common/prometheus/MetricsHistogram.java +++ b/common/src/main/java/org/tron/common/prometheus/MetricsHistogram.java @@ -48,6 +48,10 @@ public class MetricsHistogram { 
init(MetricKeys.Histogram.BLOCK_FETCH_LATENCY, "fetch block latency."); init(MetricKeys.Histogram.BLOCK_RECEIVE_DELAY, "receive block delay time, receiveTime - blockTime."); + init(MetricKeys.Histogram.BLOCK_TRANSACTION_COUNT, + "Distribution of transaction counts per block.", + new double[]{0, 10, 50, 100, 200, 500, 1000, 2000, 5000, 10000}, + "miner"); } private MetricsHistogram() { @@ -62,6 +66,17 @@ private static void init(String name, String help, String... labels) { .register()); } + private static void init(String name, String help, double[] buckets, String... labels) { + Histogram.Builder builder = Histogram.build() + .name(name) + .help(help) + .labelNames(labels); + if (buckets != null && buckets.length > 0) { + builder.buckets(buckets); + } + container.put(name, builder.register()); + } + static Histogram.Timer startTimer(String key, String... labels) { if (Metrics.enabled()) { Histogram histogram = container.get(key); diff --git a/framework/src/main/java/org/tron/core/metrics/blockchain/BlockChainMetricManager.java b/framework/src/main/java/org/tron/core/metrics/blockchain/BlockChainMetricManager.java index 384f1d8add1..9624495208a 100644 --- a/framework/src/main/java/org/tron/core/metrics/blockchain/BlockChainMetricManager.java +++ b/framework/src/main/java/org/tron/core/metrics/blockchain/BlockChainMetricManager.java @@ -1,9 +1,13 @@ package org.tron.core.metrics.blockchain; import com.codahale.metrics.Counter; +import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; import java.util.Map; import java.util.SortedMap; import java.util.concurrent.ConcurrentHashMap; @@ -42,6 +46,7 @@ public class BlockChainMetricManager { private long failProcessBlockNum = 0; @Setter private String failProcessBlockReason = ""; + private Set<String> previousSrSet = new HashSet<>(); public BlockChainInfo getBlockChainInfo() 
{ BlockChainInfo blockChainInfo = new BlockChainInfo(); @@ -169,6 +174,30 @@ public void applyBlock(BlockCapsule block) { Metrics.counterInc(MetricKeys.Counter.TXS, block.getTransactions().size(), MetricLabels.Counter.TXS_SUCCESS, MetricLabels.Counter.TXS_SUCCESS); } + + // Record transaction count distribution for all blocks (including empty blocks) + int txCount = block.getTransactions().size(); + Metrics.histogramObserve(MetricKeys.Histogram.BLOCK_TRANSACTION_COUNT, txCount, + StringUtil.encode58Check(address)); + + // SR set change detection: diff active witnesses (hex) against the previously seen set + List<ByteString> currentSrList = + chainBaseManager.getWitnessScheduleStore().getActiveWitnesses(); + Set<String> currentSrSet = currentSrList.stream() + .map(bs -> Hex.toHexString(bs.toByteArray())) + .collect(Collectors.toSet()); + + if (!previousSrSet.isEmpty() && !currentSrSet.equals(previousSrSet)) { + for (String sr : Sets.difference(currentSrSet, previousSrSet)) { + Metrics.counterInc(MetricKeys.Counter.SR_SET_CHANGE, 1, + sr, MetricLabels.Counter.SR_ADDED); + } + for (String sr : Sets.difference(previousSrSet, currentSrSet)) { + Metrics.counterInc(MetricKeys.Counter.SR_SET_CHANGE, 1, + sr, MetricLabels.Counter.SR_REMOVED); + } + } + previousSrSet = currentSrSet; } private List getSrList() { diff --git a/framework/src/test/java/org/tron/core/metrics/prometheus/PrometheusApiServiceTest.java b/framework/src/test/java/org/tron/core/metrics/prometheus/PrometheusApiServiceTest.java index d4d758b7a98..96c79278b96 100644 --- a/framework/src/test/java/org/tron/core/metrics/prometheus/PrometheusApiServiceTest.java +++ b/framework/src/test/java/org/tron/core/metrics/prometheus/PrometheusApiServiceTest.java @@ -22,12 +22,14 @@ import org.tron.common.parameter.CommonParameter; import org.tron.common.prometheus.MetricLabels; import org.tron.common.prometheus.Metrics; +import org.bouncycastle.util.encoders.Hex; import org.tron.common.utils.ByteArray; import org.tron.common.utils.PublicMethod; import org.tron.common.utils.Sha256Hash; import 
org.tron.common.utils.Utils; import org.tron.consensus.dpos.DposSlot; import org.tron.core.ChainBaseManager; +import org.tron.core.metrics.blockchain.BlockChainMetricManager; import org.tron.core.capsule.AccountCapsule; import org.tron.core.capsule.BlockCapsule; import org.tron.core.capsule.WitnessCapsule; @@ -53,6 +55,8 @@ public class PrometheusApiServiceTest extends BaseTest { private ConsensusService consensusService; @Resource private ChainBaseManager chainManager; + @Resource + private BlockChainMetricManager blockChainMetricManager; static { Args.setParam(new String[] {"-d", dbPath()}, TestConstants.TEST_CONF); @@ -170,4 +174,83 @@ private BlockCapsule createTestBlockCapsule(long time, return blockCapsule; } + @Test + public void testEmptyBlockMetric() throws Exception { + ECKey ecKey = ECKey.fromPrivate(privateKey); + String minerBase58 = org.tron.common.utils.StringUtil.encode58Check(ecKey.getAddress()); + ByteString witnessAddress = ByteString.copyFrom(ecKey.getAddress()); + + chainBaseManager.getWitnessScheduleStore().saveActiveWitnesses(new ArrayList<>()); + chainBaseManager.addWitness(witnessAddress); + + BlockCapsule block = new BlockCapsule( + chainBaseManager.getDynamicPropertiesStore().getLatestBlockHeaderNumber() + 1, + Sha256Hash.wrap(chainBaseManager.getDynamicPropertiesStore().getLatestBlockHeaderHash().getByteString()), + System.currentTimeMillis(), + witnessAddress); + block.generatedByMyself = true; + block.setMerkleRoot(); + block.sign(privateKey); + + Double beforeValue = CollectorRegistry.defaultRegistry.getSampleValue( + "tron:block_transaction_count_bucket", + new String[]{"miner", "le"}, + new String[]{minerBase58, "0.0"}); + double before = beforeValue == null ? 
0.0 : beforeValue; + + blockChainMetricManager.applyBlock(block); + + Double afterValue = CollectorRegistry.defaultRegistry.getSampleValue( + "tron:block_transaction_count_bucket", + new String[]{"miner", "le"}, + new String[]{minerBase58, "0.0"}); + Assert.assertNotNull("Empty block bucket should exist for miner: " + minerBase58, afterValue); + Assert.assertEquals("Histogram bucket le=0.0 should have incremented by 1", + before + 1.0, afterValue, 0.001); + } + + @Test + public void testSrSetChangeMetric() throws Exception { + ECKey ecKey = ECKey.fromPrivate(privateKey); + ByteString witnessAddress = ByteString.copyFrom(ecKey.getAddress()); + + chainBaseManager.getWitnessScheduleStore().saveActiveWitnesses(new ArrayList<>()); + chainBaseManager.addWitness(witnessAddress); + + BlockCapsule block1 = new BlockCapsule( + chainBaseManager.getDynamicPropertiesStore().getLatestBlockHeaderNumber() + 1, + Sha256Hash.wrap(chainBaseManager.getDynamicPropertiesStore().getLatestBlockHeaderHash().getByteString()), + System.currentTimeMillis(), + witnessAddress); + block1.generatedByMyself = true; + block1.setMerkleRoot(); + block1.sign(privateKey); + blockChainMetricManager.applyBlock(block1); + + ECKey newWitnessKey = new ECKey(Utils.getRandom()); + ByteString newWitnessBs = ByteString.copyFrom(newWitnessKey.getAddress()); + chainBaseManager.addWitness(newWitnessBs); + + BlockCapsule block2 = new BlockCapsule( + chainBaseManager.getDynamicPropertiesStore().getLatestBlockHeaderNumber() + 1, + Sha256Hash.wrap(chainBaseManager.getDynamicPropertiesStore().getLatestBlockHeaderHash().getByteString()), + System.currentTimeMillis() + 3000, + witnessAddress); + block2.generatedByMyself = true; + block2.setMerkleRoot(); + block2.sign(privateKey); + blockChainMetricManager.applyBlock(block2); + + String newWitnessHex = Hex.toHexString(newWitnessBs.toByteArray()); + Double addedValue = CollectorRegistry.defaultRegistry.getSampleValue( + "tron:sr_set_change_total", + new String[]{"witness", 
"change_type"}, + new String[]{newWitnessHex, "added"}); + + Assert.assertNotNull( + "tron:sr_set_change_total{witness=...,change_type=added} should exist", + addedValue); + Assert.assertTrue("SR change counter should be >= 1", addedValue >= 1.0); + } + } \ No newline at end of file