From eca4113adf0bcc6121a9bfdc066b66d822d6139c Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 13:18:47 +0700 Subject: [PATCH 01/68] feat: add ix-tests integration test harness Amp-Thread-ID: https://ampcode.com/threads/T-019db3d7-0da5-76ce-a24b-dc7507211386 Co-authored-by: Amp --- Cargo.lock | 26 ++++++++++++++++++ Cargo.toml | 2 +- ix-tests/Cargo.toml | 26 ++++++++++++++++++ ix-tests/configs/suite.toml | 6 ++++ ix-tests/src/cli.rs | 35 +++++++++++++++++++++++ ix-tests/src/config.rs | 55 +++++++++++++++++++++++++++++++++++++ ix-tests/src/main.rs | 37 +++++++++++++++++++++++++ ix-tests/src/scenario.rs | 33 ++++++++++++++++++++++ 8 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 ix-tests/Cargo.toml create mode 100644 ix-tests/configs/suite.toml create mode 100644 ix-tests/src/cli.rs create mode 100644 ix-tests/src/config.rs create mode 100644 ix-tests/src/main.rs create mode 100644 ix-tests/src/scenario.rs diff --git a/Cargo.lock b/Cargo.lock index f1faffb..d455628 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2206,6 +2206,32 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" +[[package]] +name = "ix-tests" +version = "0.1.0" +dependencies = [ + "anyhow", + "bs58", + "futures", + "helius-laserstream", + "reqwest 0.12.28", + "serde", + "serde_json", + "solana-keypair 3.1.2", + "solana-pubkey 4.1.0", + "solana-rpc-client", + "solana-signature 3.4.0", + "solana-signer 3.0.0", + "solana-system-interface 3.2.0", + "solana-transaction 3.1.0", + "tokio", + "tokio-stream", + "toml 0.9.12+spec-1.1.0", + "tonic", + "tracing", + "tracing-subscriber", +] + [[package]] name = "jiff" version = "0.2.23" diff --git a/Cargo.toml b/Cargo.toml index 9e92def..9cafb59 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,3 @@ [workspace] -members = ["event-proto", "grpc-service", "geyser-plugin"] +members = ["event-proto", "grpc-service", 
"geyser-plugin", "ix-tests"] resolver = "2" diff --git a/ix-tests/Cargo.toml b/ix-tests/Cargo.toml new file mode 100644 index 0000000..003caa1 --- /dev/null +++ b/ix-tests/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "ix-tests" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1.0" +bs58 = "0.5" +futures = "0.3" +helius-laserstream = { git = "https://github.com/magicblock-labs/laserstream-sdk", rev = "fe205cb2b85864d1821027d663813d66160285dc" } +reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +solana-keypair = "3.1.2" +solana-pubkey = "4.1.0" +solana-rpc-client = { version = "4.0.0-beta.4", default-features = false } +solana-signature = "3.4.0" +solana-signer = "3.0.0" +solana-system-interface = "3.2.0" +solana-transaction = "3.1.0" +tokio = { version = "1.47", features = ["macros", "rt-multi-thread", "process", "sync", "time", "fs", "signal"] } +tokio-stream = "0.1" +toml = "0.9.12" +tonic = { version = "0.12", features = ["transport"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } diff --git a/ix-tests/configs/suite.toml b/ix-tests/configs/suite.toml new file mode 100644 index 0000000..38e7d2b --- /dev/null +++ b/ix-tests/configs/suite.toml @@ -0,0 +1,6 @@ +service_binary = "target/debug/magigblock-grpc-service" +validator_rpc_url = "http://127.0.0.1:8899" +failure_artifact_root = "target/ix-tests/failures" +service_start_timeout_ms = 10000 +checkpoint_timeout_ms = 20000 +transaction_timeout_ms = 20000 diff --git a/ix-tests/src/cli.rs b/ix-tests/src/cli.rs new file mode 100644 index 0000000..c1564ab --- /dev/null +++ b/ix-tests/src/cli.rs @@ -0,0 +1,35 @@ +use std::path::PathBuf; + +use anyhow::{Context, bail}; + +pub struct Cli { + pub config_path: PathBuf, + pub scenario: String, +} + +impl Cli { + pub fn parse() -> anyhow::Result { + let mut cli = Self { + config_path: 
PathBuf::from("ix-tests/configs/suite.toml"), + scenario: "all".to_owned(), + }; + + let mut args = std::env::args().skip(1); + while let Some(arg) = args.next() { + match arg.as_str() { + "--config" => { + let path = + args.next().context("missing value for --config")?; + cli.config_path = PathBuf::from(path); + } + "--scenario" => { + cli.scenario = + args.next().context("missing value for --scenario")?; + } + _ => bail!("invalid CLI argument: {arg}"), + } + } + + Ok(cli) + } +} diff --git a/ix-tests/src/config.rs b/ix-tests/src/config.rs new file mode 100644 index 0000000..6b35f49 --- /dev/null +++ b/ix-tests/src/config.rs @@ -0,0 +1,55 @@ +use std::path::{Path, PathBuf}; + +use serde::Deserialize; + +#[derive(Clone, Debug)] +pub struct SuiteConfig { + pub service_binary: PathBuf, + pub validator_rpc_url: String, + pub failure_artifact_root: PathBuf, + pub service_start_timeout_ms: u64, + pub checkpoint_timeout_ms: u64, + pub transaction_timeout_ms: u64, +} + +#[derive(Debug, Deserialize)] +struct FileSuiteConfig { + #[serde(default)] + service_binary: Option, + #[serde(default)] + validator_rpc_url: Option, + #[serde(default)] + failure_artifact_root: Option, + #[serde(default)] + service_start_timeout_ms: Option, + #[serde(default)] + checkpoint_timeout_ms: Option, + #[serde(default)] + transaction_timeout_ms: Option, +} + +impl SuiteConfig { + pub fn load(path: &Path) -> anyhow::Result { + let contents = std::fs::read_to_string(path)?; + let file: FileSuiteConfig = toml::from_str(&contents)?; + + Ok(SuiteConfig { + service_binary: file.service_binary.unwrap_or_else(|| { + PathBuf::from("target/debug/magigblock-grpc-service") + }), + validator_rpc_url: file + .validator_rpc_url + .unwrap_or_else(|| "http://127.0.0.1:8899".to_owned()), + failure_artifact_root: file + .failure_artifact_root + .unwrap_or_else(|| PathBuf::from("target/ix-tests/failures")), + service_start_timeout_ms: file + .service_start_timeout_ms + .unwrap_or(10_000), + checkpoint_timeout_ms: 
file.checkpoint_timeout_ms.unwrap_or(20_000), + transaction_timeout_ms: file + .transaction_timeout_ms + .unwrap_or(20_000), + }) + } +} diff --git a/ix-tests/src/main.rs b/ix-tests/src/main.rs new file mode 100644 index 0000000..079e49e --- /dev/null +++ b/ix-tests/src/main.rs @@ -0,0 +1,37 @@ +mod cli; +mod config; +mod scenario; + +use tracing::info; + +fn init_tracing() { + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "ix_tests=info".into()), + ) + .with_target(false) + .init(); +} + +fn main() -> anyhow::Result<()> { + init_tracing(); + + let cli = cli::Cli::parse()?; + let config = config::SuiteConfig::load(&cli.config_path)?; + let scenario = scenario::ScenarioName::parse(&cli.scenario)?; + + info!( + config_path = %cli.config_path.display(), + scenario = scenario.as_str(), + service_binary = %config.service_binary.display(), + validator_rpc_url = %config.validator_rpc_url, + failure_artifact_root = %config.failure_artifact_root.display(), + service_start_timeout_ms = config.service_start_timeout_ms, + checkpoint_timeout_ms = config.checkpoint_timeout_ms, + transaction_timeout_ms = config.transaction_timeout_ms, + "loaded integration test suite config" + ); + + Ok(()) +} diff --git a/ix-tests/src/scenario.rs b/ix-tests/src/scenario.rs new file mode 100644 index 0000000..4d6504b --- /dev/null +++ b/ix-tests/src/scenario.rs @@ -0,0 +1,33 @@ +use anyhow::bail; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum ScenarioName { + All, + SingleBasic, + SingleLoad, + DualConcurrent, + DualRestart, +} + +impl ScenarioName { + pub fn parse(input: &str) -> anyhow::Result { + match input { + "all" => Ok(Self::All), + "single-basic" => Ok(Self::SingleBasic), + "single-load" => Ok(Self::SingleLoad), + "dual-concurrent" => Ok(Self::DualConcurrent), + "dual-restart" => Ok(Self::DualRestart), + _ => bail!("unknown scenario: {input}"), + } + } + + pub fn as_str(&self) -> &'static str { + 
match self { + Self::All => "all", + Self::SingleBasic => "single-basic", + Self::SingleLoad => "single-load", + Self::DualConcurrent => "dual-concurrent", + Self::DualRestart => "dual-restart", + } + } +} From 7cb42b68c334f339b4db29d63764d6e91958e942 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 13:21:00 +0700 Subject: [PATCH 02/68] feat: implement gRPC ping RPC and add test Amp-Thread-ID: https://ampcode.com/threads/T-019db3d7-0da5-76ce-a24b-dc7507211386 Co-authored-by: Amp --- grpc-service/src/grpc_service/service.rs | 28 ++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/grpc-service/src/grpc_service/service.rs b/grpc-service/src/grpc_service/service.rs index 3942b74..6926fb4 100644 --- a/grpc-service/src/grpc_service/service.rs +++ b/grpc-service/src/grpc_service/service.rs @@ -421,9 +421,11 @@ impl< async fn ping( &self, - _request: Request, + request: Request, ) -> Result, Status> { - Err(Status::unimplemented("Ping is not supported")) + Ok(Response::new(PongResponse { + count: request.into_inner().count, + })) } async fn get_latest_blockhash( @@ -474,9 +476,13 @@ mod tests { }; use tokio::time::timeout; + use helius_laserstream::grpc::PingRequest; + use helius_laserstream::grpc::geyser_server::Geyser; + use tonic::Request; + use super::{ - FilterOp, bootstrap_new_pubkeys_impl, parse_accounts_filter, - parse_filter_op, parse_pubkey_list, + FilterOp, GrpcSubscriptionService, bootstrap_new_pubkeys_impl, + parse_accounts_filter, parse_filter_op, parse_pubkey_list, }; use crate::domain::{AccountState, PubkeyFilter, bytes_to_base58}; use crate::errors::{GeykagError, GeykagResult}; @@ -925,4 +931,18 @@ mod tests { [pubkey_b58(1), pubkey_b58(2)].into_iter().collect() ); } + + #[tokio::test] + async fn test_ping_returns_ok() { + let dispatcher = DispatcherHandle::spawn(8, 8); + let snapshot_store = MockSnapshotStore::new(HashMap::new()); + let validator = MockValidatorSubscriptions::succeed(); + + let 
service = + GrpcSubscriptionService::new(dispatcher, snapshot_store, validator); + + let response = + service.ping(Request::new(PingRequest { count: 0 })).await; + assert!(response.is_ok()); + } } From d7dca98f3a92e4ceb1bfa6cc2fae75b87046eb1f Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 13:23:45 +0700 Subject: [PATCH 03/68] feat: add scenario layout, accounts, artifacts, and runner Amp-Thread-ID: https://ampcode.com/threads/T-019db3d7-0da5-76ce-a24b-dc7507211386 Co-authored-by: Amp --- ix-tests/src/accounts.rs | 233 ++++++++++++++++++++++++++++++++++++++ ix-tests/src/artifacts.rs | 101 +++++++++++++++++ ix-tests/src/layout.rs | 34 ++++++ ix-tests/src/main.rs | 21 +++- ix-tests/src/runner.rs | 13 +++ 5 files changed, 400 insertions(+), 2 deletions(-) create mode 100644 ix-tests/src/accounts.rs create mode 100644 ix-tests/src/artifacts.rs create mode 100644 ix-tests/src/layout.rs create mode 100644 ix-tests/src/runner.rs diff --git a/ix-tests/src/accounts.rs b/ix-tests/src/accounts.rs new file mode 100644 index 0000000..3c3af30 --- /dev/null +++ b/ix-tests/src/accounts.rs @@ -0,0 +1,233 @@ +use crate::scenario::ScenarioName; + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum NamedAccount { + SimpleA, + SimpleB, + SimpleC, + SimpleD, + SharedA, + SharedB, + RestartA, + RestartB, + OwnerData, + Hot00, + Hot01, + Hot02, + Hot03, + Hot04, + Hot05, + Hot06, + Hot07, + Hot08, + Hot09, +} + +impl NamedAccount { + fn all() -> &'static [NamedAccount] { + &[ + Self::SimpleA, + Self::SimpleB, + Self::SimpleC, + Self::SimpleD, + Self::SharedA, + Self::SharedB, + Self::RestartA, + Self::RestartB, + Self::OwnerData, + Self::Hot00, + Self::Hot01, + Self::Hot02, + Self::Hot03, + Self::Hot04, + Self::Hot05, + Self::Hot06, + Self::Hot07, + Self::Hot08, + Self::Hot09, + ] + } + + fn index(self) -> usize { + match self { + Self::SimpleA => 0, + Self::SimpleB => 1, + Self::SimpleC => 2, + Self::SimpleD => 3, + Self::SharedA => 4, + Self::SharedB => 
5, + Self::RestartA => 6, + Self::RestartB => 7, + Self::OwnerData => 8, + Self::Hot00 => 9, + Self::Hot01 => 10, + Self::Hot02 => 11, + Self::Hot03 => 12, + Self::Hot04 => 13, + Self::Hot05 => 14, + Self::Hot06 => 15, + Self::Hot07 => 16, + Self::Hot08 => 17, + Self::Hot09 => 18, + } + } +} + +const ACCOUNT_COUNT: usize = 19; + +/// Base seeds — one unique 32-byte array per named account. +/// Each scenario derives its own seeds by XOR-ing with a per-scenario byte. +const BASE_SEEDS: [[u8; 32]; ACCOUNT_COUNT] = [ + // SimpleA + [ + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + ], + // SimpleB + [ + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, + 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + ], + // SimpleC + [ + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, + 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + ], + // SimpleD + [ + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, + 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + ], + // SharedA + [ + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, + 0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + ], + // SharedB + [ + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, + 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + ], + // RestartA + [ + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, + 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 
0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + ], + // RestartB + [ + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, + 0xFC, 0xFD, 0xFE, 0xFF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, + ], + // OwnerData + [ + 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, + 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, + 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, + ], + // Hot00 + [ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, + 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, + ], + // Hot01 + [ + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, + 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40, + ], + // Hot02 + [ + 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, + 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, + ], + // Hot03 + [ + 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, + 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, + ], + // Hot04 + [ + 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, + 0x8D, 0x8E, 0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 0xA0, + ], + // Hot05 + [ + 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, + 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, + 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, + ], + // Hot06 + [ + 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, + 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, + 0xF9, 0xFA, 0xFB, 
0xFC, 0xFD, 0xFE, 0xFF, 0x00, + ], + // Hot07 + [ + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, + 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, + ], + // Hot08 + [ + 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, + 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x41, + ], + // Hot09 + [ + 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, + 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, + ], +]; + +fn scenario_xor_byte(scenario: ScenarioName) -> u8 { + match scenario { + ScenarioName::All => unreachable!("All is expanded before accounts"), + ScenarioName::SingleBasic => 0xAA, + ScenarioName::SingleLoad => 0xBB, + ScenarioName::DualConcurrent => 0xCC, + ScenarioName::DualRestart => 0xDD, + } +} + +fn derive_seed(scenario: ScenarioName, account: NamedAccount) -> [u8; 32] { + let base = BASE_SEEDS[account.index()]; + let xor = scenario_xor_byte(scenario); + let mut seed = [0u8; 32]; + for i in 0..32 { + seed[i] = base[i] ^ xor; + } + seed +} + +pub struct ScenarioAccounts { + seeds: [[u8; 32]; ACCOUNT_COUNT], +} + +impl ScenarioAccounts { + pub fn for_scenario(name: ScenarioName) -> Self { + let mut seeds = [[0u8; 32]; ACCOUNT_COUNT]; + for account in NamedAccount::all() { + seeds[account.index()] = derive_seed(name, *account); + } + Self { seeds } + } + + pub fn pubkey_b58(&self, account: NamedAccount) -> String { + bs58::encode(&self.seeds[account.index()]).into_string() + } +} diff --git a/ix-tests/src/artifacts.rs b/ix-tests/src/artifacts.rs new file mode 100644 index 0000000..1db9ef4 --- /dev/null +++ b/ix-tests/src/artifacts.rs @@ -0,0 +1,101 @@ +use std::path::PathBuf; +use std::time::{SystemTime, UNIX_EPOCH}; + +use anyhow::Context; + +use crate::config::SuiteConfig; +use 
crate::layout::ServiceInstance; +use crate::scenario::ScenarioName; + +#[allow(dead_code)] +pub struct ServiceLogPaths { + pub stdout: PathBuf, + pub stderr: PathBuf, +} + +#[allow(dead_code)] +pub struct RunArtifacts { + run_dir: PathBuf, + failure_root: PathBuf, + persist_on_failure: bool, +} + +impl RunArtifacts { + pub fn new( + config: &SuiteConfig, + scenario: ScenarioName, + ) -> anyhow::Result { + let pid = std::process::id(); + let run_dir = PathBuf::from(format!( + "target/ix-tests/tmp/{}-{}", + scenario.as_str(), + pid + )); + std::fs::create_dir_all(&run_dir).with_context(|| { + format!("failed to create run dir: {}", run_dir.display()) + })?; + + Ok(Self { + run_dir, + failure_root: config.failure_artifact_root.clone(), + persist_on_failure: true, + }) + } + + #[allow(dead_code)] + pub fn service_logs(&self, instance: ServiceInstance) -> ServiceLogPaths { + let label = match instance { + ServiceInstance::One => "service-1", + ServiceInstance::Two => "service-2", + }; + ServiceLogPaths { + stdout: self.run_dir.join(format!("{label}.stdout.log")), + stderr: self.run_dir.join(format!("{label}.stderr.log")), + } + } + + #[allow(dead_code)] + pub fn client_updates_path(&self, scenario: ScenarioName) -> PathBuf { + self.run_dir + .join(format!("{}-client-updates.json", scenario.as_str())) + } + + #[allow(dead_code)] + pub fn persist_failure(&self) -> anyhow::Result<()> { + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let scenario_name = self + .run_dir + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("unknown"); + let dest = self + .failure_root + .join(format!("{scenario_name}-{timestamp}")); + std::fs::create_dir_all(&self.failure_root).with_context(|| { + format!( + "failed to create failure root: {}", + self.failure_root.display() + ) + })?; + std::fs::rename(&self.run_dir, &dest).with_context(|| { + format!( + "failed to move run dir {} to {}", + self.run_dir.display(), + dest.display() + 
) + })?; + Ok(()) + } + + pub fn cleanup_success(&self) -> anyhow::Result<()> { + if self.run_dir.exists() { + std::fs::remove_dir_all(&self.run_dir).with_context(|| { + format!("failed to remove run dir: {}", self.run_dir.display()) + })?; + } + Ok(()) + } +} diff --git a/ix-tests/src/layout.rs b/ix-tests/src/layout.rs new file mode 100644 index 0000000..a733c81 --- /dev/null +++ b/ix-tests/src/layout.rs @@ -0,0 +1,34 @@ +use crate::scenario::ScenarioName; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum ServiceInstance { + One, + Two, +} + +pub struct ScenarioLayout { + pub services: Vec, + pub client_count: usize, +} + +impl ScenarioLayout { + pub fn for_scenario(name: ScenarioName) -> Self { + match name { + ScenarioName::SingleBasic => Self { + services: vec![ServiceInstance::One], + client_count: 4, + }, + ScenarioName::SingleLoad => Self { + services: vec![ServiceInstance::One], + client_count: 100, + }, + ScenarioName::DualConcurrent | ScenarioName::DualRestart => Self { + services: vec![ServiceInstance::One, ServiceInstance::Two], + client_count: 20, + }, + ScenarioName::All => { + unreachable!("All is expanded before layout") + } + } + } +} diff --git a/ix-tests/src/main.rs b/ix-tests/src/main.rs index 079e49e..1934f8a 100644 --- a/ix-tests/src/main.rs +++ b/ix-tests/src/main.rs @@ -1,5 +1,11 @@ +#[allow(dead_code)] +mod accounts; +mod artifacts; mod cli; mod config; +#[allow(dead_code)] +mod layout; +mod runner; mod scenario; use tracing::info; @@ -19,11 +25,11 @@ fn main() -> anyhow::Result<()> { let cli = cli::Cli::parse()?; let config = config::SuiteConfig::load(&cli.config_path)?; - let scenario = scenario::ScenarioName::parse(&cli.scenario)?; + let requested = scenario::ScenarioName::parse(&cli.scenario)?; info!( config_path = %cli.config_path.display(), - scenario = scenario.as_str(), + scenario = requested.as_str(), service_binary = %config.service_binary.display(), validator_rpc_url = %config.validator_rpc_url, failure_artifact_root = 
%config.failure_artifact_root.display(), @@ -33,5 +39,16 @@ fn main() -> anyhow::Result<()> { "loaded integration test suite config" ); + let scenarios = runner::ordered_scenarios(requested); + let names: Vec<&str> = scenarios.iter().map(|s| s.as_str()).collect(); + info!(scenarios = ?names, "resolved scenario execution order"); + + for scenario in &scenarios { + info!(scenario = scenario.as_str(), "running scenario"); + let artifacts = artifacts::RunArtifacts::new(&config, *scenario)?; + artifacts.cleanup_success()?; + info!(scenario = scenario.as_str(), "scenario passed"); + } + Ok(()) } diff --git a/ix-tests/src/runner.rs b/ix-tests/src/runner.rs new file mode 100644 index 0000000..0ed4864 --- /dev/null +++ b/ix-tests/src/runner.rs @@ -0,0 +1,13 @@ +use crate::scenario::ScenarioName; + +pub fn ordered_scenarios(requested: ScenarioName) -> Vec { + match requested { + ScenarioName::All => vec![ + ScenarioName::SingleBasic, + ScenarioName::SingleLoad, + ScenarioName::DualConcurrent, + ScenarioName::DualRestart, + ], + concrete => vec![concrete], + } +} From 77ac4a8177d848d8a60478e085d5517858fca266 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 13:24:28 +0700 Subject: [PATCH 04/68] chore: add ix-tests grpc-service config files Amp-Thread-ID: https://ampcode.com/threads/T-019db3d7-0da5-76ce-a24b-dc7507211386 Co-authored-by: Amp --- ix-tests/configs/grpc-service/service-1.toml | 17 +++++++++++++++++ ix-tests/configs/grpc-service/service-2.toml | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 ix-tests/configs/grpc-service/service-1.toml create mode 100644 ix-tests/configs/grpc-service/service-2.toml diff --git a/ix-tests/configs/grpc-service/service-1.toml b/ix-tests/configs/grpc-service/service-1.toml new file mode 100644 index 0000000..1f10478 --- /dev/null +++ b/ix-tests/configs/grpc-service/service-1.toml @@ -0,0 +1,17 @@ +[kafka] +bootstrap_servers = "localhost:9092" +topic = "solana.testnet.account_updates" 
+group_id = "ix-tests-service-1" +auto_offset_reset = "latest" + +[ksql] +url = "http://localhost:8088" +table = "ACCOUNTS" + +[validator] +accounts_filter_url = "http://localhost:3000/filters/accounts" + +[grpc] +bind_host = "0.0.0.0" +port = 51051 +dispatcher_capacity = 4096 diff --git a/ix-tests/configs/grpc-service/service-2.toml b/ix-tests/configs/grpc-service/service-2.toml new file mode 100644 index 0000000..934c23c --- /dev/null +++ b/ix-tests/configs/grpc-service/service-2.toml @@ -0,0 +1,17 @@ +[kafka] +bootstrap_servers = "localhost:9092" +topic = "solana.testnet.account_updates" +group_id = "ix-tests-service-2" +auto_offset_reset = "latest" + +[ksql] +url = "http://localhost:8088" +table = "ACCOUNTS" + +[validator] +accounts_filter_url = "http://localhost:3000/filters/accounts" + +[grpc] +bind_host = "0.0.0.0" +port = 51052 +dispatcher_capacity = 4096 From 5973b71e9fbf38d9d192f2c9796ed779e1b14722 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 13:26:56 +0700 Subject: [PATCH 05/68] feat: add service process controller with ping readiness probe Amp-Thread-ID: https://ampcode.com/threads/T-019db3d7-0da5-76ce-a24b-dc7507211386 Co-authored-by: Amp --- ix-tests/src/layout.rs | 3 + ix-tests/src/main.rs | 18 ++++- ix-tests/src/service.rs | 170 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 ix-tests/src/service.rs diff --git a/ix-tests/src/layout.rs b/ix-tests/src/layout.rs index a733c81..26f0df8 100644 --- a/ix-tests/src/layout.rs +++ b/ix-tests/src/layout.rs @@ -1,16 +1,19 @@ use crate::scenario::ScenarioName; #[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[allow(dead_code)] pub enum ServiceInstance { One, Two, } +#[allow(dead_code)] pub struct ScenarioLayout { pub services: Vec, pub client_count: usize, } +#[allow(dead_code)] impl ScenarioLayout { pub fn for_scenario(name: ScenarioName) -> Self { match name { diff --git a/ix-tests/src/main.rs b/ix-tests/src/main.rs index 
1934f8a..b20a097 100644 --- a/ix-tests/src/main.rs +++ b/ix-tests/src/main.rs @@ -3,13 +3,17 @@ mod accounts; mod artifacts; mod cli; mod config; -#[allow(dead_code)] mod layout; mod runner; mod scenario; +mod service; use tracing::info; +use crate::layout::ServiceInstance; +use crate::scenario::ScenarioName; +use crate::service::{ServiceController, ServiceSpec}; + fn init_tracing() { tracing_subscriber::fmt() .with_env_filter( @@ -20,7 +24,8 @@ fn init_tracing() { .init(); } -fn main() -> anyhow::Result<()> { +#[tokio::main] +async fn main() -> anyhow::Result<()> { init_tracing(); let cli = cli::Cli::parse()?; @@ -43,9 +48,18 @@ fn main() -> anyhow::Result<()> { let names: Vec<&str> = scenarios.iter().map(|s| s.as_str()).collect(); info!(scenarios = ?names, "resolved scenario execution order"); + let controller = ServiceController::new(&config); + for scenario in &scenarios { info!(scenario = scenario.as_str(), "running scenario"); let artifacts = artifacts::RunArtifacts::new(&config, *scenario)?; + + if *scenario == ScenarioName::SingleBasic { + let spec = ServiceSpec::for_instance(ServiceInstance::One); + let svc = controller.start(&spec, &artifacts).await?; + controller.shutdown(svc).await?; + } + artifacts.cleanup_success()?; info!(scenario = scenario.as_str(), "scenario passed"); } diff --git a/ix-tests/src/service.rs b/ix-tests/src/service.rs new file mode 100644 index 0000000..82ac789 --- /dev/null +++ b/ix-tests/src/service.rs @@ -0,0 +1,170 @@ +use std::path::PathBuf; +use std::time::Duration; + +use anyhow::{Context, bail}; +use helius_laserstream::grpc::PingRequest; +use helius_laserstream::grpc::geyser_client::GeyserClient; +use tokio::process::Command; +use tracing::{debug, info}; + +use crate::artifacts::RunArtifacts; +use crate::config::SuiteConfig; +use crate::layout::ServiceInstance; + +#[allow(dead_code)] +pub struct ManagedService { + pub instance: ServiceInstance, + pub endpoint: String, + child: tokio::process::Child, +} + +pub struct 
ServiceController { + service_binary: PathBuf, + service_start_timeout: Duration, +} + +pub struct ServiceSpec { + pub instance: ServiceInstance, + pub config_path: PathBuf, + pub endpoint: String, +} + +impl ServiceSpec { + pub fn for_instance(instance: ServiceInstance) -> Self { + match instance { + ServiceInstance::One => Self { + instance, + config_path: PathBuf::from( + "ix-tests/configs/grpc-service/service-1.toml", + ), + endpoint: "http://127.0.0.1:51051".to_owned(), + }, + ServiceInstance::Two => Self { + instance, + config_path: PathBuf::from( + "ix-tests/configs/grpc-service/service-2.toml", + ), + endpoint: "http://127.0.0.1:51052".to_owned(), + }, + } + } +} + +impl ServiceController { + pub fn new(config: &SuiteConfig) -> Self { + Self { + service_binary: config.service_binary.clone(), + service_start_timeout: Duration::from_millis( + config.service_start_timeout_ms, + ), + } + } + + pub async fn start( + &self, + spec: &ServiceSpec, + artifacts: &RunArtifacts, + ) -> anyhow::Result { + let log_paths = artifacts.service_logs(spec.instance); + + let stdout_file = std::fs::File::create(&log_paths.stdout) + .with_context(|| { + format!( + "failed to create stdout log: {}", + log_paths.stdout.display() + ) + })?; + let stderr_file = std::fs::File::create(&log_paths.stderr) + .with_context(|| { + format!( + "failed to create stderr log: {}", + log_paths.stderr.display() + ) + })?; + + info!( + binary = %self.service_binary.display(), + config = %spec.config_path.display(), + endpoint = %spec.endpoint, + "starting grpc-service" + ); + + let child = Command::new(&self.service_binary) + .arg("--config") + .arg(&spec.config_path) + .stdout(stdout_file) + .stderr(stderr_file) + .kill_on_drop(true) + .spawn() + .with_context(|| { + format!( + "failed to spawn service binary: {}", + self.service_binary.display() + ) + })?; + + let managed = ManagedService { + instance: spec.instance, + endpoint: spec.endpoint.clone(), + child, + }; + + 
self.wait_until_ready(&spec.endpoint, &log_paths).await?; + + Ok(managed) + } + + pub async fn shutdown( + &self, + mut service: ManagedService, + ) -> anyhow::Result<()> { + info!( + endpoint = %service.endpoint, + "shutting down grpc-service" + ); + service.child.start_kill().context("failed to send kill")?; + let status = service + .child + .wait() + .await + .context("failed to wait for child")?; + debug!( + endpoint = %service.endpoint, + status = %status, + "grpc-service exited" + ); + Ok(()) + } + + async fn wait_until_ready( + &self, + endpoint: &str, + log_paths: &crate::artifacts::ServiceLogPaths, + ) -> anyhow::Result<()> { + let deadline = tokio::time::Instant::now() + self.service_start_timeout; + + loop { + if let Ok(mut client) = + GeyserClient::connect(endpoint.to_owned()).await + && client.ping(PingRequest { count: 1 }).await.is_ok() + { + info!(endpoint, "grpc-service is ready"); + return Ok(()); + } + + if tokio::time::Instant::now() >= deadline { + bail!( + "grpc-service at {} did not become ready within {:?}\n\ + stdout: {}\n\ + stderr: {}", + endpoint, + self.service_start_timeout, + log_paths.stdout.display(), + log_paths.stderr.display(), + ); + } + + tokio::time::sleep(Duration::from_millis(200)).await; + } + } +} From 38391bd836527c5ba1c799f0f547972eff75c02a Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 13:29:08 +0700 Subject: [PATCH 06/68] feat: add gRPC test client and observation log Amp-Thread-ID: https://ampcode.com/threads/T-019db3df-499e-72bd-9be0-df089c30106d Co-authored-by: Amp --- ix-tests/src/client.rs | 179 ++++++++++++++++++++++++++++++++++++ ix-tests/src/main.rs | 4 + ix-tests/src/observation.rs | 51 ++++++++++ 3 files changed, 234 insertions(+) create mode 100644 ix-tests/src/client.rs create mode 100644 ix-tests/src/observation.rs diff --git a/ix-tests/src/client.rs b/ix-tests/src/client.rs new file mode 100644 index 0000000..9aae1b6 --- /dev/null +++ b/ix-tests/src/client.rs @@ -0,0 +1,179 @@ +use 
std::collections::HashMap; +use std::time::{SystemTime, UNIX_EPOCH}; + +use anyhow::Context; +use helius_laserstream::grpc::geyser_client::GeyserClient; +use helius_laserstream::grpc::subscribe_update::UpdateOneof; +use helius_laserstream::grpc::{ + SubscribeRequest, SubscribeRequestFilterAccounts, +}; +use tokio::sync::mpsc; +use tokio_stream::StreamExt; +use tokio_stream::wrappers::ReceiverStream; + +use crate::layout::ServiceInstance; +use crate::observation::{ClientLog, ObservedUpdate}; + +#[allow(dead_code)] +pub struct TestGrpcClient { + pub id: usize, + pub service: ServiceInstance, + pub endpoint: String, + log: ClientLog, + request_tx: mpsc::Sender, + receive_task: tokio::task::JoinHandle>, +} + +#[allow(dead_code)] +impl TestGrpcClient { + pub async fn connect( + id: usize, + service: ServiceInstance, + endpoint: String, + ) -> anyhow::Result { + let mut client = + GeyserClient::connect(endpoint.clone()).await.with_context( + || format!("client {id}: failed to connect to {endpoint}"), + )?; + + let (tx, rx) = mpsc::channel::(16); + + tx.send(SubscribeRequest::default()) + .await + .context("failed to send initial empty subscribe request")?; + + let response = client + .subscribe(ReceiverStream::new(rx)) + .await + .with_context(|| format!("client {id}: subscribe call failed"))?; + let mut update_stream = response.into_inner(); + + let log = ClientLog::new(); + let log_clone = log.clone(); + + let receive_task = tokio::spawn(async move { + while let Some(item) = update_stream.next().await { + match item { + Ok(update) => { + if let Some(UpdateOneof::Account(account_update)) = + update.update_oneof + && let Some(info) = account_update.account + { + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis(); + + let observed = ObservedUpdate { + client_id: id, + service, + pubkey_b58: bs58::encode(&info.pubkey) + .into_string(), + slot: account_update.slot, + lamports: info.lamports, + owner_b58: bs58::encode(&info.owner) 
+ .into_string(), + executable: info.executable, + rent_epoch: info.rent_epoch, + write_version: info.write_version, + txn_signature_b58: info + .txn_signature + .as_ref() + .map(|b| bs58::encode(b).into_string()), + data: info.data, + received_at_millis: now, + }; + + log_clone.push(observed); + } + } + Err(status) => { + tracing::warn!( + client_id = id, + %status, + "stream error" + ); + break; + } + } + } + Ok(()) + }); + + Ok(Self { + id, + service, + endpoint, + log, + request_tx: tx, + receive_task, + }) + } + + pub async fn replace_subscription( + &self, + pubkeys: &[String], + ) -> anyhow::Result<()> { + let req = SubscribeRequest { + accounts: HashMap::from([( + "replace".to_owned(), + SubscribeRequestFilterAccounts { + account: pubkeys.to_vec(), + ..Default::default() + }, + )]), + ..Default::default() + }; + self.request_tx + .send(req) + .await + .context("failed to send replace subscription request") + } + + pub async fn patch_subscription( + &self, + add: &[String], + remove: &[String], + ) -> anyhow::Result<()> { + let mut accounts = HashMap::new(); + if !add.is_empty() { + accounts.insert( + "add".to_owned(), + SubscribeRequestFilterAccounts { + account: add.to_vec(), + ..Default::default() + }, + ); + } + if !remove.is_empty() { + accounts.insert( + "remove".to_owned(), + SubscribeRequestFilterAccounts { + account: remove.to_vec(), + ..Default::default() + }, + ); + } + let req = SubscribeRequest { + accounts, + ..Default::default() + }; + self.request_tx + .send(req) + .await + .context("failed to send patch subscription request") + } + + pub fn log(&self) -> &ClientLog { + &self.log + } + + pub async fn shutdown(self) -> anyhow::Result<()> { + drop(self.request_tx); + match self.receive_task.await { + Ok(result) => result, + Err(e) if e.is_cancelled() => Ok(()), + Err(e) => Err(e.into()), + } + } +} diff --git a/ix-tests/src/main.rs b/ix-tests/src/main.rs index b20a097..4bad623 100644 --- a/ix-tests/src/main.rs +++ b/ix-tests/src/main.rs @@ -2,8 
+2,12 @@ mod accounts; mod artifacts; mod cli; +#[allow(dead_code)] +mod client; mod config; mod layout; +#[allow(dead_code)] +mod observation; mod runner; mod scenario; mod service; diff --git a/ix-tests/src/observation.rs b/ix-tests/src/observation.rs new file mode 100644 index 0000000..5da691b --- /dev/null +++ b/ix-tests/src/observation.rs @@ -0,0 +1,51 @@ +use std::sync::{Arc, Mutex}; + +use crate::layout::ServiceInstance; + +#[allow(dead_code)] +#[derive(Clone, Debug)] +pub struct ObservedUpdate { + pub client_id: usize, + pub service: ServiceInstance, + pub pubkey_b58: String, + pub slot: u64, + pub lamports: u64, + pub owner_b58: String, + pub executable: bool, + pub rent_epoch: u64, + pub write_version: u64, + pub txn_signature_b58: Option, + pub data: Vec, + pub received_at_millis: u128, +} + +#[derive(Clone)] +pub struct ClientLog { + entries: Arc>>, +} + +#[allow(dead_code)] +impl ClientLog { + pub fn new() -> Self { + Self { + entries: Arc::new(Mutex::new(Vec::new())), + } + } + + pub fn push(&self, update: ObservedUpdate) { + self.entries.lock().unwrap().push(update); + } + + pub fn snapshot(&self) -> Vec { + self.entries.lock().unwrap().clone() + } + + pub fn snapshot_from(&self, start_index: usize) -> Vec { + let guard = self.entries.lock().unwrap(); + guard[start_index..].to_vec() + } + + pub fn len(&self) -> usize { + self.entries.lock().unwrap().len() + } +} From e43b0b03ca520f10d0ebda9fa12f221421e69700 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 13:34:50 +0700 Subject: [PATCH 07/68] feat: add validator driver and keypair-backed accounts Amp-Thread-ID: https://ampcode.com/threads/T-019db3e1-8171-7040-8bbc-a8f061eb1b72 Co-authored-by: Amp --- ix-tests/Cargo.toml | 4 +- ix-tests/src/accounts.rs | 30 ++++++-- ix-tests/src/main.rs | 14 +++- ix-tests/src/validator.rs | 155 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 193 insertions(+), 10 deletions(-) create mode 100644 ix-tests/src/validator.rs diff --git 
a/ix-tests/Cargo.toml b/ix-tests/Cargo.toml index 003caa1..c1071e3 100644 --- a/ix-tests/Cargo.toml +++ b/ix-tests/Cargo.toml @@ -16,8 +16,8 @@ solana-pubkey = "4.1.0" solana-rpc-client = { version = "4.0.0-beta.4", default-features = false } solana-signature = "3.4.0" solana-signer = "3.0.0" -solana-system-interface = "3.2.0" -solana-transaction = "3.1.0" +solana-system-interface = { version = "3.2.0", features = ["bincode"] } +solana-transaction = { version = "3.1.0", features = ["bincode"] } tokio = { version = "1.47", features = ["macros", "rt-multi-thread", "process", "sync", "time", "fs", "signal"] } tokio-stream = "0.1" toml = "0.9.12" diff --git a/ix-tests/src/accounts.rs b/ix-tests/src/accounts.rs index 3c3af30..73ef63b 100644 --- a/ix-tests/src/accounts.rs +++ b/ix-tests/src/accounts.rs @@ -1,3 +1,7 @@ +use solana_keypair::Keypair; +use solana_pubkey::Pubkey; +use solana_signer::Signer; + use crate::scenario::ScenarioName; #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] @@ -215,19 +219,31 @@ fn derive_seed(scenario: ScenarioName, account: NamedAccount) -> [u8; 32] { } pub struct ScenarioAccounts { - seeds: [[u8; 32]; ACCOUNT_COUNT], + keypairs: Vec, } +#[allow(dead_code)] impl ScenarioAccounts { pub fn for_scenario(name: ScenarioName) -> Self { - let mut seeds = [[0u8; 32]; ACCOUNT_COUNT]; - for account in NamedAccount::all() { - seeds[account.index()] = derive_seed(name, *account); - } - Self { seeds } + let keypairs = NamedAccount::all() + .iter() + .map(|account| { + let seed = derive_seed(name, *account); + Keypair::new_from_array(seed) + }) + .collect(); + Self { keypairs } + } + + pub fn keypair(&self, account: NamedAccount) -> &Keypair { + &self.keypairs[account.index()] + } + + pub fn pubkey(&self, account: NamedAccount) -> Pubkey { + self.keypairs[account.index()].pubkey() } pub fn pubkey_b58(&self, account: NamedAccount) -> String { - bs58::encode(&self.seeds[account.index()]).into_string() + self.pubkey(account).to_string() } } diff --git 
a/ix-tests/src/main.rs b/ix-tests/src/main.rs index 4bad623..97211c9 100644 --- a/ix-tests/src/main.rs +++ b/ix-tests/src/main.rs @@ -1,4 +1,3 @@ -#[allow(dead_code)] mod accounts; mod artifacts; mod cli; @@ -11,12 +10,16 @@ mod observation; mod runner; mod scenario; mod service; +#[allow(dead_code)] +mod validator; use tracing::info; +use crate::accounts::{NamedAccount, ScenarioAccounts}; use crate::layout::ServiceInstance; use crate::scenario::ScenarioName; use crate::service::{ServiceController, ServiceSpec}; +use crate::validator::ValidatorDriver; fn init_tracing() { tracing_subscriber::fmt() @@ -61,6 +64,15 @@ async fn main() -> anyhow::Result<()> { if *scenario == ScenarioName::SingleBasic { let spec = ServiceSpec::for_instance(ServiceInstance::One); let svc = controller.start(&spec, &artifacts).await?; + + let accounts = + ScenarioAccounts::for_scenario(ScenarioName::SingleBasic); + let validator = ValidatorDriver::new(&config); + validator.fund_payer().await?; + validator + .airdrop(&accounts.pubkey(NamedAccount::SimpleA), 1_000_000) + .await?; + controller.shutdown(svc).await?; } diff --git a/ix-tests/src/validator.rs b/ix-tests/src/validator.rs new file mode 100644 index 0000000..d25def3 --- /dev/null +++ b/ix-tests/src/validator.rs @@ -0,0 +1,155 @@ +use std::time::Duration; + +use anyhow::Context; +use solana_keypair::Keypair; +use solana_pubkey::Pubkey; +use solana_rpc_client::nonblocking::rpc_client::RpcClient; +use solana_signer::Signer; +use solana_system_interface::instruction as system_instruction; +use solana_transaction::Transaction; +use tracing::info; + +use crate::config::SuiteConfig; + +pub struct ValidatorDriver { + rpc: RpcClient, + payer: Keypair, + transaction_timeout: Duration, +} + +impl ValidatorDriver { + pub fn new(config: &SuiteConfig) -> Self { + let rpc = RpcClient::new(config.validator_rpc_url.clone()); + let payer = Keypair::new(); + let transaction_timeout = + Duration::from_millis(config.transaction_timeout_ms); + Self { + 
rpc, + payer, + transaction_timeout, + } + } + + pub async fn fund_payer(&self) -> anyhow::Result<()> { + let lamports = 10_000_000_000; // 10 SOL + let sig = self + .rpc + .request_airdrop(&self.payer.pubkey(), lamports) + .await + .context("fund_payer: request_airdrop failed")?; + self.confirm_signature(&sig).await?; + info!( + payer = %self.payer.pubkey(), + lamports, + "funded payer" + ); + Ok(()) + } + + pub async fn airdrop( + &self, + pubkey: &Pubkey, + lamports: u64, + ) -> anyhow::Result { + let sig = self + .rpc + .request_airdrop(pubkey, lamports) + .await + .with_context(|| { + format!("airdrop to {pubkey} of {lamports} failed") + })?; + self.confirm_signature(&sig).await?; + info!(%pubkey, lamports, %sig, "airdrop confirmed"); + Ok(sig.to_string()) + } + + pub async fn transfer( + &self, + from: &Keypair, + to: &Pubkey, + lamports: u64, + ) -> anyhow::Result { + let ix = system_instruction::transfer(&from.pubkey(), to, lamports); + let blockhash = self + .rpc + .get_latest_blockhash() + .await + .context("transfer: get_latest_blockhash failed")?; + let mut tx = Transaction::new_with_payer(&[ix], Some(&from.pubkey())); + tx.sign(&[from], blockhash); + let sig = self + .rpc + .send_and_confirm_transaction(&tx) + .await + .with_context(|| { + format!( + "transfer {} lamports from {} to {} failed", + lamports, + from.pubkey(), + to + ) + })?; + info!(from = %from.pubkey(), %to, lamports, %sig, "transfer confirmed"); + Ok(sig.to_string()) + } + + pub async fn allocate_and_assign( + &self, + target: &Keypair, + space: u64, + new_owner: Pubkey, + ) -> anyhow::Result { + let alloc_ix = system_instruction::allocate(&target.pubkey(), space); + let assign_ix = + system_instruction::assign(&target.pubkey(), &new_owner); + let blockhash = self + .rpc + .get_latest_blockhash() + .await + .context("allocate_and_assign: get_latest_blockhash failed")?; + let mut tx = Transaction::new_with_payer( + &[alloc_ix, assign_ix], + Some(&self.payer.pubkey()), + ); + 
tx.sign(&[&self.payer, target], blockhash); + let sig = self + .rpc + .send_and_confirm_transaction(&tx) + .await + .with_context(|| { + format!( + "allocate_and_assign for {} (space={}, owner={}) failed", + target.pubkey(), + space, + new_owner + ) + })?; + info!( + target = %target.pubkey(), + space, + new_owner = %new_owner, + %sig, + "allocate_and_assign confirmed" + ); + Ok(sig.to_string()) + } + + async fn confirm_signature( + &self, + sig: &solana_signature::Signature, + ) -> anyhow::Result<()> { + let deadline = tokio::time::Instant::now() + self.transaction_timeout; + loop { + if self.rpc.confirm_transaction(sig).await.unwrap_or(false) { + return Ok(()); + } + if tokio::time::Instant::now() >= deadline { + anyhow::bail!( + "transaction {sig} not confirmed within {:?}", + self.transaction_timeout + ); + } + tokio::time::sleep(Duration::from_millis(200)).await; + } + } +} From c088b3396628bc447214f38668e53dfe0d658655 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 15:18:02 +0700 Subject: [PATCH 08/68] feat: add ix checkpoint runner --- ix-tests/src/expectation.rs | 220 ++++++++++++++++++++++++++++++++++++ ix-tests/src/main.rs | 14 +++ 2 files changed, 234 insertions(+) create mode 100644 ix-tests/src/expectation.rs diff --git a/ix-tests/src/expectation.rs b/ix-tests/src/expectation.rs new file mode 100644 index 0000000..fabb865 --- /dev/null +++ b/ix-tests/src/expectation.rs @@ -0,0 +1,220 @@ +use std::time::{Duration, Instant}; + +use anyhow::{Context, bail}; +use tokio::time::sleep; + +use crate::client::TestGrpcClient; +use crate::config::SuiteConfig; +use crate::observation::ObservedUpdate; + +#[derive(Clone, Debug, Default)] +#[allow(dead_code)] +pub struct ExpectedUpdate { + pub pubkey_b58: Option, + pub slot: Option, + pub lamports: Option, + pub owner_b58: Option, + pub executable: Option, + pub rent_epoch: Option, + pub write_version: Option, + pub txn_signature_b58: Option>, + pub data: Option>, +} + +#[derive(Clone, Debug, 
Default)] +#[allow(dead_code)] +pub struct ClientCheckpoint { + pub client_id: usize, + pub allowed: Vec, + pub required: Vec, +} + +#[derive(Clone, Debug, Default)] +#[allow(dead_code)] +pub struct CheckpointSpec { + pub name: &'static str, + pub clients: Vec, +} + +#[derive(Clone, Debug, Default)] +#[allow(dead_code)] +pub struct ClientCursor { + pub client_id: usize, + pub next_index: usize, +} + +#[derive(Clone, Debug)] +#[allow(dead_code)] +pub struct CheckpointRunner { + timeout: Duration, +} + +#[allow(dead_code)] +impl ExpectedUpdate { + pub fn matches(&self, observed: &ObservedUpdate) -> bool { + self.pubkey_b58 + .as_ref() + .is_none_or(|expected| observed.pubkey_b58 == *expected) + && self.slot.is_none_or(|expected| observed.slot == expected) + && self + .lamports + .is_none_or(|expected| observed.lamports == expected) + && self + .owner_b58 + .as_ref() + .is_none_or(|expected| observed.owner_b58 == *expected) + && self + .executable + .is_none_or(|expected| observed.executable == expected) + && self + .rent_epoch + .is_none_or(|expected| observed.rent_epoch == expected) + && self + .write_version + .is_none_or(|expected| observed.write_version == expected) + && self.txn_signature_b58.as_ref().is_none_or(|expected| { + observed.txn_signature_b58.as_ref() == expected.as_ref() + }) + && self + .data + .as_ref() + .is_none_or(|expected| observed.data == *expected) + } +} + +#[allow(dead_code)] +impl CheckpointRunner { + pub fn new(config: &SuiteConfig) -> Self { + Self { + timeout: Duration::from_millis(config.checkpoint_timeout_ms), + } + } + + pub async fn wait_until_satisfied( + &self, + spec: &CheckpointSpec, + clients: &[TestGrpcClient], + cursors: &mut [ClientCursor], + ) -> anyhow::Result<()> { + let deadline = Instant::now() + self.timeout; + + loop { + let mut all_required_seen = true; + + for client_spec in &spec.clients { + let client = clients + .iter() + .find(|client| client.id == client_spec.client_id) + .with_context(|| { + format!( + 
"checkpoint '{}' references unknown client {}", + spec.name, client_spec.client_id + ) + })?; + let cursor = cursors + .iter() + .find(|cursor| cursor.client_id == client_spec.client_id) + .with_context(|| { + format!( + "checkpoint '{}' is missing cursor for client {}", + spec.name, client_spec.client_id + ) + })?; + let observed = client.log().snapshot_from(cursor.next_index); + + if let Some(unexpected) = observed.iter().find(|update| { + !client_spec + .allowed + .iter() + .any(|expected| expected.matches(update)) + }) { + bail!( + "checkpoint '{}' failed for client {}: unexpected update for pubkey {}", + spec.name, + client_spec.client_id, + unexpected.pubkey_b58 + ); + } + + let missing_required = + client_spec.required.iter().any(|expected| { + !observed.iter().any(|update| expected.matches(update)) + }); + if missing_required { + all_required_seen = false; + } + } + + if all_required_seen { + for cursor in cursors.iter_mut() { + let client = clients + .iter() + .find(|client| client.id == cursor.client_id) + .with_context(|| { + format!( + "checkpoint '{}' cannot advance missing client {}", + spec.name, cursor.client_id + ) + })?; + cursor.next_index = client.log().len(); + } + return Ok(()); + } + + if Instant::now() >= deadline { + bail!( + "checkpoint '{}' timed out after {:?}", + spec.name, + self.timeout + ); + } + + sleep(Duration::from_millis(50)).await; + } + } +} + +#[cfg(test)] +mod tests { + use crate::layout::ServiceInstance; + use crate::observation::ObservedUpdate; + + use super::ExpectedUpdate; + + fn observed_update() -> ObservedUpdate { + ObservedUpdate { + client_id: 7, + service: ServiceInstance::One, + pubkey_b58: "pubkey".to_owned(), + slot: 42, + lamports: 99, + owner_b58: "owner".to_owned(), + executable: false, + rent_epoch: 5, + write_version: 6, + txn_signature_b58: Some("sig".to_owned()), + data: vec![1, 2, 3], + received_at_millis: 123, + } + } + + #[test] + fn matches_ignores_none_fields() { + let expected = ExpectedUpdate { + 
lamports: Some(99), + ..Default::default() + }; + + assert!(expected.matches(&observed_update())); + } + + #[test] + fn matches_rejects_mismatched_fields() { + let expected = ExpectedUpdate { + lamports: Some(100), + ..Default::default() + }; + + assert!(!expected.matches(&observed_update())); + } +} diff --git a/ix-tests/src/main.rs b/ix-tests/src/main.rs index 97211c9..f29cd57 100644 --- a/ix-tests/src/main.rs +++ b/ix-tests/src/main.rs @@ -4,6 +4,7 @@ mod cli; #[allow(dead_code)] mod client; mod config; +mod expectation; mod layout; #[allow(dead_code)] mod observation; @@ -16,6 +17,8 @@ mod validator; use tracing::info; use crate::accounts::{NamedAccount, ScenarioAccounts}; +use crate::client::TestGrpcClient; +use crate::expectation::{CheckpointRunner, CheckpointSpec, ClientCursor}; use crate::layout::ServiceInstance; use crate::scenario::ScenarioName; use crate::service::{ServiceController, ServiceSpec}; @@ -73,6 +76,17 @@ async fn main() -> anyhow::Result<()> { .airdrop(&accounts.pubkey(NamedAccount::SimpleA), 1_000_000) .await?; + let checkpoint_runner = CheckpointRunner::new(&config); + let checkpoint = CheckpointSpec { + name: "empty", + clients: Vec::new(), + }; + let clients: Vec = Vec::new(); + let mut cursors: Vec = Vec::new(); + checkpoint_runner + .wait_until_satisfied(&checkpoint, &clients, &mut cursors) + .await?; + controller.shutdown(svc).await?; } From 4b8445c037aeed2186e80b01fc0e571402c8f5ce Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 15:21:40 +0700 Subject: [PATCH 09/68] feat: add ix single basic scenario --- ix-tests/src/context.rs | 16 +++ ix-tests/src/main.rs | 58 +++----- ix-tests/src/scenarios/mod.rs | 21 +++ ix-tests/src/scenarios/single_basic.rs | 188 +++++++++++++++++++++++++ ix-tests/src/validator.rs | 9 ++ 5 files changed, 257 insertions(+), 35 deletions(-) create mode 100644 ix-tests/src/context.rs create mode 100644 ix-tests/src/scenarios/mod.rs create mode 100644 ix-tests/src/scenarios/single_basic.rs diff 
--git a/ix-tests/src/context.rs b/ix-tests/src/context.rs new file mode 100644 index 0000000..ca676f6 --- /dev/null +++ b/ix-tests/src/context.rs @@ -0,0 +1,16 @@ +use crate::accounts::ScenarioAccounts; +use crate::artifacts::RunArtifacts; +use crate::config::SuiteConfig; +use crate::expectation::CheckpointRunner; +use crate::service::ServiceController; +use crate::validator::ValidatorDriver; + +#[allow(dead_code)] +pub struct ScenarioContext { + pub suite_config: SuiteConfig, + pub artifacts: RunArtifacts, + pub service_controller: ServiceController, + pub validator: ValidatorDriver, + pub checkpoint_runner: CheckpointRunner, + pub accounts: ScenarioAccounts, +} diff --git a/ix-tests/src/main.rs b/ix-tests/src/main.rs index f29cd57..41ef12a 100644 --- a/ix-tests/src/main.rs +++ b/ix-tests/src/main.rs @@ -4,24 +4,24 @@ mod cli; #[allow(dead_code)] mod client; mod config; +mod context; mod expectation; mod layout; #[allow(dead_code)] mod observation; mod runner; mod scenario; +mod scenarios; mod service; #[allow(dead_code)] mod validator; use tracing::info; -use crate::accounts::{NamedAccount, ScenarioAccounts}; -use crate::client::TestGrpcClient; -use crate::expectation::{CheckpointRunner, CheckpointSpec, ClientCursor}; -use crate::layout::ServiceInstance; -use crate::scenario::ScenarioName; -use crate::service::{ServiceController, ServiceSpec}; +use crate::accounts::ScenarioAccounts; +use crate::context::ScenarioContext; +use crate::expectation::CheckpointRunner; +use crate::service::ServiceController; use crate::validator::ValidatorDriver; fn init_tracing() { @@ -58,40 +58,28 @@ async fn main() -> anyhow::Result<()> { let names: Vec<&str> = scenarios.iter().map(|s| s.as_str()).collect(); info!(scenarios = ?names, "resolved scenario execution order"); - let controller = ServiceController::new(&config); - for scenario in &scenarios { info!(scenario = scenario.as_str(), "running scenario"); let artifacts = artifacts::RunArtifacts::new(&config, *scenario)?; + let ctx 
= ScenarioContext { + suite_config: config.clone(), + artifacts, + service_controller: ServiceController::new(&config), + validator: ValidatorDriver::new(&config), + checkpoint_runner: CheckpointRunner::new(&config), + accounts: ScenarioAccounts::for_scenario(*scenario), + }; - if *scenario == ScenarioName::SingleBasic { - let spec = ServiceSpec::for_instance(ServiceInstance::One); - let svc = controller.start(&spec, &artifacts).await?; - - let accounts = - ScenarioAccounts::for_scenario(ScenarioName::SingleBasic); - let validator = ValidatorDriver::new(&config); - validator.fund_payer().await?; - validator - .airdrop(&accounts.pubkey(NamedAccount::SimpleA), 1_000_000) - .await?; - - let checkpoint_runner = CheckpointRunner::new(&config); - let checkpoint = CheckpointSpec { - name: "empty", - clients: Vec::new(), - }; - let clients: Vec = Vec::new(); - let mut cursors: Vec = Vec::new(); - checkpoint_runner - .wait_until_satisfied(&checkpoint, &clients, &mut cursors) - .await?; - - controller.shutdown(svc).await?; + match scenarios::run_scenario(*scenario, &ctx).await { + Ok(()) => { + ctx.artifacts.cleanup_success()?; + info!(scenario = scenario.as_str(), "scenario passed"); + } + Err(error) => { + ctx.artifacts.persist_failure()?; + return Err(error); + } } - - artifacts.cleanup_success()?; - info!(scenario = scenario.as_str(), "scenario passed"); } Ok(()) diff --git a/ix-tests/src/scenarios/mod.rs b/ix-tests/src/scenarios/mod.rs new file mode 100644 index 0000000..bc341ba --- /dev/null +++ b/ix-tests/src/scenarios/mod.rs @@ -0,0 +1,21 @@ +mod single_basic; + +use anyhow::bail; + +use crate::context::ScenarioContext; +use crate::scenario::ScenarioName; + +pub async fn run_scenario( + name: ScenarioName, + ctx: &ScenarioContext, +) -> anyhow::Result<()> { + match name { + ScenarioName::SingleBasic => single_basic::run(ctx).await, + ScenarioName::SingleLoad + | ScenarioName::DualConcurrent + | ScenarioName::DualRestart => { + bail!("scenario not implemented: {}", 
name.as_str()) + } + ScenarioName::All => bail!("scenario dispatch does not accept 'all'"), + } +} diff --git a/ix-tests/src/scenarios/single_basic.rs b/ix-tests/src/scenarios/single_basic.rs new file mode 100644 index 0000000..a4ab5ea --- /dev/null +++ b/ix-tests/src/scenarios/single_basic.rs @@ -0,0 +1,188 @@ +use anyhow::Context; +use solana_pubkey::Pubkey; + +use crate::accounts::NamedAccount; +use crate::client::TestGrpcClient; +use crate::context::ScenarioContext; +use crate::expectation::{ + CheckpointSpec, ClientCheckpoint, ClientCursor, ExpectedUpdate, +}; +use crate::layout::ServiceInstance; +use crate::service::{ManagedService, ServiceSpec}; + +const OWNER_DATA_SPACE: u64 = 64; +const SYNTHETIC_OWNER_BYTES: [u8; 32] = [ + 0x31, 0x22, 0x13, 0x04, 0xF5, 0xE6, 0xD7, 0xC8, 0xB9, 0xAA, 0x9B, 0x8C, + 0x7D, 0x6E, 0x5F, 0x40, 0x11, 0x32, 0x53, 0x74, 0x95, 0xB6, 0xD7, 0xF8, + 0x18, 0x29, 0x3A, 0x4B, 0x5C, 0x6D, 0x7E, 0x8F, +]; + +pub async fn run(ctx: &ScenarioContext) -> anyhow::Result<()> { + let spec = ServiceSpec::for_instance(ServiceInstance::One); + let mut service = + Some(ctx.service_controller.start(&spec, &ctx.artifacts).await?); + let mut clients = Vec::new(); + let mut cursors = Vec::new(); + + let result = + run_inner(ctx, &spec.endpoint, &mut clients, &mut cursors).await; + + let client_shutdown = shutdown_clients(clients).await; + let service_shutdown = + shutdown_service(&ctx.service_controller, &mut service).await; + + result?; + client_shutdown?; + service_shutdown?; + Ok(()) +} + +async fn run_inner( + ctx: &ScenarioContext, + endpoint: &str, + clients: &mut Vec, + cursors: &mut Vec, +) -> anyhow::Result<()> { + for id in 0..4 { + let client = TestGrpcClient::connect( + id, + ServiceInstance::One, + endpoint.to_owned(), + ) + .await + .with_context(|| format!("failed to connect client {id}"))?; + cursors.push(ClientCursor { + client_id: id, + next_index: 0, + }); + clients.push(client); + } + + clients[0] + 
.replace_subscription(&[ctx.accounts.pubkey_b58(NamedAccount::SimpleA)]) + .await?; + clients[1] + .replace_subscription(&[ctx.accounts.pubkey_b58(NamedAccount::SimpleB)]) + .await?; + clients[2] + .replace_subscription(&[ctx.accounts.pubkey_b58(NamedAccount::SimpleC)]) + .await?; + clients[3] + .replace_subscription(&[ctx + .accounts + .pubkey_b58(NamedAccount::OwnerData)]) + .await?; + + ctx.validator.fund_payer().await?; + + let simple_a_sig = ctx + .validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::SimpleA), 1_000_000) + .await?; + let simple_b_sig = ctx + .validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::SimpleB), 2_000_000) + .await?; + let simple_c_sig = ctx + .validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::SimpleC), 3_000_000) + .await?; + + let basic_checkpoint = CheckpointSpec { + name: "basic-lamports", + clients: vec![ + lamport_client_checkpoint( + 0, + ctx.accounts.pubkey_b58(NamedAccount::SimpleA), + 1_000_000, + simple_a_sig, + ), + lamport_client_checkpoint( + 1, + ctx.accounts.pubkey_b58(NamedAccount::SimpleB), + 2_000_000, + simple_b_sig, + ), + lamport_client_checkpoint( + 2, + ctx.accounts.pubkey_b58(NamedAccount::SimpleC), + 3_000_000, + simple_c_sig, + ), + ], + }; + ctx.checkpoint_runner + .wait_until_satisfied(&basic_checkpoint, clients, cursors) + .await?; + + let rent_lamports = + ctx.validator.rent_exempt_balance(OWNER_DATA_SPACE).await?; + ctx.validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::OwnerData), rent_lamports) + .await?; + + let synthetic_owner = Pubkey::new_from_array(SYNTHETIC_OWNER_BYTES); + let owner_data_sig = ctx + .validator + .allocate_and_assign( + ctx.accounts.keypair(NamedAccount::OwnerData), + OWNER_DATA_SPACE, + synthetic_owner, + ) + .await?; + + let owner_data_expected = ExpectedUpdate { + pubkey_b58: Some(ctx.accounts.pubkey_b58(NamedAccount::OwnerData)), + owner_b58: Some(synthetic_owner.to_string()), + txn_signature_b58: Some(Some(owner_data_sig)), + data: None, + 
..Default::default() + }; + let owner_data_checkpoint = CheckpointSpec { + name: "owner-data-change", + clients: vec![ClientCheckpoint { + client_id: 3, + allowed: vec![owner_data_expected.clone()], + required: vec![owner_data_expected], + }], + }; + ctx.checkpoint_runner + .wait_until_satisfied(&owner_data_checkpoint, clients, cursors) + .await +} + +fn lamport_client_checkpoint( + client_id: usize, + pubkey_b58: String, + lamports: u64, + txn_signature_b58: String, +) -> ClientCheckpoint { + let expected = ExpectedUpdate { + pubkey_b58: Some(pubkey_b58), + lamports: Some(lamports), + txn_signature_b58: Some(Some(txn_signature_b58)), + ..Default::default() + }; + ClientCheckpoint { + client_id, + allowed: vec![expected.clone()], + required: vec![expected], + } +} + +async fn shutdown_clients(clients: Vec) -> anyhow::Result<()> { + for client in clients { + client.shutdown().await?; + } + Ok(()) +} + +async fn shutdown_service( + controller: &crate::service::ServiceController, + service: &mut Option, +) -> anyhow::Result<()> { + if let Some(service) = service.take() { + controller.shutdown(service).await?; + } + Ok(()) +} diff --git a/ix-tests/src/validator.rs b/ix-tests/src/validator.rs index d25def3..9b586a0 100644 --- a/ix-tests/src/validator.rs +++ b/ix-tests/src/validator.rs @@ -134,6 +134,15 @@ impl ValidatorDriver { Ok(sig.to_string()) } + pub async fn rent_exempt_balance(&self, space: u64) -> anyhow::Result { + self.rpc + .get_minimum_balance_for_rent_exemption(space as usize) + .await + .with_context(|| { + format!("failed to fetch rent-exempt balance for {space} bytes") + }) + } + async fn confirm_signature( &self, sig: &solana_signature::Signature, From 88515b34df2397047b92ba39895bb25e1421f21c Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 15:22:37 +0700 Subject: [PATCH 10/68] feat: add ix single load scenario --- ix-tests/src/scenarios/mod.rs | 6 +- ix-tests/src/scenarios/single_load.rs | 115 ++++++++++++++++++++++++++ 2 files 
changed, 118 insertions(+), 3 deletions(-) create mode 100644 ix-tests/src/scenarios/single_load.rs diff --git a/ix-tests/src/scenarios/mod.rs b/ix-tests/src/scenarios/mod.rs index bc341ba..0603b7a 100644 --- a/ix-tests/src/scenarios/mod.rs +++ b/ix-tests/src/scenarios/mod.rs @@ -1,4 +1,5 @@ mod single_basic; +mod single_load; use anyhow::bail; @@ -11,9 +12,8 @@ pub async fn run_scenario( ) -> anyhow::Result<()> { match name { ScenarioName::SingleBasic => single_basic::run(ctx).await, - ScenarioName::SingleLoad - | ScenarioName::DualConcurrent - | ScenarioName::DualRestart => { + ScenarioName::SingleLoad => single_load::run(ctx).await, + ScenarioName::DualConcurrent | ScenarioName::DualRestart => { bail!("scenario not implemented: {}", name.as_str()) } ScenarioName::All => bail!("scenario dispatch does not accept 'all'"), diff --git a/ix-tests/src/scenarios/single_load.rs b/ix-tests/src/scenarios/single_load.rs new file mode 100644 index 0000000..a040fa8 --- /dev/null +++ b/ix-tests/src/scenarios/single_load.rs @@ -0,0 +1,115 @@ +use anyhow::Context; + +use crate::accounts::NamedAccount; +use crate::client::TestGrpcClient; +use crate::context::ScenarioContext; +use crate::expectation::{ + CheckpointSpec, ClientCheckpoint, ClientCursor, ExpectedUpdate, +}; +use crate::layout::ServiceInstance; +use crate::service::{ManagedService, ServiceSpec}; + +pub async fn run(ctx: &ScenarioContext) -> anyhow::Result<()> { + let spec = ServiceSpec::for_instance(ServiceInstance::One); + let mut service = + Some(ctx.service_controller.start(&spec, &ctx.artifacts).await?); + let mut clients = Vec::new(); + let mut cursors = Vec::new(); + + let result = + run_inner(ctx, &spec.endpoint, &mut clients, &mut cursors).await; + + let client_shutdown = shutdown_clients(clients).await; + let service_shutdown = + shutdown_service(&ctx.service_controller, &mut service).await; + + result?; + client_shutdown?; + service_shutdown?; + Ok(()) +} + +async fn run_inner( + ctx: &ScenarioContext, + 
endpoint: &str, + clients: &mut Vec, + cursors: &mut Vec, +) -> anyhow::Result<()> { + let shared_a = ctx.accounts.pubkey_b58(NamedAccount::SharedA); + let shared_b = ctx.accounts.pubkey_b58(NamedAccount::SharedB); + + for id in 0..100 { + let client = TestGrpcClient::connect( + id, + ServiceInstance::One, + endpoint.to_owned(), + ) + .await + .with_context(|| format!("failed to connect client {id}"))?; + client + .replace_subscription(&[shared_a.clone(), shared_b.clone()]) + .await + .with_context(|| { + format!("failed to set subscriptions for client {id}") + })?; + cursors.push(ClientCursor { + client_id: id, + next_index: 0, + }); + clients.push(client); + } + + ctx.validator.fund_payer().await?; + + let mut expected_updates = Vec::new(); + for index in 1..=25u64 { + let (account, lamports) = if index % 2 == 1 { + (NamedAccount::SharedA, 10_000 + index) + } else { + (NamedAccount::SharedB, 20_000 + index) + }; + let pubkey_b58 = ctx.accounts.pubkey_b58(account); + let sig = ctx + .validator + .airdrop(&ctx.accounts.pubkey(account), lamports) + .await?; + expected_updates.push(ExpectedUpdate { + pubkey_b58: Some(pubkey_b58), + lamports: Some(lamports), + txn_signature_b58: Some(Some(sig)), + ..Default::default() + }); + } + + let client_specs = (0..100) + .map(|client_id| ClientCheckpoint { + client_id, + allowed: expected_updates.clone(), + required: expected_updates.clone(), + }) + .collect(); + let checkpoint = CheckpointSpec { + name: "single-load-fanout", + clients: client_specs, + }; + ctx.checkpoint_runner + .wait_until_satisfied(&checkpoint, clients, cursors) + .await +} + +async fn shutdown_clients(clients: Vec) -> anyhow::Result<()> { + for client in clients { + client.shutdown().await?; + } + Ok(()) +} + +async fn shutdown_service( + controller: &crate::service::ServiceController, + service: &mut Option, +) -> anyhow::Result<()> { + if let Some(service) = service.take() { + controller.shutdown(service).await?; + } + Ok(()) +} From 
2dbea649908533d2c5ca669ae1c12048a31ffd38 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 15:23:33 +0700 Subject: [PATCH 11/68] feat: add ix dual concurrent scenario --- ix-tests/src/scenarios/dual_concurrent.rs | 203 ++++++++++++++++++++++ ix-tests/src/scenarios/mod.rs | 4 +- 2 files changed, 206 insertions(+), 1 deletion(-) create mode 100644 ix-tests/src/scenarios/dual_concurrent.rs diff --git a/ix-tests/src/scenarios/dual_concurrent.rs b/ix-tests/src/scenarios/dual_concurrent.rs new file mode 100644 index 0000000..9de57c9 --- /dev/null +++ b/ix-tests/src/scenarios/dual_concurrent.rs @@ -0,0 +1,203 @@ +use anyhow::Context; + +use crate::accounts::NamedAccount; +use crate::client::TestGrpcClient; +use crate::context::ScenarioContext; +use crate::expectation::{ + CheckpointSpec, ClientCheckpoint, ClientCursor, ExpectedUpdate, +}; +use crate::layout::ServiceInstance; +use crate::service::{ManagedService, ServiceSpec}; + +pub async fn run(ctx: &ScenarioContext) -> anyhow::Result<()> { + let spec_one = ServiceSpec::for_instance(ServiceInstance::One); + let spec_two = ServiceSpec::for_instance(ServiceInstance::Two); + let mut services = Vec::new(); + let mut clients = Vec::new(); + let mut cursors = Vec::new(); + + let result = run_inner( + ctx, + &spec_one, + &spec_two, + &mut services, + &mut clients, + &mut cursors, + ) + .await; + + let client_shutdown = shutdown_clients(clients).await; + let service_shutdown = + shutdown_services(&ctx.service_controller, services).await; + + result?; + client_shutdown?; + service_shutdown?; + Ok(()) +} + +async fn run_inner( + ctx: &ScenarioContext, + spec_one: &ServiceSpec, + spec_two: &ServiceSpec, + services: &mut Vec, + clients: &mut Vec, + cursors: &mut Vec, +) -> anyhow::Result<()> { + services.push( + ctx.service_controller + .start(spec_one, &ctx.artifacts) + .await?, + ); + services.push( + ctx.service_controller + .start(spec_two, &ctx.artifacts) + .await?, + ); + + let simple_a = 
ctx.accounts.pubkey_b58(NamedAccount::SimpleA); + let simple_b = ctx.accounts.pubkey_b58(NamedAccount::SimpleB); + let shared_a = ctx.accounts.pubkey_b58(NamedAccount::SharedA); + + for id in 0..10 { + let client = TestGrpcClient::connect( + id, + ServiceInstance::One, + spec_one.endpoint.clone(), + ) + .await + .with_context(|| { + format!("failed to connect service-one client {id}") + })?; + let subscription = if id < 5 { + vec![simple_a.clone()] + } else { + vec![shared_a.clone()] + }; + client.replace_subscription(&subscription).await?; + clients.push(client); + cursors.push(ClientCursor { + client_id: id, + next_index: 0, + }); + } + + for id in 10..20 { + let client = TestGrpcClient::connect( + id, + ServiceInstance::Two, + spec_two.endpoint.clone(), + ) + .await + .with_context(|| { + format!("failed to connect service-two client {id}") + })?; + let subscription = if id < 15 { + vec![simple_b.clone()] + } else { + vec![shared_a.clone()] + }; + client.replace_subscription(&subscription).await?; + clients.push(client); + cursors.push(ClientCursor { + client_id: id, + next_index: 0, + }); + } + + ctx.validator.fund_payer().await?; + + let simple_a_sig = ctx + .validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::SimpleA), 1_111_111) + .await?; + let simple_b_sig = ctx + .validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::SimpleB), 2_222_222) + .await?; + let shared_a_sig = ctx + .validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::SharedA), 3_333_333) + .await?; + + let simple_a_expected = ExpectedUpdate { + pubkey_b58: Some(simple_a), + lamports: Some(1_111_111), + txn_signature_b58: Some(Some(simple_a_sig)), + ..Default::default() + }; + let simple_b_expected = ExpectedUpdate { + pubkey_b58: Some(simple_b), + lamports: Some(2_222_222), + txn_signature_b58: Some(Some(simple_b_sig)), + ..Default::default() + }; + let shared_a_expected = ExpectedUpdate { + pubkey_b58: Some(shared_a), + lamports: Some(3_333_333), + txn_signature_b58: 
Some(Some(shared_a_sig)), + ..Default::default() + }; + + let mut checkpoint_clients = Vec::new(); + for client_id in 0..5 { + checkpoint_clients.push(single_update_checkpoint( + client_id, + simple_a_expected.clone(), + )); + } + for client_id in 5..10 { + checkpoint_clients.push(single_update_checkpoint( + client_id, + shared_a_expected.clone(), + )); + } + for client_id in 10..15 { + checkpoint_clients.push(single_update_checkpoint( + client_id, + simple_b_expected.clone(), + )); + } + for client_id in 15..20 { + checkpoint_clients.push(single_update_checkpoint( + client_id, + shared_a_expected.clone(), + )); + } + + let checkpoint = CheckpointSpec { + name: "dual-concurrent-routing", + clients: checkpoint_clients, + }; + ctx.checkpoint_runner + .wait_until_satisfied(&checkpoint, clients, cursors) + .await +} + +fn single_update_checkpoint( + client_id: usize, + expected: ExpectedUpdate, +) -> ClientCheckpoint { + ClientCheckpoint { + client_id, + allowed: vec![expected.clone()], + required: vec![expected], + } +} + +async fn shutdown_clients(clients: Vec) -> anyhow::Result<()> { + for client in clients { + client.shutdown().await?; + } + Ok(()) +} + +async fn shutdown_services( + controller: &crate::service::ServiceController, + services: Vec, +) -> anyhow::Result<()> { + for service in services { + controller.shutdown(service).await?; + } + Ok(()) +} diff --git a/ix-tests/src/scenarios/mod.rs b/ix-tests/src/scenarios/mod.rs index 0603b7a..fe174da 100644 --- a/ix-tests/src/scenarios/mod.rs +++ b/ix-tests/src/scenarios/mod.rs @@ -1,3 +1,4 @@ +mod dual_concurrent; mod single_basic; mod single_load; @@ -13,7 +14,8 @@ pub async fn run_scenario( match name { ScenarioName::SingleBasic => single_basic::run(ctx).await, ScenarioName::SingleLoad => single_load::run(ctx).await, - ScenarioName::DualConcurrent | ScenarioName::DualRestart => { + ScenarioName::DualConcurrent => dual_concurrent::run(ctx).await, + ScenarioName::DualRestart => { bail!("scenario not implemented: 
{}", name.as_str()) } ScenarioName::All => bail!("scenario dispatch does not accept 'all'"), From 48d2c802a5b43d7bb1b8ef3aec4d05d1c86fd8f9 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 15:25:18 +0700 Subject: [PATCH 12/68] feat: add ix dual restart scenario --- ix-tests/src/scenarios/dual_restart.rs | 416 +++++++++++++++++++++++++ ix-tests/src/scenarios/mod.rs | 5 +- 2 files changed, 418 insertions(+), 3 deletions(-) create mode 100644 ix-tests/src/scenarios/dual_restart.rs diff --git a/ix-tests/src/scenarios/dual_restart.rs b/ix-tests/src/scenarios/dual_restart.rs new file mode 100644 index 0000000..ab76597 --- /dev/null +++ b/ix-tests/src/scenarios/dual_restart.rs @@ -0,0 +1,416 @@ +use anyhow::{Context, bail}; + +use crate::accounts::{NamedAccount, ScenarioAccounts}; +use crate::client::TestGrpcClient; +use crate::context::ScenarioContext; +use crate::expectation::{ + CheckpointSpec, ClientCheckpoint, ClientCursor, ExpectedUpdate, +}; +use crate::layout::ServiceInstance; +use crate::observation::ClientLog; +use crate::service::{ManagedService, ServiceSpec}; + +pub async fn run(ctx: &ScenarioContext) -> anyhow::Result<()> { + let spec_one = ServiceSpec::for_instance(ServiceInstance::One); + let spec_two = ServiceSpec::for_instance(ServiceInstance::Two); + let mut service_one = Some( + ctx.service_controller + .start(&spec_one, &ctx.artifacts) + .await?, + ); + let mut service_two = Some( + ctx.service_controller + .start(&spec_two, &ctx.artifacts) + .await?, + ); + let mut active_clients = Vec::new(); + let mut cursors = Vec::new(); + + let result = run_inner( + ctx, + &spec_one, + &spec_two, + &mut service_one, + &mut active_clients, + &mut cursors, + ) + .await; + + let client_shutdown = shutdown_clients(active_clients).await; + let service_one_shutdown = + shutdown_service(&ctx.service_controller, &mut service_one).await; + let service_two_shutdown = + shutdown_service(&ctx.service_controller, &mut service_two).await; + + result?; + 
client_shutdown?; + service_one_shutdown?; + service_two_shutdown?; + Ok(()) +} + +async fn run_inner( + ctx: &ScenarioContext, + spec_one: &ServiceSpec, + spec_two: &ServiceSpec, + service_one: &mut Option, + active_clients: &mut Vec, + cursors: &mut Vec, +) -> anyhow::Result<()> { + connect_service_one_clients( + &ctx.accounts, + active_clients, + cursors, + &spec_one.endpoint, + ) + .await?; + connect_service_two_clients( + &ctx.accounts, + active_clients, + cursors, + &spec_two.endpoint, + ) + .await?; + + ctx.validator.fund_payer().await?; + + let restart_a_sig = ctx + .validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::RestartA), 4_444_444) + .await?; + let restart_b_sig = ctx + .validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::RestartB), 5_555_555) + .await?; + let shared_b_sig = ctx + .validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::SharedB), 6_666_666) + .await?; + + let pre_restart = CheckpointSpec { + name: "pre-restart", + clients: vec![ + repeated_checkpoint( + 0..5, + expected_update( + ctx.accounts.pubkey_b58(NamedAccount::RestartA), + 4_444_444, + restart_a_sig, + ), + ), + repeated_checkpoint( + 5..10, + expected_update( + ctx.accounts.pubkey_b58(NamedAccount::SharedB), + 6_666_666, + shared_b_sig.clone(), + ), + ), + repeated_checkpoint( + 10..15, + expected_update( + ctx.accounts.pubkey_b58(NamedAccount::RestartB), + 5_555_555, + restart_b_sig, + ), + ), + repeated_checkpoint( + 15..20, + expected_update( + ctx.accounts.pubkey_b58(NamedAccount::SharedB), + 6_666_666, + shared_b_sig, + ), + ), + ] + .into_iter() + .flatten() + .collect(), + }; + ctx.checkpoint_runner + .wait_until_satisfied(&pre_restart, active_clients, cursors) + .await?; + + let parked_logs = + shutdown_service_one_clients(active_clients, cursors).await?; + shutdown_service(&ctx.service_controller, service_one).await?; + + ctx.validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::RestartA), 7_777_777) + .await?; + let during_shared_b_sig = ctx + .validator 
+ .airdrop(&ctx.accounts.pubkey(NamedAccount::SharedB), 8_888_888) + .await?; + + assert_logs_unchanged(&parked_logs)?; + + let during_restart = CheckpointSpec { + name: "during-restart", + clients: vec![ + empty_checkpoints(10..15), + repeated_checkpoint( + 15..20, + expected_update( + ctx.accounts.pubkey_b58(NamedAccount::SharedB), + 8_888_888, + during_shared_b_sig, + ), + ), + ] + .into_iter() + .flatten() + .collect(), + }; + ctx.checkpoint_runner + .wait_until_satisfied(&during_restart, active_clients, cursors) + .await?; + + *service_one = Some( + ctx.service_controller + .start(spec_one, &ctx.artifacts) + .await?, + ); + connect_service_one_clients( + &ctx.accounts, + active_clients, + cursors, + &spec_one.endpoint, + ) + .await?; + + let post_restart_a_sig = ctx + .validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::RestartA), 9_999_999) + .await?; + let post_shared_b_sig = ctx + .validator + .airdrop(&ctx.accounts.pubkey(NamedAccount::SharedB), 10_101_010) + .await?; + + let post_restart = CheckpointSpec { + name: "post-restart", + clients: vec![ + repeated_checkpoint( + 0..5, + expected_update( + ctx.accounts.pubkey_b58(NamedAccount::RestartA), + 9_999_999, + post_restart_a_sig, + ), + ), + repeated_checkpoint( + 5..10, + expected_update( + ctx.accounts.pubkey_b58(NamedAccount::SharedB), + 10_101_010, + post_shared_b_sig.clone(), + ), + ), + empty_checkpoints(10..15), + repeated_checkpoint( + 15..20, + expected_update( + ctx.accounts.pubkey_b58(NamedAccount::SharedB), + 10_101_010, + post_shared_b_sig, + ), + ), + ] + .into_iter() + .flatten() + .collect(), + }; + ctx.checkpoint_runner + .wait_until_satisfied(&post_restart, active_clients, cursors) + .await +} + +async fn connect_service_one_clients( + accounts: &ScenarioAccounts, + active_clients: &mut Vec, + cursors: &mut Vec, + endpoint: &str, +) -> anyhow::Result<()> { + for id in 0..10 { + let client = TestGrpcClient::connect( + id, + ServiceInstance::One, + endpoint.to_owned(), + ) + .await + 
.with_context(|| { + format!("failed to connect service-one client {id}") + })?; + let subscription = if id < 5 { + vec![NamedAccount::RestartA] + } else { + vec![NamedAccount::SharedB] + }; + let pubkeys = subscription + .into_iter() + .map(|account| accounts.pubkey_b58(account)) + .collect::>(); + client.replace_subscription(&pubkeys).await?; + upsert_client(active_clients, cursors, client); + } + Ok(()) +} + +async fn connect_service_two_clients( + accounts: &ScenarioAccounts, + active_clients: &mut Vec, + cursors: &mut Vec, + endpoint: &str, +) -> anyhow::Result<()> { + for id in 10..20 { + let client = TestGrpcClient::connect( + id, + ServiceInstance::Two, + endpoint.to_owned(), + ) + .await + .with_context(|| { + format!("failed to connect service-two client {id}") + })?; + let subscription = if id < 15 { + vec![NamedAccount::RestartB] + } else { + vec![NamedAccount::SharedB] + }; + let pubkeys = subscription + .into_iter() + .map(|account| accounts.pubkey_b58(account)) + .collect::>(); + client.replace_subscription(&pubkeys).await?; + upsert_client(active_clients, cursors, client); + } + Ok(()) +} + +fn upsert_client( + active_clients: &mut Vec, + cursors: &mut Vec, + client: TestGrpcClient, +) { + let client_id = client.id; + if let Some(position) = + active_clients.iter().position(|c| c.id == client_id) + { + active_clients[position] = client; + } else { + active_clients.push(client); + } + + if let Some(cursor) = cursors + .iter_mut() + .find(|cursor| cursor.client_id == client_id) + { + cursor.next_index = 0; + } else { + cursors.push(ClientCursor { + client_id, + next_index: 0, + }); + } +} + +async fn shutdown_service_one_clients( + active_clients: &mut Vec, + cursors: &mut Vec, +) -> anyhow::Result> { + let mut parked = Vec::new(); + let mut remaining = Vec::new(); + + for client in active_clients.drain(..) 
{ + if client.service == ServiceInstance::One { + let log = client.log().clone(); + let len = log.len(); + parked.push(ParkedClientLog { + client_id: client.id, + log, + len, + }); + client.shutdown().await?; + } else { + remaining.push(client); + } + } + + *active_clients = remaining; + cursors.retain(|cursor| cursor.client_id >= 10); + Ok(parked) +} + +fn assert_logs_unchanged( + parked_logs: &[ParkedClientLog], +) -> anyhow::Result<()> { + for parked in parked_logs { + if parked.log.len() != parked.len { + bail!( + "service-one client {} received updates after shutdown", + parked.client_id + ); + } + } + Ok(()) +} + +fn expected_update( + pubkey_b58: String, + lamports: u64, + txn_signature_b58: String, +) -> ExpectedUpdate { + ExpectedUpdate { + pubkey_b58: Some(pubkey_b58), + lamports: Some(lamports), + txn_signature_b58: Some(Some(txn_signature_b58)), + ..Default::default() + } +} + +fn repeated_checkpoint( + range: std::ops::Range, + expected: ExpectedUpdate, +) -> Vec { + range + .map(|client_id| ClientCheckpoint { + client_id, + allowed: vec![expected.clone()], + required: vec![expected.clone()], + }) + .collect() +} + +fn empty_checkpoints(range: std::ops::Range) -> Vec { + range + .map(|client_id| ClientCheckpoint { + client_id, + allowed: Vec::new(), + required: Vec::new(), + }) + .collect() +} + +async fn shutdown_clients(clients: Vec) -> anyhow::Result<()> { + for client in clients { + client.shutdown().await?; + } + Ok(()) +} + +async fn shutdown_service( + controller: &crate::service::ServiceController, + service: &mut Option, +) -> anyhow::Result<()> { + if let Some(service) = service.take() { + controller.shutdown(service).await?; + } + Ok(()) +} + +struct ParkedClientLog { + client_id: usize, + log: ClientLog, + len: usize, +} diff --git a/ix-tests/src/scenarios/mod.rs b/ix-tests/src/scenarios/mod.rs index fe174da..839a302 100644 --- a/ix-tests/src/scenarios/mod.rs +++ b/ix-tests/src/scenarios/mod.rs @@ -1,4 +1,5 @@ mod dual_concurrent; +mod 
dual_restart; mod single_basic; mod single_load; @@ -15,9 +16,7 @@ pub async fn run_scenario( ScenarioName::SingleBasic => single_basic::run(ctx).await, ScenarioName::SingleLoad => single_load::run(ctx).await, ScenarioName::DualConcurrent => dual_concurrent::run(ctx).await, - ScenarioName::DualRestart => { - bail!("scenario not implemented: {}", name.as_str()) - } + ScenarioName::DualRestart => dual_restart::run(ctx).await, ScenarioName::All => bail!("scenario dispatch does not accept 'all'"), } } From 5431e2b1f1e19ec5d0456361e77e8c177df3b5a3 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 15:28:15 +0700 Subject: [PATCH 13/68] feat: persist ix failure artifacts --- ix-tests/src/artifacts.rs | 32 ++++++++++++++++++ ix-tests/src/layout.rs | 4 ++- ix-tests/src/main.rs | 8 +++-- ix-tests/src/observation.rs | 4 ++- ix-tests/src/scenarios/dual_concurrent.rs | 26 +++++++++----- ix-tests/src/scenarios/dual_restart.rs | 41 ++++++++++++++++------- ix-tests/src/scenarios/mod.rs | 15 +++++++-- ix-tests/src/scenarios/single_basic.rs | 34 +++++++++++++------ ix-tests/src/scenarios/single_load.rs | 34 +++++++++++++------ 9 files changed, 150 insertions(+), 48 deletions(-) diff --git a/ix-tests/src/artifacts.rs b/ix-tests/src/artifacts.rs index 1db9ef4..1df0928 100644 --- a/ix-tests/src/artifacts.rs +++ b/ix-tests/src/artifacts.rs @@ -2,7 +2,9 @@ use std::path::PathBuf; use std::time::{SystemTime, UNIX_EPOCH}; use anyhow::Context; +use serde::Serialize; +use crate::client::TestGrpcClient; use crate::config::SuiteConfig; use crate::layout::ServiceInstance; use crate::scenario::ScenarioName; @@ -60,6 +62,36 @@ impl RunArtifacts { .join(format!("{}-client-updates.json", scenario.as_str())) } + pub fn write_client_updates( + &self, + scenario: ScenarioName, + clients: &[TestGrpcClient], + ) -> anyhow::Result<()> { + #[derive(Serialize)] + struct ClientUpdates { + client_id: usize, + service: ServiceInstance, + endpoint: String, + updates: Vec, + } + + let payload 
= clients + .iter() + .map(|client| ClientUpdates { + client_id: client.id, + service: client.service, + endpoint: client.endpoint.clone(), + updates: client.log().snapshot(), + }) + .collect::>(); + let path = self.client_updates_path(scenario); + let json = serde_json::to_vec_pretty(&payload) + .context("failed to serialize client updates")?; + std::fs::write(&path, json).with_context(|| { + format!("failed to write client updates to {}", path.display()) + }) + } + #[allow(dead_code)] pub fn persist_failure(&self) -> anyhow::Result<()> { let timestamp = SystemTime::now() diff --git a/ix-tests/src/layout.rs b/ix-tests/src/layout.rs index 26f0df8..fb9f570 100644 --- a/ix-tests/src/layout.rs +++ b/ix-tests/src/layout.rs @@ -1,6 +1,8 @@ +use serde::Serialize; + use crate::scenario::ScenarioName; -#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize)] #[allow(dead_code)] pub enum ServiceInstance { One, diff --git a/ix-tests/src/main.rs b/ix-tests/src/main.rs index 41ef12a..48c53d8 100644 --- a/ix-tests/src/main.rs +++ b/ix-tests/src/main.rs @@ -75,9 +75,13 @@ async fn main() -> anyhow::Result<()> { ctx.artifacts.cleanup_success()?; info!(scenario = scenario.as_str(), "scenario passed"); } - Err(error) => { + Err(failure) => { + if !failure.clients.is_empty() { + ctx.artifacts + .write_client_updates(*scenario, &failure.clients)?; + } ctx.artifacts.persist_failure()?; - return Err(error); + return Err(failure.error); } } } diff --git a/ix-tests/src/observation.rs b/ix-tests/src/observation.rs index 5da691b..6f2e0cd 100644 --- a/ix-tests/src/observation.rs +++ b/ix-tests/src/observation.rs @@ -1,9 +1,11 @@ use std::sync::{Arc, Mutex}; +use serde::Serialize; + use crate::layout::ServiceInstance; #[allow(dead_code)] -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct ObservedUpdate { pub client_id: usize, pub service: ServiceInstance, diff --git a/ix-tests/src/scenarios/dual_concurrent.rs 
b/ix-tests/src/scenarios/dual_concurrent.rs index 9de57c9..28ef3c1 100644 --- a/ix-tests/src/scenarios/dual_concurrent.rs +++ b/ix-tests/src/scenarios/dual_concurrent.rs @@ -7,9 +7,10 @@ use crate::expectation::{ CheckpointSpec, ClientCheckpoint, ClientCursor, ExpectedUpdate, }; use crate::layout::ServiceInstance; +use crate::scenarios::ScenarioFailure; use crate::service::{ManagedService, ServiceSpec}; -pub async fn run(ctx: &ScenarioContext) -> anyhow::Result<()> { +pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { let spec_one = ServiceSpec::for_instance(ServiceInstance::One); let spec_two = ServiceSpec::for_instance(ServiceInstance::Two); let mut services = Vec::new(); @@ -25,14 +26,16 @@ pub async fn run(ctx: &ScenarioContext) -> anyhow::Result<()> { &mut cursors, ) .await; + if let Err(error) = result { + return Err(ScenarioFailure { error, clients }); + } - let client_shutdown = shutdown_clients(clients).await; - let service_shutdown = - shutdown_services(&ctx.service_controller, services).await; - - result?; - client_shutdown?; - service_shutdown?; + shutdown_clients(clients) + .await + .map_err(scenario_failure_without_clients)?; + shutdown_services(&ctx.service_controller, services) + .await + .map_err(scenario_failure_without_clients)?; Ok(()) } @@ -201,3 +204,10 @@ async fn shutdown_services( } Ok(()) } + +fn scenario_failure_without_clients(error: anyhow::Error) -> ScenarioFailure { + ScenarioFailure { + error, + clients: Vec::new(), + } +} diff --git a/ix-tests/src/scenarios/dual_restart.rs b/ix-tests/src/scenarios/dual_restart.rs index ab76597..0a49827 100644 --- a/ix-tests/src/scenarios/dual_restart.rs +++ b/ix-tests/src/scenarios/dual_restart.rs @@ -8,20 +8,23 @@ use crate::expectation::{ }; use crate::layout::ServiceInstance; use crate::observation::ClientLog; +use crate::scenarios::ScenarioFailure; use crate::service::{ManagedService, ServiceSpec}; -pub async fn run(ctx: &ScenarioContext) -> anyhow::Result<()> { +pub async 
fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { let spec_one = ServiceSpec::for_instance(ServiceInstance::One); let spec_two = ServiceSpec::for_instance(ServiceInstance::Two); let mut service_one = Some( ctx.service_controller .start(&spec_one, &ctx.artifacts) - .await?, + .await + .map_err(scenario_failure_without_clients)?, ); let mut service_two = Some( ctx.service_controller .start(&spec_two, &ctx.artifacts) - .await?, + .await + .map_err(scenario_failure_without_clients)?, ); let mut active_clients = Vec::new(); let mut cursors = Vec::new(); @@ -35,17 +38,22 @@ pub async fn run(ctx: &ScenarioContext) -> anyhow::Result<()> { &mut cursors, ) .await; + if let Err(error) = result { + return Err(ScenarioFailure { + error, + clients: active_clients, + }); + } - let client_shutdown = shutdown_clients(active_clients).await; - let service_one_shutdown = - shutdown_service(&ctx.service_controller, &mut service_one).await; - let service_two_shutdown = - shutdown_service(&ctx.service_controller, &mut service_two).await; - - result?; - client_shutdown?; - service_one_shutdown?; - service_two_shutdown?; + shutdown_clients(active_clients) + .await + .map_err(scenario_failure_without_clients)?; + shutdown_service(&ctx.service_controller, &mut service_one) + .await + .map_err(scenario_failure_without_clients)?; + shutdown_service(&ctx.service_controller, &mut service_two) + .await + .map_err(scenario_failure_without_clients)?; Ok(()) } @@ -414,3 +422,10 @@ struct ParkedClientLog { log: ClientLog, len: usize, } + +fn scenario_failure_without_clients(error: anyhow::Error) -> ScenarioFailure { + ScenarioFailure { + error, + clients: Vec::new(), + } +} diff --git a/ix-tests/src/scenarios/mod.rs b/ix-tests/src/scenarios/mod.rs index 839a302..faf8016 100644 --- a/ix-tests/src/scenarios/mod.rs +++ b/ix-tests/src/scenarios/mod.rs @@ -3,20 +3,29 @@ mod dual_restart; mod single_basic; mod single_load; -use anyhow::bail; +use anyhow::anyhow; +use 
crate::client::TestGrpcClient; use crate::context::ScenarioContext; use crate::scenario::ScenarioName; +pub struct ScenarioFailure { + pub error: anyhow::Error, + pub clients: Vec, +} + pub async fn run_scenario( name: ScenarioName, ctx: &ScenarioContext, -) -> anyhow::Result<()> { +) -> Result<(), ScenarioFailure> { match name { ScenarioName::SingleBasic => single_basic::run(ctx).await, ScenarioName::SingleLoad => single_load::run(ctx).await, ScenarioName::DualConcurrent => dual_concurrent::run(ctx).await, ScenarioName::DualRestart => dual_restart::run(ctx).await, - ScenarioName::All => bail!("scenario dispatch does not accept 'all'"), + ScenarioName::All => Err(ScenarioFailure { + error: anyhow!("scenario dispatch does not accept 'all'"), + clients: Vec::new(), + }), } } diff --git a/ix-tests/src/scenarios/single_basic.rs b/ix-tests/src/scenarios/single_basic.rs index a4ab5ea..9ea591b 100644 --- a/ix-tests/src/scenarios/single_basic.rs +++ b/ix-tests/src/scenarios/single_basic.rs @@ -8,6 +8,7 @@ use crate::expectation::{ CheckpointSpec, ClientCheckpoint, ClientCursor, ExpectedUpdate, }; use crate::layout::ServiceInstance; +use crate::scenarios::ScenarioFailure; use crate::service::{ManagedService, ServiceSpec}; const OWNER_DATA_SPACE: u64 = 64; @@ -17,23 +18,29 @@ const SYNTHETIC_OWNER_BYTES: [u8; 32] = [ 0x18, 0x29, 0x3A, 0x4B, 0x5C, 0x6D, 0x7E, 0x8F, ]; -pub async fn run(ctx: &ScenarioContext) -> anyhow::Result<()> { +pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { let spec = ServiceSpec::for_instance(ServiceInstance::One); - let mut service = - Some(ctx.service_controller.start(&spec, &ctx.artifacts).await?); + let mut service = Some( + ctx.service_controller + .start(&spec, &ctx.artifacts) + .await + .map_err(scenario_failure_without_clients)?, + ); let mut clients = Vec::new(); let mut cursors = Vec::new(); let result = run_inner(ctx, &spec.endpoint, &mut clients, &mut cursors).await; + if let Err(error) = result { + return 
Err(ScenarioFailure { error, clients }); + } - let client_shutdown = shutdown_clients(clients).await; - let service_shutdown = - shutdown_service(&ctx.service_controller, &mut service).await; - - result?; - client_shutdown?; - service_shutdown?; + shutdown_clients(clients) + .await + .map_err(scenario_failure_without_clients)?; + shutdown_service(&ctx.service_controller, &mut service) + .await + .map_err(scenario_failure_without_clients)?; Ok(()) } @@ -186,3 +193,10 @@ async fn shutdown_service( } Ok(()) } + +fn scenario_failure_without_clients(error: anyhow::Error) -> ScenarioFailure { + ScenarioFailure { + error, + clients: Vec::new(), + } +} diff --git a/ix-tests/src/scenarios/single_load.rs b/ix-tests/src/scenarios/single_load.rs index a040fa8..ee4a4a2 100644 --- a/ix-tests/src/scenarios/single_load.rs +++ b/ix-tests/src/scenarios/single_load.rs @@ -7,25 +7,32 @@ use crate::expectation::{ CheckpointSpec, ClientCheckpoint, ClientCursor, ExpectedUpdate, }; use crate::layout::ServiceInstance; +use crate::scenarios::ScenarioFailure; use crate::service::{ManagedService, ServiceSpec}; -pub async fn run(ctx: &ScenarioContext) -> anyhow::Result<()> { +pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { let spec = ServiceSpec::for_instance(ServiceInstance::One); - let mut service = - Some(ctx.service_controller.start(&spec, &ctx.artifacts).await?); + let mut service = Some( + ctx.service_controller + .start(&spec, &ctx.artifacts) + .await + .map_err(scenario_failure_without_clients)?, + ); let mut clients = Vec::new(); let mut cursors = Vec::new(); let result = run_inner(ctx, &spec.endpoint, &mut clients, &mut cursors).await; + if let Err(error) = result { + return Err(ScenarioFailure { error, clients }); + } - let client_shutdown = shutdown_clients(clients).await; - let service_shutdown = - shutdown_service(&ctx.service_controller, &mut service).await; - - result?; - client_shutdown?; - service_shutdown?; + shutdown_clients(clients) + .await + 
.map_err(scenario_failure_without_clients)?; + shutdown_service(&ctx.service_controller, &mut service) + .await + .map_err(scenario_failure_without_clients)?; Ok(()) } @@ -113,3 +120,10 @@ async fn shutdown_service( } Ok(()) } + +fn scenario_failure_without_clients(error: anyhow::Error) -> ScenarioFailure { + ScenarioFailure { + error, + clients: Vec::new(), + } +} From 2112eb5e05ce091a9db9ccffdd4623172aa7f4c9 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Wed, 22 Apr 2026 15:28:52 +0700 Subject: [PATCH 14/68] docs: add ix test root workflows --- Makefile | 17 +++++++++++++++++ README.md | 6 ++++++ 2 files changed, 23 insertions(+) diff --git a/Makefile b/Makefile index 9911f0d..d64f43f 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,9 @@ CLIENT_REST ?= http://127.0.0.1:3030 kafka-ready \ kafka-ui \ kafka-ui-down \ + ix-tests-build \ + ix-tests-run \ + ix-tests-scenario \ grpc-service-run \ grpc-service-build \ grpc-service-client \ @@ -27,6 +30,9 @@ help: @echo " kafka-ready - Start the stack and initialize stream/table/schema" @echo " kafka-ui - Start Redpanda Console" @echo " kafka-ui-down - Stop Redpanda Console" + @echo " ix-tests-build - Build the gRPC service binary and the ix-tests harness" + @echo " ix-tests-run - Run the full local integration suite" + @echo " ix-tests-scenario - Run one integration scenario (SCENARIO=...)" @echo " grpc-service-run - Run the gRPC service" @echo " grpc-service-build - Build the gRPC service package" @echo " grpc-service-client - Run the example gRPC client" @@ -61,6 +67,17 @@ kafka-ui: kafka-ui-down: $(MAKE) -C kafka-setup ui-down +ix-tests-build: + cargo build -p magigblock-grpc-service + cargo build -p ix-tests + +ix-tests-run: + cargo run -p ix-tests -- --config ix-tests/configs/suite.toml --scenario all + +ix-tests-scenario: + @test -n "$(SCENARIO)" || (echo "Provide SCENARIO=..." 
>&2; exit 1) + cargo run -p ix-tests -- --config ix-tests/configs/suite.toml --scenario "$(SCENARIO)" + grpc-service-run: $(MAKE) -C grpc-service run diff --git a/README.md b/README.md index 90f8067..dcd674a 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ This repo contains the MagicBlock account update pipeline: - `event-proto/`: shared Rust crate `magigblock-event-proto` - `grpc-service/`: Rust crate `magigblock-grpc-service` - `geyser-plugin/`: Solana Geyser plugin crate +- `ix-tests/`: local end-to-end gRPC integration harness - `kafka-setup/`: minimal Kafka/ksqlDB local environment - `Makefile`: top-level operator entrypoint @@ -36,6 +37,9 @@ cargo test --workspace -- --test-threads=16 - `make kafka-ready` - `make kafka-ui` - `make kafka-ui-down` +- `make ix-tests-build` +- `make ix-tests-run` +- `make ix-tests-scenario SCENARIO=single-basic` - `make grpc-service-run` - `make grpc-service-build` - `make grpc-service-client` @@ -43,3 +47,5 @@ cargo test --workspace -- --test-threads=16 - `make grpc-service-client-remove-sub PUBKEY=` - `make geyser-plugin-build` - `make geyser-plugin-launch` + +The integration suite assumes Kafka/ksqlDB and the validator-with-plugin are already up. Scenarios are isolated and can be run individually. Failure artifacts are written under `target/ix-tests/failures/`. 
From a499959d481b82b9bc373c60c7ad5be0f29e4756 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 28 Apr 2026 11:43:55 +0700 Subject: [PATCH 15/68] chore: fixes + log improvs in ix-tests --- geyser-plugin/Makefile | 25 ++-- .../configs/plugin-config.example.json | 2 +- ix-tests/src/artifacts.rs | 26 +++- ix-tests/src/expectation.rs | 115 +++++++++++++----- ix-tests/src/main.rs | 3 + ix-tests/src/scenarios/single_basic.rs | 75 +++++++----- ix-tests/src/service.rs | 15 ++- ix-tests/src/validator.rs | 13 +- 8 files changed, 199 insertions(+), 75 deletions(-) diff --git a/geyser-plugin/Makefile b/geyser-plugin/Makefile index 4fab2f0..87ab5fc 100644 --- a/geyser-plugin/Makefile +++ b/geyser-plugin/Makefile @@ -18,14 +18,6 @@ help: @echo " make launch - Launch solana-test-validator with the plugin (depends on build-plugin)" @echo " make clean - Remove compiled artifacts" -init-config: - @echo "Creating $(PLUGIN_CONFIG)..." - @perl -0pe 's#(? "$(PLUGIN_CONFIG)" - @echo "Creating $(VALIDATOR_CONFIG)..." - @perl -0pe 's#(? "$(VALIDATOR_CONFIG)" - @echo "✓ Created $(PLUGIN_CONFIG)" - @echo "✓ Created $(VALIDATOR_CONFIG)" - build-plugin: @echo "Building plugin for $(UNAME_S)..." cargo build --release @@ -48,6 +40,23 @@ launch: build-plugin fi solana-test-validator --log --reset --geyser-plugin-config "$(VALIDATOR_CONFIG)" +init-config: + @echo "Creating $(VALIDATOR_CONFIG) and $(PLUGIN_CONFIG) for $(UNAME_S) (.$(PLUGIN_EXT))..." 
+ @if [ -f "$(VALIDATOR_CONFIG)" ]; then \ + echo "Error: $(VALIDATOR_CONFIG) already exists, refusing to overwrite"; \ + exit 1; \ + fi + @if [ -f "$(PLUGIN_CONFIG)" ]; then \ + echo "Error: $(PLUGIN_CONFIG) already exists, refusing to overwrite"; \ + exit 1; \ + fi + @sed -e 's|path-to-plugin-library|$(PLUGIN_PATH)|' \ + -e 's|plugin-config.toml|$(PLUGIN_CONFIG)|' \ + ./configs/plugin-config.example.json > "$(VALIDATOR_CONFIG)" + @cp ./configs/plugin-config.example.toml "$(PLUGIN_CONFIG)" + @echo "Created $(VALIDATOR_CONFIG) (libpath: $(PLUGIN_PATH))" + @echo "Created $(PLUGIN_CONFIG)" + clean: cargo clean @echo "Cleaned build artifacts" diff --git a/geyser-plugin/configs/plugin-config.example.json b/geyser-plugin/configs/plugin-config.example.json index 7b83930..748ca13 100644 --- a/geyser-plugin/configs/plugin-config.example.json +++ b/geyser-plugin/configs/plugin-config.example.json @@ -1,4 +1,4 @@ { - "libpath": "../target/release/libsolana_accountsdb_plugin_kafka.so", + "libpath": "path-to-plugin-library", "config_file": "plugin-config.toml" } diff --git a/ix-tests/src/artifacts.rs b/ix-tests/src/artifacts.rs index 1df0928..84e621c 100644 --- a/ix-tests/src/artifacts.rs +++ b/ix-tests/src/artifacts.rs @@ -44,7 +44,6 @@ impl RunArtifacts { }) } - #[allow(dead_code)] pub fn service_logs(&self, instance: ServiceInstance) -> ServiceLogPaths { let label = match instance { ServiceInstance::One => "service-1", @@ -56,6 +55,31 @@ impl RunArtifacts { } } + pub fn dump_service_logs_at(paths: &ServiceLogPaths) -> anyhow::Result<()> { + for path in &[&paths.stdout, &paths.stderr] { + if path.exists() { + let content = + std::fs::read_to_string(path).with_context(|| { + format!( + "failed to read service log: {}", + path.display() + ) + })?; + println!("--- {} ---\n{}", path.display(), content); + } + } + Ok(()) + } + + #[allow(dead_code)] + pub fn dump_service_logs( + &self, + instance: ServiceInstance, + ) -> anyhow::Result<()> { + let paths = 
self.service_logs(instance); + Self::dump_service_logs_at(&paths) + } + #[allow(dead_code)] pub fn client_updates_path(&self, scenario: ScenarioName) -> PathBuf { self.run_dir diff --git a/ix-tests/src/expectation.rs b/ix-tests/src/expectation.rs index fabb865..04d3dce 100644 --- a/ix-tests/src/expectation.rs +++ b/ix-tests/src/expectation.rs @@ -1,6 +1,7 @@ use std::time::{Duration, Instant}; +use tracing::*; -use anyhow::{Context, bail}; +use anyhow::{bail, Context}; use tokio::time::sleep; use crate::client::TestGrpcClient; @@ -52,33 +53,89 @@ pub struct CheckpointRunner { #[allow(dead_code)] impl ExpectedUpdate { pub fn matches(&self, observed: &ObservedUpdate) -> bool { - self.pubkey_b58 - .as_ref() - .is_none_or(|expected| observed.pubkey_b58 == *expected) - && self.slot.is_none_or(|expected| observed.slot == expected) - && self - .lamports - .is_none_or(|expected| observed.lamports == expected) - && self - .owner_b58 - .as_ref() - .is_none_or(|expected| observed.owner_b58 == *expected) - && self - .executable - .is_none_or(|expected| observed.executable == expected) - && self - .rent_epoch - .is_none_or(|expected| observed.rent_epoch == expected) - && self - .write_version - .is_none_or(|expected| observed.write_version == expected) - && self.txn_signature_b58.as_ref().is_none_or(|expected| { - observed.txn_signature_b58.as_ref() == expected.as_ref() - }) - && self - .data - .as_ref() - .is_none_or(|expected| observed.data == *expected) + let mut mismatches = Vec::new(); + if let Some(expected) = &self.pubkey_b58 { + if observed.pubkey_b58 != *expected { + mismatches.push(format!( + "pubkey_b58: expected {}, got {}", + expected, observed.pubkey_b58 + )); + } + } + if let Some(expected) = self.slot { + if observed.slot != expected { + mismatches.push(format!( + "slot: expected {}, got {}", + expected, observed.slot + )); + } + } + if let Some(expected) = self.lamports { + if observed.lamports != expected { + mismatches.push(format!( + "lamports: expected {}, 
got {}", + expected, observed.lamports + )); + } + } + if let Some(expected) = &self.owner_b58 { + if observed.owner_b58 != *expected { + mismatches.push(format!( + "owner_b58: expected {}, got {}", + expected, observed.owner_b58 + )); + } + } + + if let Some(expected) = self.executable { + if observed.executable != expected { + mismatches.push(format!( + "executable: expected {}, got {}", + expected, observed.executable + )); + } + } + + if let Some(expected) = self.rent_epoch { + if observed.rent_epoch != expected { + mismatches.push(format!( + "rent_epoch: expected {}, got {}", + expected, observed.rent_epoch + )); + } + } + + if let Some(expected) = self.write_version { + if observed.write_version != expected { + mismatches.push(format!( + "write_version: expected {}, got {}", + expected, observed.write_version + )); + } + } + + if let Some(expected) = &self.txn_signature_b58 { + if observed.txn_signature_b58.as_ref() != expected.as_ref() { + mismatches.push(format!( + "txn_signature_b58: expected {:?}, got {:?}", + expected, observed.txn_signature_b58 + )); + } + } + + if let Some(expected) = &self.data { + if observed.data != *expected { + mismatches.push(format!( + "data: expected {:?}, got {:?}", + expected, observed.data + )); + } + } + + if !mismatches.is_empty() { + warn!("Mismatches:\n {}", mismatches.join("\n ")); + } + return mismatches.is_empty(); } } @@ -128,6 +185,8 @@ impl CheckpointRunner { .iter() .any(|expected| expected.matches(update)) }) { + error!("Expected one of: {:#?}", client_spec.allowed); + error!("Got: {unexpected:#?}"); bail!( "checkpoint '{}' failed for client {}: unexpected update for pubkey {}", spec.name, diff --git a/ix-tests/src/main.rs b/ix-tests/src/main.rs index 48c53d8..b43befb 100644 --- a/ix-tests/src/main.rs +++ b/ix-tests/src/main.rs @@ -30,6 +30,9 @@ fn init_tracing() { tracing_subscriber::EnvFilter::try_from_default_env() .unwrap_or_else(|_| "ix_tests=info".into()), ) + .without_time() + .with_file(true) + 
.with_line_number(true) .with_target(false) .init(); } diff --git a/ix-tests/src/scenarios/single_basic.rs b/ix-tests/src/scenarios/single_basic.rs index 9ea591b..27d7b94 100644 --- a/ix-tests/src/scenarios/single_basic.rs +++ b/ix-tests/src/scenarios/single_basic.rs @@ -1,5 +1,6 @@ use anyhow::Context; use solana_pubkey::Pubkey; +use tracing::debug; use crate::accounts::NamedAccount; use crate::client::TestGrpcClient; @@ -65,56 +66,72 @@ async fn run_inner( clients.push(client); } + let simple_a = ctx.accounts.pubkey(NamedAccount::SimpleA); + debug!("Client 0 subscribing to SimpleA: {simple_a}"); clients[0] - .replace_subscription(&[ctx.accounts.pubkey_b58(NamedAccount::SimpleA)]) + .replace_subscription(&[simple_a.to_string()]) .await?; + + let simple_b = ctx.accounts.pubkey(NamedAccount::SimpleB); + debug!("Client 1 subscribing to SimpleB: {simple_b}"); clients[1] - .replace_subscription(&[ctx.accounts.pubkey_b58(NamedAccount::SimpleB)]) + .replace_subscription(&[simple_b.to_string()]) .await?; + + let simple_c = ctx.accounts.pubkey(NamedAccount::SimpleC); + debug!("Client 2 subscribing to SimpleC: {simple_c}"); clients[2] - .replace_subscription(&[ctx.accounts.pubkey_b58(NamedAccount::SimpleC)]) + .replace_subscription(&[simple_c.to_string()]) .await?; + + let owner_data = ctx.accounts.pubkey(NamedAccount::OwnerData); + debug!("Client 3 subscribing to OwnerData: {owner_data}"); clients[3] - .replace_subscription(&[ctx - .accounts - .pubkey_b58(NamedAccount::OwnerData)]) + .replace_subscription(&[owner_data.to_string()]) + .await?; + + // Right after we made the subscriptions we expect to get an _empty_ account update for + // each account + let empty_checkpoint = CheckpointSpec { + name: "initial-empty-accounts", + clients: vec![ + lamport_client_checkpoint(0, simple_a.to_string(), 0, None), + lamport_client_checkpoint(1, simple_b.to_string(), 0, None), + lamport_client_checkpoint(2, simple_c.to_string(), 0, None), + ], + }; + ctx.checkpoint_runner + 
.wait_until_satisfied(&empty_checkpoint, clients, cursors) .await?; + // Then we airdrop some lamports to each account and expect to see the updates with the + // correct lamports and signatures ctx.validator.fund_payer().await?; - let simple_a_sig = ctx - .validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::SimpleA), 1_000_000) - .await?; - let simple_b_sig = ctx - .validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::SimpleB), 2_000_000) - .await?; - let simple_c_sig = ctx - .validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::SimpleC), 3_000_000) - .await?; + let simple_a_sig = ctx.validator.airdrop(&simple_a, 1_000_000).await?; + let simple_b_sig = ctx.validator.airdrop(&simple_b, 2_000_000).await?; + let simple_c_sig = ctx.validator.airdrop(&simple_c, 3_000_000).await?; let basic_checkpoint = CheckpointSpec { name: "basic-lamports", clients: vec![ lamport_client_checkpoint( 0, - ctx.accounts.pubkey_b58(NamedAccount::SimpleA), + simple_a.to_string(), 1_000_000, - simple_a_sig, + Some(simple_a_sig), ), lamport_client_checkpoint( 1, - ctx.accounts.pubkey_b58(NamedAccount::SimpleB), + simple_b.to_string(), 2_000_000, - simple_b_sig, + Some(simple_b_sig), ), lamport_client_checkpoint( 2, - ctx.accounts.pubkey_b58(NamedAccount::SimpleC), + simple_c.to_string(), 3_000_000, - simple_c_sig, + Some(simple_c_sig), ), ], }; @@ -124,9 +141,7 @@ async fn run_inner( let rent_lamports = ctx.validator.rent_exempt_balance(OWNER_DATA_SPACE).await?; - ctx.validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::OwnerData), rent_lamports) - .await?; + ctx.validator.airdrop(&owner_data, rent_lamports).await?; let synthetic_owner = Pubkey::new_from_array(SYNTHETIC_OWNER_BYTES); let owner_data_sig = ctx @@ -139,7 +154,7 @@ async fn run_inner( .await?; let owner_data_expected = ExpectedUpdate { - pubkey_b58: Some(ctx.accounts.pubkey_b58(NamedAccount::OwnerData)), + pubkey_b58: Some(owner_data.to_string()), owner_b58: Some(synthetic_owner.to_string()), txn_signature_b58: 
Some(Some(owner_data_sig)), data: None, @@ -162,12 +177,12 @@ fn lamport_client_checkpoint( client_id: usize, pubkey_b58: String, lamports: u64, - txn_signature_b58: String, + txn_signature_b58: Option, ) -> ClientCheckpoint { let expected = ExpectedUpdate { pubkey_b58: Some(pubkey_b58), lamports: Some(lamports), - txn_signature_b58: Some(Some(txn_signature_b58)), + txn_signature_b58: Some(txn_signature_b58), ..Default::default() }; ClientCheckpoint { diff --git a/ix-tests/src/service.rs b/ix-tests/src/service.rs index 82ac789..4b25f33 100644 --- a/ix-tests/src/service.rs +++ b/ix-tests/src/service.rs @@ -5,7 +5,7 @@ use anyhow::{Context, bail}; use helius_laserstream::grpc::PingRequest; use helius_laserstream::grpc::geyser_client::GeyserClient; use tokio::process::Command; -use tracing::{debug, info}; +use tracing::{debug, info, warn}; use crate::artifacts::RunArtifacts; use crate::config::SuiteConfig; @@ -146,13 +146,24 @@ impl ServiceController { loop { if let Ok(mut client) = GeyserClient::connect(endpoint.to_owned()).await - && client.ping(PingRequest { count: 1 }).await.is_ok() + && client + .ping(PingRequest { count: 1 }) + .await + .inspect_err(|err| { + warn!( + "failed to ping grpc-service at {}: {err}", + endpoint + ) + }) + .is_ok() { info!(endpoint, "grpc-service is ready"); return Ok(()); } if tokio::time::Instant::now() >= deadline { + RunArtifacts::dump_service_logs_at(log_paths) + .context("failed to dump service logs")?; bail!( "grpc-service at {} did not become ready within {:?}\n\ stdout: {}\n\ diff --git a/ix-tests/src/validator.rs b/ix-tests/src/validator.rs index 9b586a0..085dc7f 100644 --- a/ix-tests/src/validator.rs +++ b/ix-tests/src/validator.rs @@ -3,7 +3,9 @@ use std::time::Duration; use anyhow::Context; use solana_keypair::Keypair; use solana_pubkey::Pubkey; -use solana_rpc_client::nonblocking::rpc_client::RpcClient; +use solana_rpc_client::{ + api::config::CommitmentConfig, nonblocking::rpc_client::RpcClient, +}; use 
solana_signer::Signer; use solana_system_interface::instruction as system_instruction; use solana_transaction::Transaction; @@ -19,7 +21,10 @@ pub struct ValidatorDriver { impl ValidatorDriver { pub fn new(config: &SuiteConfig) -> Self { - let rpc = RpcClient::new(config.validator_rpc_url.clone()); + let rpc = RpcClient::new_with_commitment( + config.validator_rpc_url.clone(), + CommitmentConfig::confirmed(), + ); let payer = Keypair::new(); let transaction_timeout = Duration::from_millis(config.transaction_timeout_ms); @@ -32,12 +37,10 @@ impl ValidatorDriver { pub async fn fund_payer(&self) -> anyhow::Result<()> { let lamports = 10_000_000_000; // 10 SOL - let sig = self - .rpc + self.rpc .request_airdrop(&self.payer.pubkey(), lamports) .await .context("fund_payer: request_airdrop failed")?; - self.confirm_signature(&sig).await?; info!( payer = %self.payer.pubkey(), lamports, From 70f37a72606e40ce6bd88a2699ea940573d8cc77 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 28 Apr 2026 12:18:10 +0700 Subject: [PATCH 16/68] chore: log update lamports --- geyser-plugin/src/account_update_publisher.rs | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/geyser-plugin/src/account_update_publisher.rs b/geyser-plugin/src/account_update_publisher.rs index 6d08095..d3a0fc4 100644 --- a/geyser-plugin/src/account_update_publisher.rs +++ b/geyser-plugin/src/account_update_publisher.rs @@ -69,8 +69,9 @@ fn publish_raw_account_update( ) -> PluginResult<()> { if let Ok(key) = <[u8; 32]>::try_from(event.pubkey.as_slice()) { debug!( - "Matched account update {} in slot {}", + "Matched account update {} lamports {} in slot {}", Pubkey::new_from_array(key), + event.lamports, event.slot ); } @@ -130,14 +131,14 @@ fn should_publish_subscribed_account( } fn log_ignore_account_update(pubkey: &[u8]) { - if log_enabled!(::log::Level::Trace) - && let Ok(key) = <&[u8; 32]>::try_from(pubkey) - { - trace!( - "Ignoring update for account key: {:?}", - 
Pubkey::new_from_array(*key) - ); - return; + if log_enabled!(::log::Level::Trace) { + if let Ok(key) = <&[u8; 32]>::try_from(pubkey) { + trace!( + "Ignoring update for account key: {:?}", + Pubkey::new_from_array(*key) + ); + return; + } } if log_enabled!(::log::Level::Trace) { trace!("Ignoring update for account key bytes: {:?}", pubkey); @@ -147,8 +148,8 @@ fn log_ignore_account_update(pubkey: &[u8]) { #[cfg(test)] mod tests { use super::{ - AccountUpdatePublishOutcome, should_publish_backfill_account, - should_publish_confirmed_account, + should_publish_backfill_account, should_publish_confirmed_account, + AccountUpdatePublishOutcome, }; use crate::{ server::subscriptions::AccountSubscriptions, wire::UpdateAccountEvent, From fdb3fb57259e07fc28b7c3662bcc63434f111a40 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 28 Apr 2026 12:18:20 +0700 Subject: [PATCH 17/68] chore: improve b58 conversions --- ix-tests/src/client.rs | 54 +++++++++++++++++++++++--- ix-tests/src/scenarios/single_basic.rs | 11 +++++- 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/ix-tests/src/client.rs b/ix-tests/src/client.rs index 9aae1b6..0196acf 100644 --- a/ix-tests/src/client.rs +++ b/ix-tests/src/client.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use tracing::*; use std::time::{SystemTime, UNIX_EPOCH}; use anyhow::Context; @@ -7,6 +8,8 @@ use helius_laserstream::grpc::subscribe_update::UpdateOneof; use helius_laserstream::grpc::{ SubscribeRequest, SubscribeRequestFilterAccounts, }; +use solana_keypair::Signature; +use solana_pubkey::Pubkey; use tokio::sync::mpsc; use tokio_stream::StreamExt; use tokio_stream::wrappers::ReceiverStream; @@ -51,6 +54,35 @@ impl TestGrpcClient { let log = ClientLog::new(); let log_clone = log.clone(); + fn pubkey_str(bytes: &[u8]) -> String { + if bytes.is_empty() { + return String::new(); + } + Pubkey::try_from(bytes.to_vec()) + .inspect_err(|err| { + error!( + bytes = ?bytes, + err = ?err, + "failed to parse pubkey" + ) + }) 
+ .unwrap().to_string() + } + fn txn_signature_str(bytes: &[u8]) -> String { + if bytes.is_empty() { + return String::new(); + } + Signature::try_from(bytes.to_vec()) + .inspect_err(|err| { + error!( + bytes = ?bytes, + err = ?err, + "failed to parse txn signature" + ) + }) + .unwrap().to_string() + } + let receive_task = tokio::spawn(async move { while let Some(item) = update_stream.next().await { match item { @@ -67,23 +99,35 @@ impl TestGrpcClient { let observed = ObservedUpdate { client_id: id, service, - pubkey_b58: bs58::encode(&info.pubkey) - .into_string(), + pubkey_b58: pubkey_str(&info.pubkey), slot: account_update.slot, lamports: info.lamports, - owner_b58: bs58::encode(&info.owner) - .into_string(), + owner_b58: pubkey_str(&info.owner), executable: info.executable, rent_epoch: info.rent_epoch, write_version: info.write_version, txn_signature_b58: info .txn_signature .as_ref() - .map(|b| bs58::encode(b).into_string()), + .map(|x| txn_signature_str(x)), data: info.data, received_at_millis: now, }; + trace!( + client_id = id, + pubkey = %observed.pubkey_b58, + slot = observed.slot, + lamports = observed.lamports, + owner = %observed.owner_b58, + executable = observed.executable, + rent_epoch = observed.rent_epoch, + write_version = observed.write_version, + txn_signature = ?observed.txn_signature_b58.as_deref(), + data_len = observed.data.len(), + "received account update" + ); + log_clone.push(observed); } } diff --git a/ix-tests/src/scenarios/single_basic.rs b/ix-tests/src/scenarios/single_basic.rs index 27d7b94..e069dfc 100644 --- a/ix-tests/src/scenarios/single_basic.rs +++ b/ix-tests/src/scenarios/single_basic.rs @@ -98,12 +98,15 @@ async fn run_inner( lamport_client_checkpoint(0, simple_a.to_string(), 0, None), lamport_client_checkpoint(1, simple_b.to_string(), 0, None), lamport_client_checkpoint(2, simple_c.to_string(), 0, None), + lamport_client_checkpoint(3, owner_data.to_string(), 0, None), ], }; ctx.checkpoint_runner 
.wait_until_satisfied(&empty_checkpoint, clients, cursors) .await?; + debug!("✅ initial empty accounts"); + // Then we airdrop some lamports to each account and expect to see the updates with the // correct lamports and signatures ctx.validator.fund_payer().await?; @@ -139,6 +142,8 @@ async fn run_inner( .wait_until_satisfied(&basic_checkpoint, clients, cursors) .await?; + debug!("✅ basic lamports updates"); + let rent_lamports = ctx.validator.rent_exempt_balance(OWNER_DATA_SPACE).await?; ctx.validator.airdrop(&owner_data, rent_lamports).await?; @@ -170,7 +175,11 @@ async fn run_inner( }; ctx.checkpoint_runner .wait_until_satisfied(&owner_data_checkpoint, clients, cursors) - .await + .await?; + + debug!("✅ owner and data updates"); + + Ok(()) } fn lamport_client_checkpoint( From 63bbcf0da07858a88d59b246839e779522f087b0 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 28 Apr 2026 12:38:36 +0700 Subject: [PATCH 18/68] refactor(ix-tests): satisfy checkpoints from required only Amp-Thread-ID: https://ampcode.com/threads/T-019dd297-daf4-7092-91a6-1681a0d669ee Co-authored-by: Amp --- ix-tests/src/expectation.rs | 127 +++++++++++++++++++++++++++++------- 1 file changed, 103 insertions(+), 24 deletions(-) diff --git a/ix-tests/src/expectation.rs b/ix-tests/src/expectation.rs index 04d3dce..7048628 100644 --- a/ix-tests/src/expectation.rs +++ b/ix-tests/src/expectation.rs @@ -1,7 +1,7 @@ use std::time::{Duration, Instant}; use tracing::*; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use tokio::time::sleep; use crate::client::TestGrpcClient; @@ -26,6 +26,7 @@ pub struct ExpectedUpdate { #[allow(dead_code)] pub struct ClientCheckpoint { pub client_id: usize, + #[allow(dead_code)] pub allowed: Vec, pub required: Vec, } @@ -135,10 +136,22 @@ impl ExpectedUpdate { if !mismatches.is_empty() { warn!("Mismatches:\n {}", mismatches.join("\n ")); } - return mismatches.is_empty(); + mismatches.is_empty() } } +fn unmatched_required<'a>( + required: &'a 
[ExpectedUpdate], + observed: &[ObservedUpdate], +) -> Vec<&'a ExpectedUpdate> { + required + .iter() + .filter(|expected| { + !observed.iter().any(|update| expected.matches(update)) + }) + .collect() +} + #[allow(dead_code)] impl CheckpointRunner { pub fn new(config: &SuiteConfig) -> Self { @@ -179,27 +192,9 @@ impl CheckpointRunner { })?; let observed = client.log().snapshot_from(cursor.next_index); - if let Some(unexpected) = observed.iter().find(|update| { - !client_spec - .allowed - .iter() - .any(|expected| expected.matches(update)) - }) { - error!("Expected one of: {:#?}", client_spec.allowed); - error!("Got: {unexpected:#?}"); - bail!( - "checkpoint '{}' failed for client {}: unexpected update for pubkey {}", - spec.name, - client_spec.client_id, - unexpected.pubkey_b58 - ); - } - - let missing_required = - client_spec.required.iter().any(|expected| { - !observed.iter().any(|update| expected.matches(update)) - }); - if missing_required { + if !unmatched_required(&client_spec.required, &observed) + .is_empty() + { all_required_seen = false; } } @@ -221,6 +216,39 @@ impl CheckpointRunner { } if Instant::now() >= deadline { + // Build a useful diagnostic per client and bail. 
+ for client_spec in &spec.clients { + let Some(client) = clients + .iter() + .find(|client| client.id == client_spec.client_id) + else { + continue; + }; + let Some(cursor) = cursors.iter().find(|cursor| { + cursor.client_id == client_spec.client_id + }) else { + continue; + }; + let observed = + client.log().snapshot_from(cursor.next_index); + let missing = + unmatched_required(&client_spec.required, &observed); + if missing.is_empty() { + continue; + } + error!( + checkpoint = spec.name, + client_id = client_spec.client_id, + "Missing required: {:#?}", + missing + ); + error!( + checkpoint = spec.name, + client_id = client_spec.client_id, + "Observed in window: {:#?}", + observed + ); + } bail!( "checkpoint '{}' timed out after {:?}", spec.name, @@ -238,7 +266,7 @@ mod tests { use crate::layout::ServiceInstance; use crate::observation::ObservedUpdate; - use super::ExpectedUpdate; + use super::{ExpectedUpdate, unmatched_required}; fn observed_update() -> ObservedUpdate { ObservedUpdate { @@ -257,6 +285,26 @@ mod tests { } } + fn observed_with( + lamports: u64, + txn_signature_b58: Option, + ) -> ObservedUpdate { + ObservedUpdate { + client_id: 7, + service: ServiceInstance::One, + pubkey_b58: "pubkey".to_owned(), + slot: 42, + lamports, + owner_b58: "owner".to_owned(), + executable: false, + rent_epoch: 5, + write_version: 6, + txn_signature_b58, + data: vec![1, 2, 3], + received_at_millis: 123, + } + } + #[test] fn matches_ignores_none_fields() { let expected = ExpectedUpdate { @@ -276,4 +324,35 @@ mod tests { assert!(!expected.matches(&observed_update())); } + + #[test] + fn unmatched_required_returns_empty_when_all_required_match_with_noise() { + let noise = observed_with(0, None); + let matching = observed_with(1_000_000, Some("real-sig".to_owned())); + let observed = vec![noise, matching]; + + let required = vec![ExpectedUpdate { + lamports: Some(1_000_000), + txn_signature_b58: Some(Some("real-sig".to_owned())), + ..Default::default() + }]; + + 
assert!(unmatched_required(&required, &observed).is_empty()); + } + + #[test] + fn unmatched_required_reports_missing_when_required_never_arrives() { + let noise = observed_with(0, None); + let observed = vec![noise]; + + let required = vec![ExpectedUpdate { + lamports: Some(1_000_000), + ..Default::default() + }]; + + let missing = unmatched_required(&required, &observed); + assert!(!missing.is_empty()); + assert_eq!(missing.len(), 1); + assert_eq!(missing[0].lamports, Some(1_000_000)); + } } From f3e6c4c6948734960775e5f431443e1bc3bc2789 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 28 Apr 2026 12:39:55 +0700 Subject: [PATCH 19/68] refactor: remove allowed field from ClientCheckpoint Step 2: Drop the unused allowed field from the ClientCheckpoint struct and update all four scenario files to stop constructing it: - ix-tests/src/expectation.rs: Removed allowed field from struct - ix-tests/src/scenarios/single_basic.rs: Updated lamport_client_checkpoint helper and owner_data_checkpoint block - ix-tests/src/scenarios/dual_concurrent.rs: Updated single_update_checkpoint helper - ix-tests/src/scenarios/dual_restart.rs: Updated repeated_checkpoint and empty_checkpoints helpers - ix-tests/src/scenarios/single_load.rs: Removed allowed from construction All ix-tests unit tests pass. Clippy errors in expectation.rs were auto-fixed (collapsible_if). 
Amp-Thread-ID: https://ampcode.com/threads/T-019dd298-cc3a-716f-a374-2375f3a99bf0 Co-authored-by: Amp --- ix-tests/src/expectation.rs | 47 +++++++++-------------- ix-tests/src/scenarios/dual_concurrent.rs | 1 - ix-tests/src/scenarios/dual_restart.rs | 2 - ix-tests/src/scenarios/single_basic.rs | 2 - ix-tests/src/scenarios/single_load.rs | 1 - 5 files changed, 18 insertions(+), 35 deletions(-) diff --git a/ix-tests/src/expectation.rs b/ix-tests/src/expectation.rs index 7048628..bab91e4 100644 --- a/ix-tests/src/expectation.rs +++ b/ix-tests/src/expectation.rs @@ -26,8 +26,6 @@ pub struct ExpectedUpdate { #[allow(dead_code)] pub struct ClientCheckpoint { pub client_id: usize, - #[allow(dead_code)] - pub allowed: Vec, pub required: Vec, } @@ -55,83 +53,74 @@ pub struct CheckpointRunner { impl ExpectedUpdate { pub fn matches(&self, observed: &ObservedUpdate) -> bool { let mut mismatches = Vec::new(); - if let Some(expected) = &self.pubkey_b58 { - if observed.pubkey_b58 != *expected { + if let Some(expected) = &self.pubkey_b58 + && observed.pubkey_b58 != *expected { mismatches.push(format!( "pubkey_b58: expected {}, got {}", expected, observed.pubkey_b58 )); } - } - if let Some(expected) = self.slot { - if observed.slot != expected { + if let Some(expected) = self.slot + && observed.slot != expected { mismatches.push(format!( "slot: expected {}, got {}", expected, observed.slot )); } - } - if let Some(expected) = self.lamports { - if observed.lamports != expected { + if let Some(expected) = self.lamports + && observed.lamports != expected { mismatches.push(format!( "lamports: expected {}, got {}", expected, observed.lamports )); } - } - if let Some(expected) = &self.owner_b58 { - if observed.owner_b58 != *expected { + if let Some(expected) = &self.owner_b58 + && observed.owner_b58 != *expected { mismatches.push(format!( "owner_b58: expected {}, got {}", expected, observed.owner_b58 )); } - } - if let Some(expected) = self.executable { - if observed.executable != 
expected { + if let Some(expected) = self.executable + && observed.executable != expected { mismatches.push(format!( "executable: expected {}, got {}", expected, observed.executable )); } - } - if let Some(expected) = self.rent_epoch { - if observed.rent_epoch != expected { + if let Some(expected) = self.rent_epoch + && observed.rent_epoch != expected { mismatches.push(format!( "rent_epoch: expected {}, got {}", expected, observed.rent_epoch )); } - } - if let Some(expected) = self.write_version { - if observed.write_version != expected { + if let Some(expected) = self.write_version + && observed.write_version != expected { mismatches.push(format!( "write_version: expected {}, got {}", expected, observed.write_version )); } - } - if let Some(expected) = &self.txn_signature_b58 { - if observed.txn_signature_b58.as_ref() != expected.as_ref() { + if let Some(expected) = &self.txn_signature_b58 + && observed.txn_signature_b58.as_ref() != expected.as_ref() { mismatches.push(format!( "txn_signature_b58: expected {:?}, got {:?}", expected, observed.txn_signature_b58 )); } - } - if let Some(expected) = &self.data { - if observed.data != *expected { + if let Some(expected) = &self.data + && observed.data != *expected { mismatches.push(format!( "data: expected {:?}, got {:?}", expected, observed.data )); } - } if !mismatches.is_empty() { warn!("Mismatches:\n {}", mismatches.join("\n ")); diff --git a/ix-tests/src/scenarios/dual_concurrent.rs b/ix-tests/src/scenarios/dual_concurrent.rs index 28ef3c1..c9d3b95 100644 --- a/ix-tests/src/scenarios/dual_concurrent.rs +++ b/ix-tests/src/scenarios/dual_concurrent.rs @@ -183,7 +183,6 @@ fn single_update_checkpoint( ) -> ClientCheckpoint { ClientCheckpoint { client_id, - allowed: vec![expected.clone()], required: vec![expected], } } diff --git a/ix-tests/src/scenarios/dual_restart.rs b/ix-tests/src/scenarios/dual_restart.rs index 0a49827..d3c123f 100644 --- a/ix-tests/src/scenarios/dual_restart.rs +++ 
b/ix-tests/src/scenarios/dual_restart.rs @@ -384,7 +384,6 @@ fn repeated_checkpoint( range .map(|client_id| ClientCheckpoint { client_id, - allowed: vec![expected.clone()], required: vec![expected.clone()], }) .collect() @@ -394,7 +393,6 @@ fn empty_checkpoints(range: std::ops::Range) -> Vec { range .map(|client_id| ClientCheckpoint { client_id, - allowed: Vec::new(), required: Vec::new(), }) .collect() diff --git a/ix-tests/src/scenarios/single_basic.rs b/ix-tests/src/scenarios/single_basic.rs index e069dfc..c42a3a5 100644 --- a/ix-tests/src/scenarios/single_basic.rs +++ b/ix-tests/src/scenarios/single_basic.rs @@ -169,7 +169,6 @@ async fn run_inner( name: "owner-data-change", clients: vec![ClientCheckpoint { client_id: 3, - allowed: vec![owner_data_expected.clone()], required: vec![owner_data_expected], }], }; @@ -196,7 +195,6 @@ fn lamport_client_checkpoint( }; ClientCheckpoint { client_id, - allowed: vec![expected.clone()], required: vec![expected], } } diff --git a/ix-tests/src/scenarios/single_load.rs b/ix-tests/src/scenarios/single_load.rs index ee4a4a2..4155c65 100644 --- a/ix-tests/src/scenarios/single_load.rs +++ b/ix-tests/src/scenarios/single_load.rs @@ -91,7 +91,6 @@ async fn run_inner( let client_specs = (0..100) .map(|client_id| ClientCheckpoint { client_id, - allowed: expected_updates.clone(), required: expected_updates.clone(), }) .collect(); From 654597adf27a06210e9ae8c702888dc4414be846 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 28 Apr 2026 12:52:43 +0700 Subject: [PATCH 20/68] fix(ix-tests): consume matched required entries incrementally The checkpoint runner re-evaluated every required entry against the full observation snapshot on each loop iteration. Required entries that had already been satisfied were re-checked against the same observations every tick, and a single observation could be counted toward multiple distinct required entries. 
Track per-client matching state across loop iterations so that: - each required entry, once matched, stays matched and is not re-evaluated; - each loop iteration only scans observations that arrived since the last poll; - each observation can only consume a single required entry. Replace the previous unmatched_required helper with a stateful consume_observations helper and update unit tests to cover the new incremental, consuming behavior. Amp-Thread-ID: https://ampcode.com/threads/T-019dd29f-8b80-76bf-8a1f-623f120781d8 Co-authored-by: Amp --- ix-tests/src/expectation.rs | 340 ++++++++++++++++++++++++------------ 1 file changed, 233 insertions(+), 107 deletions(-) diff --git a/ix-tests/src/expectation.rs b/ix-tests/src/expectation.rs index bab91e4..a7b3bc7 100644 --- a/ix-tests/src/expectation.rs +++ b/ix-tests/src/expectation.rs @@ -54,73 +54,82 @@ impl ExpectedUpdate { pub fn matches(&self, observed: &ObservedUpdate) -> bool { let mut mismatches = Vec::new(); if let Some(expected) = &self.pubkey_b58 - && observed.pubkey_b58 != *expected { - mismatches.push(format!( - "pubkey_b58: expected {}, got {}", - expected, observed.pubkey_b58 - )); - } + && observed.pubkey_b58 != *expected + { + mismatches.push(format!( + "pubkey_b58: expected {}, got {}", + expected, observed.pubkey_b58 + )); + } if let Some(expected) = self.slot - && observed.slot != expected { - mismatches.push(format!( - "slot: expected {}, got {}", - expected, observed.slot - )); - } + && observed.slot != expected + { + mismatches.push(format!( + "slot: expected {}, got {}", + expected, observed.slot + )); + } if let Some(expected) = self.lamports - && observed.lamports != expected { - mismatches.push(format!( - "lamports: expected {}, got {}", - expected, observed.lamports - )); - } + && observed.lamports != expected + { + mismatches.push(format!( + "lamports: expected {}, got {}", + expected, observed.lamports + )); + } if let Some(expected) = &self.owner_b58 - && observed.owner_b58 != *expected { - 
mismatches.push(format!( - "owner_b58: expected {}, got {}", - expected, observed.owner_b58 - )); - } + && observed.owner_b58 != *expected + { + mismatches.push(format!( + "owner_b58: expected {}, got {}", + expected, observed.owner_b58 + )); + } if let Some(expected) = self.executable - && observed.executable != expected { - mismatches.push(format!( - "executable: expected {}, got {}", - expected, observed.executable - )); - } + && observed.executable != expected + { + mismatches.push(format!( + "executable: expected {}, got {}", + expected, observed.executable + )); + } if let Some(expected) = self.rent_epoch - && observed.rent_epoch != expected { - mismatches.push(format!( - "rent_epoch: expected {}, got {}", - expected, observed.rent_epoch - )); - } + && observed.rent_epoch != expected + { + mismatches.push(format!( + "rent_epoch: expected {}, got {}", + expected, observed.rent_epoch + )); + } if let Some(expected) = self.write_version - && observed.write_version != expected { - mismatches.push(format!( - "write_version: expected {}, got {}", - expected, observed.write_version - )); - } + && observed.write_version != expected + { + mismatches.push(format!( + "write_version: expected {}, got {}", + expected, observed.write_version + )); + } if let Some(expected) = &self.txn_signature_b58 - && observed.txn_signature_b58.as_ref() != expected.as_ref() { - mismatches.push(format!( - "txn_signature_b58: expected {:?}, got {:?}", - expected, observed.txn_signature_b58 - )); - } + && observed.txn_signature_b58.as_ref() != expected.as_ref() + { + mismatches.push(format!( + "txn_signature_b58: expected {:?}, got {:?}", + expected, observed.txn_signature_b58 + )); + } if let Some(expected) = &self.data - && observed.data != *expected { - mismatches.push(format!( - "data: expected {:?}, got {:?}", - expected, observed.data - )); - } + && observed.data != *expected + { + mismatches.push(format!( + "data: expected {:?}, got {:?}", + expected, observed.data + )); + } if 
!mismatches.is_empty() { warn!("Mismatches:\n {}", mismatches.join("\n ")); @@ -129,16 +138,46 @@ impl ExpectedUpdate { } } -fn unmatched_required<'a>( - required: &'a [ExpectedUpdate], - observed: &[ObservedUpdate], -) -> Vec<&'a ExpectedUpdate> { - required - .iter() - .filter(|expected| { - !observed.iter().any(|update| expected.matches(update)) - }) - .collect() +/// Per-client state tracked across loop iterations within a single +/// `wait_until_satisfied` call. We consume each required exactly once +/// (so subsequent iterations only compare unmatched required against +/// newly-arrived observations) and we track how far into the log we +/// have already scanned so we never re-examine an observation. +struct ClientMatchState { + /// Whether each required entry (by index) has already been matched. + matched: Vec, + /// Index into `client.log()` from which we still need to scan. + /// Starts at `cursor.next_index` for the checkpoint. + scan_cursor: usize, +} + +impl ClientMatchState { + fn all_matched(&self) -> bool { + self.matched.iter().all(|matched| *matched) + } +} + +/// Try to consume each new observation against any still-unmatched +/// required entry. Each observation may satisfy at most one required +/// entry, and once a required has been matched it stays matched for +/// the remainder of the checkpoint (so subsequent observations are +/// only compared against the still-unmatched required entries). 
+fn consume_observations( + required: &[ExpectedUpdate], + matched: &mut [bool], + observations: &[ObservedUpdate], +) { + for observation in observations { + if matched.iter().all(|m| *m) { + return; + } + for (req_idx, expected) in required.iter().enumerate() { + if !matched[req_idx] && expected.matches(observation) { + matched[req_idx] = true; + break; + } + } + } } #[allow(dead_code)] @@ -157,10 +196,30 @@ impl CheckpointRunner { ) -> anyhow::Result<()> { let deadline = Instant::now() + self.timeout; - loop { - let mut all_required_seen = true; + // Initialize per-client matching state seeded from the + // existing cursors so we only consider observations newer than + // anything a previous checkpoint already advanced past. + let mut states: Vec = spec + .clients + .iter() + .map(|client_spec| { + let scan_cursor = cursors + .iter() + .find(|cursor| cursor.client_id == client_spec.client_id) + .map(|cursor| cursor.next_index) + .unwrap_or(0); + ClientMatchState { + matched: vec![false; client_spec.required.len()], + scan_cursor, + } + }) + .collect(); - for client_spec in &spec.clients { + loop { + // For each client, fetch only the observations that have + // arrived since we last scanned and try to consume each one + // against any still-unmatched required entry. 
+ for (idx, client_spec) in spec.clients.iter().enumerate() { let client = clients .iter() .find(|client| client.id == client_spec.client_id) @@ -170,33 +229,43 @@ impl CheckpointRunner { spec.name, client_spec.client_id ) })?; - let cursor = cursors - .iter() - .find(|cursor| cursor.client_id == client_spec.client_id) - .with_context(|| { - format!( - "checkpoint '{}' is missing cursor for client {}", - spec.name, client_spec.client_id - ) - })?; - let observed = client.log().snapshot_from(cursor.next_index); - - if !unmatched_required(&client_spec.required, &observed) - .is_empty() - { - all_required_seen = false; - } + let state = &mut states[idx]; + let new_observations = + client.log().snapshot_from(state.scan_cursor); + state.scan_cursor += new_observations.len(); + + consume_observations( + &client_spec.required, + &mut state.matched, + &new_observations, + ); } - if all_required_seen { - for cursor in cursors.iter_mut() { + let all_satisfied = states.iter().all(|state| state.all_matched()); + + if all_satisfied { + // Advance public cursors for clients participating in + // this checkpoint to the end of their log so the next + // checkpoint starts from fresh observations only. + for client_spec in &spec.clients { + let cursor = cursors + .iter_mut() + .find(|cursor| { + cursor.client_id == client_spec.client_id + }) + .with_context(|| { + format!( + "checkpoint '{}' is missing cursor for client {}", + spec.name, client_spec.client_id + ) + })?; let client = clients .iter() - .find(|client| client.id == cursor.client_id) + .find(|client| client.id == client_spec.client_id) .with_context(|| { format!( "checkpoint '{}' cannot advance missing client {}", - spec.name, cursor.client_id + spec.name, client_spec.client_id ) })?; cursor.next_index = client.log().len(); @@ -206,25 +275,33 @@ impl CheckpointRunner { if Instant::now() >= deadline { // Build a useful diagnostic per client and bail. 
- for client_spec in &spec.clients { + for (idx, client_spec) in spec.clients.iter().enumerate() { + let state = &states[idx]; + if state.all_matched() { + continue; + } + let missing: Vec<&ExpectedUpdate> = client_spec + .required + .iter() + .enumerate() + .filter(|(i, _)| !state.matched[*i]) + .map(|(_, expected)| expected) + .collect(); let Some(client) = clients .iter() .find(|client| client.id == client_spec.client_id) else { continue; }; - let Some(cursor) = cursors.iter().find(|cursor| { - cursor.client_id == client_spec.client_id - }) else { - continue; - }; + let observation_start = cursors + .iter() + .find(|cursor| { + cursor.client_id == client_spec.client_id + }) + .map(|cursor| cursor.next_index) + .unwrap_or(0); let observed = - client.log().snapshot_from(cursor.next_index); - let missing = - unmatched_required(&client_spec.required, &observed); - if missing.is_empty() { - continue; - } + client.log().snapshot_from(observation_start); error!( checkpoint = spec.name, client_id = client_spec.client_id, @@ -255,7 +332,7 @@ mod tests { use crate::layout::ServiceInstance; use crate::observation::ObservedUpdate; - use super::{ExpectedUpdate, unmatched_required}; + use super::{ExpectedUpdate, consume_observations}; fn observed_update() -> ObservedUpdate { ObservedUpdate { @@ -315,7 +392,7 @@ mod tests { } #[test] - fn unmatched_required_returns_empty_when_all_required_match_with_noise() { + fn consume_observations_marks_required_as_matched_with_noise() { let noise = observed_with(0, None); let matching = observed_with(1_000_000, Some("real-sig".to_owned())); let observed = vec![noise, matching]; @@ -325,12 +402,15 @@ mod tests { txn_signature_b58: Some(Some("real-sig".to_owned())), ..Default::default() }]; + let mut matched = vec![false; required.len()]; + + consume_observations(&required, &mut matched, &observed); - assert!(unmatched_required(&required, &observed).is_empty()); + assert_eq!(matched, vec![true]); } #[test] - fn 
unmatched_required_reports_missing_when_required_never_arrives() { + fn consume_observations_leaves_required_unmatched_when_never_arrives() { let noise = observed_with(0, None); let observed = vec![noise]; @@ -338,10 +418,56 @@ mod tests { lamports: Some(1_000_000), ..Default::default() }]; + let mut matched = vec![false; required.len()]; + + consume_observations(&required, &mut matched, &observed); + + assert_eq!(matched, vec![false]); + } + + #[test] + fn consume_observations_consumes_each_observation_at_most_once() { + // Two near-identical required entries; a single observation + // matching both should only consume one of them. + let observed = + vec![observed_with(1_000_000, Some("real-sig".to_owned()))]; + + let required = vec![ + ExpectedUpdate { + lamports: Some(1_000_000), + ..Default::default() + }, + ExpectedUpdate { + lamports: Some(1_000_000), + ..Default::default() + }, + ]; + let mut matched = vec![false; required.len()]; + + consume_observations(&required, &mut matched, &observed); + + assert_eq!(matched, vec![true, false]); + } + + #[test] + fn consume_observations_is_incremental_across_calls() { + // Simulate two iterations of the wait loop: first only noise + // arrives, then the matching observation arrives. The second + // call only sees the new observation and still satisfies the + // required entry. 
+ let required = vec![ExpectedUpdate { + lamports: Some(1_000_000), + ..Default::default() + }]; + let mut matched = vec![false; required.len()]; + + let first_batch = vec![observed_with(0, None)]; + consume_observations(&required, &mut matched, &first_batch); + assert_eq!(matched, vec![false]); - let missing = unmatched_required(&required, &observed); - assert!(!missing.is_empty()); - assert_eq!(missing.len(), 1); - assert_eq!(missing[0].lamports, Some(1_000_000)); + let second_batch = + vec![observed_with(1_000_000, Some("real-sig".to_owned()))]; + consume_observations(&required, &mut matched, &second_batch); + assert_eq!(matched, vec![true]); } } From da78bbe72f8ccba3ecd7fb23697394b115b14016 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 28 Apr 2026 14:37:50 +0700 Subject: [PATCH 21/68] chore: simplify rust --- geyser-plugin/src/account_update_publisher.rs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/geyser-plugin/src/account_update_publisher.rs b/geyser-plugin/src/account_update_publisher.rs index d3a0fc4..a5d5fb2 100644 --- a/geyser-plugin/src/account_update_publisher.rs +++ b/geyser-plugin/src/account_update_publisher.rs @@ -131,14 +131,14 @@ fn should_publish_subscribed_account( } fn log_ignore_account_update(pubkey: &[u8]) { - if log_enabled!(::log::Level::Trace) { - if let Ok(key) = <&[u8; 32]>::try_from(pubkey) { - trace!( - "Ignoring update for account key: {:?}", - Pubkey::new_from_array(*key) - ); - return; - } + if log_enabled!(::log::Level::Trace) + && let Ok(key) = <&[u8; 32]>::try_from(pubkey) + { + trace!( + "Ignoring update for account key: {:?}", + Pubkey::new_from_array(*key) + ); + return; } if log_enabled!(::log::Level::Trace) { trace!("Ignoring update for account key bytes: {:?}", pubkey); @@ -148,8 +148,8 @@ fn log_ignore_account_update(pubkey: &[u8]) { #[cfg(test)] mod tests { use super::{ - should_publish_backfill_account, should_publish_confirmed_account, - 
AccountUpdatePublishOutcome, + AccountUpdatePublishOutcome, should_publish_backfill_account, + should_publish_confirmed_account, }; use crate::{ server::subscriptions::AccountSubscriptions, wire::UpdateAccountEvent, From da71800d114f5cc6ae188ad63741c8fba7bd5802 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 28 Apr 2026 14:38:17 +0700 Subject: [PATCH 22/68] chore: minor fixes in ix-tests --- ix-tests/src/client.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ix-tests/src/client.rs b/ix-tests/src/client.rs index 0196acf..b045514 100644 --- a/ix-tests/src/client.rs +++ b/ix-tests/src/client.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use tracing::*; use std::time::{SystemTime, UNIX_EPOCH}; +use tracing::*; use anyhow::Context; use helius_laserstream::grpc::geyser_client::GeyserClient; @@ -66,7 +66,8 @@ impl TestGrpcClient { "failed to parse pubkey" ) }) - .unwrap().to_string() + .unwrap() + .to_string() } fn txn_signature_str(bytes: &[u8]) -> String { if bytes.is_empty() { @@ -80,7 +81,8 @@ impl TestGrpcClient { "failed to parse txn signature" ) }) - .unwrap().to_string() + .unwrap() + .to_string() } let receive_task = tokio::spawn(async move { From 554d5a8b53359206c28556b63cf3a77242d482bf Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 28 Apr 2026 15:06:55 +0700 Subject: [PATCH 23/68] chore: simplify checkpoint asserts --- ix-tests/src/expectation.rs | 328 ++++------------------ ix-tests/src/observation.rs | 10 + ix-tests/src/scenarios/dual_concurrent.rs | 31 +- ix-tests/src/scenarios/dual_restart.rs | 48 +--- ix-tests/src/scenarios/single_basic.rs | 25 +- ix-tests/src/scenarios/single_load.rs | 17 +- 6 files changed, 92 insertions(+), 367 deletions(-) diff --git a/ix-tests/src/expectation.rs b/ix-tests/src/expectation.rs index a7b3bc7..99b0700 100644 --- a/ix-tests/src/expectation.rs +++ b/ix-tests/src/expectation.rs @@ -33,14 +33,7 @@ pub struct ClientCheckpoint { #[allow(dead_code)] pub struct CheckpointSpec 
{ pub name: &'static str, - pub clients: Vec, -} - -#[derive(Clone, Debug, Default)] -#[allow(dead_code)] -pub struct ClientCursor { - pub client_id: usize, - pub next_index: usize, + pub checkpoints: Vec, } #[derive(Clone, Debug)] @@ -138,47 +131,6 @@ impl ExpectedUpdate { } } -/// Per-client state tracked across loop iterations within a single -/// `wait_until_satisfied` call. We consume each required exactly once -/// (so subsequent iterations only compare unmatched required against -/// newly-arrived observations) and we track how far into the log we -/// have already scanned so we never re-examine an observation. -struct ClientMatchState { - /// Whether each required entry (by index) has already been matched. - matched: Vec, - /// Index into `client.log()` from which we still need to scan. - /// Starts at `cursor.next_index` for the checkpoint. - scan_cursor: usize, -} - -impl ClientMatchState { - fn all_matched(&self) -> bool { - self.matched.iter().all(|matched| *matched) - } -} - -/// Try to consume each new observation against any still-unmatched -/// required entry. Each observation may satisfy at most one required -/// entry, and once a required has been matched it stays matched for -/// the remainder of the checkpoint (so subsequent observations are -/// only compared against the still-unmatched required entries). 
-fn consume_observations( - required: &[ExpectedUpdate], - matched: &mut [bool], - observations: &[ObservedUpdate], -) { - for observation in observations { - if matched.iter().all(|m| *m) { - return; - } - for (req_idx, expected) in required.iter().enumerate() { - if !matched[req_idx] && expected.matches(observation) { - matched[req_idx] = true; - break; - } - } - } -} #[allow(dead_code)] impl CheckpointRunner { @@ -192,138 +144,64 @@ impl CheckpointRunner { &self, spec: &CheckpointSpec, clients: &[TestGrpcClient], - cursors: &mut [ClientCursor], ) -> anyhow::Result<()> { let deadline = Instant::now() + self.timeout; - // Initialize per-client matching state seeded from the - // existing cursors so we only consider observations newer than - // anything a previous checkpoint already advanced past. - let mut states: Vec = spec - .clients - .iter() - .map(|client_spec| { - let scan_cursor = cursors - .iter() - .find(|cursor| cursor.client_id == client_spec.client_id) - .map(|cursor| cursor.next_index) - .unwrap_or(0); - ClientMatchState { - matched: vec![false; client_spec.required.len()], - scan_cursor, - } - }) - .collect(); - - loop { - // For each client, fetch only the observations that have - // arrived since we last scanned and try to consume each one - // against any still-unmatched required entry. 
- for (idx, client_spec) in spec.clients.iter().enumerate() { - let client = clients - .iter() - .find(|client| client.id == client_spec.client_id) - .with_context(|| { - format!( - "checkpoint '{}' references unknown client {}", - spec.name, client_spec.client_id - ) - })?; - let state = &mut states[idx]; - let new_observations = - client.log().snapshot_from(state.scan_cursor); - state.scan_cursor += new_observations.len(); - - consume_observations( - &client_spec.required, - &mut state.matched, - &new_observations, - ); - } - - let all_satisfied = states.iter().all(|state| state.all_matched()); - - if all_satisfied { - // Advance public cursors for clients participating in - // this checkpoint to the end of their log so the next - // checkpoint starts from fresh observations only. - for client_spec in &spec.clients { - let cursor = cursors - .iter_mut() - .find(|cursor| { - cursor.client_id == client_spec.client_id - }) - .with_context(|| { - format!( - "checkpoint '{}' is missing cursor for client {}", - spec.name, client_spec.client_id - ) - })?; - let client = clients - .iter() - .find(|client| client.id == client_spec.client_id) - .with_context(|| { - format!( - "checkpoint '{}' cannot advance missing client {}", - spec.name, client_spec.client_id - ) - })?; - cursor.next_index = client.log().len(); - } - return Ok(()); - } - - if Instant::now() >= deadline { - // Build a useful diagnostic per client and bail. 
- for (idx, client_spec) in spec.clients.iter().enumerate() { - let state = &states[idx]; - if state.all_matched() { - continue; + // For each spec we take the next (in order of arrival) state from + // the matching client and compare them + for check_point in &spec.checkpoints { + let client = clients + .iter() + .find(|client| client.id == check_point.client_id) + .with_context(|| { + format!( + "checkpoint '{}' references unknown client {}", + spec.name, check_point.client_id + ) + })?; + + for (idx, expected) in check_point.required.iter().enumerate() { + 'retry: loop { + let client_state = client.log().consume_next_update(); + if let Some(observed) = client_state { + if expected.matches(&observed) { + trace!( + checkpoint = spec.name, + idx, + client_id = check_point.client_id, + "matched expected update: {:#?}", + expected + ); + break 'retry; + } else { + error!( + checkpoint = spec.name, + idx, + client_id = check_point.client_id, + "expected update did not match observed update.\nExpected: {:#?}\nObserved: {:#?}", + expected, + observed + ); + bail!( + "checkpoint '{}' idx: {} failed for client {}", + spec.name, + idx, + check_point.client_id + ); + } + } else if Instant::now() > deadline { + bail!( + "checkpoint '{}' idx: {} timed out waiting for client {}", + spec.name, + idx, + check_point.client_id + ); } - let missing: Vec<&ExpectedUpdate> = client_spec - .required - .iter() - .enumerate() - .filter(|(i, _)| !state.matched[*i]) - .map(|(_, expected)| expected) - .collect(); - let Some(client) = clients - .iter() - .find(|client| client.id == client_spec.client_id) - else { - continue; - }; - let observation_start = cursors - .iter() - .find(|cursor| { - cursor.client_id == client_spec.client_id - }) - .map(|cursor| cursor.next_index) - .unwrap_or(0); - let observed = - client.log().snapshot_from(observation_start); - error!( - checkpoint = spec.name, - client_id = client_spec.client_id, - "Missing required: {:#?}", - missing - ); - error!( - 
checkpoint = spec.name, - client_id = client_spec.client_id, - "Observed in window: {:#?}", - observed - ); + sleep(Duration::from_millis(50)).await; } - bail!( - "checkpoint '{}' timed out after {:?}", - spec.name, - self.timeout - ); } - - sleep(Duration::from_millis(50)).await; } + Ok(()) } } @@ -332,7 +210,7 @@ mod tests { use crate::layout::ServiceInstance; use crate::observation::ObservedUpdate; - use super::{ExpectedUpdate, consume_observations}; + use super::{ExpectedUpdate}; fn observed_update() -> ObservedUpdate { ObservedUpdate { @@ -351,26 +229,6 @@ mod tests { } } - fn observed_with( - lamports: u64, - txn_signature_b58: Option, - ) -> ObservedUpdate { - ObservedUpdate { - client_id: 7, - service: ServiceInstance::One, - pubkey_b58: "pubkey".to_owned(), - slot: 42, - lamports, - owner_b58: "owner".to_owned(), - executable: false, - rent_epoch: 5, - write_version: 6, - txn_signature_b58, - data: vec![1, 2, 3], - received_at_millis: 123, - } - } - #[test] fn matches_ignores_none_fields() { let expected = ExpectedUpdate { @@ -390,84 +248,4 @@ mod tests { assert!(!expected.matches(&observed_update())); } - - #[test] - fn consume_observations_marks_required_as_matched_with_noise() { - let noise = observed_with(0, None); - let matching = observed_with(1_000_000, Some("real-sig".to_owned())); - let observed = vec![noise, matching]; - - let required = vec![ExpectedUpdate { - lamports: Some(1_000_000), - txn_signature_b58: Some(Some("real-sig".to_owned())), - ..Default::default() - }]; - let mut matched = vec![false; required.len()]; - - consume_observations(&required, &mut matched, &observed); - - assert_eq!(matched, vec![true]); - } - - #[test] - fn consume_observations_leaves_required_unmatched_when_never_arrives() { - let noise = observed_with(0, None); - let observed = vec![noise]; - - let required = vec![ExpectedUpdate { - lamports: Some(1_000_000), - ..Default::default() - }]; - let mut matched = vec![false; required.len()]; - - 
consume_observations(&required, &mut matched, &observed); - - assert_eq!(matched, vec![false]); - } - - #[test] - fn consume_observations_consumes_each_observation_at_most_once() { - // Two near-identical required entries; a single observation - // matching both should only consume one of them. - let observed = - vec![observed_with(1_000_000, Some("real-sig".to_owned()))]; - - let required = vec![ - ExpectedUpdate { - lamports: Some(1_000_000), - ..Default::default() - }, - ExpectedUpdate { - lamports: Some(1_000_000), - ..Default::default() - }, - ]; - let mut matched = vec![false; required.len()]; - - consume_observations(&required, &mut matched, &observed); - - assert_eq!(matched, vec![true, false]); - } - - #[test] - fn consume_observations_is_incremental_across_calls() { - // Simulate two iterations of the wait loop: first only noise - // arrives, then the matching observation arrives. The second - // call only sees the new observation and still satisfies the - // required entry. - let required = vec![ExpectedUpdate { - lamports: Some(1_000_000), - ..Default::default() - }]; - let mut matched = vec![false; required.len()]; - - let first_batch = vec![observed_with(0, None)]; - consume_observations(&required, &mut matched, &first_batch); - assert_eq!(matched, vec![false]); - - let second_batch = - vec![observed_with(1_000_000, Some("real-sig".to_owned()))]; - consume_observations(&required, &mut matched, &second_batch); - assert_eq!(matched, vec![true]); - } } diff --git a/ix-tests/src/observation.rs b/ix-tests/src/observation.rs index 6f2e0cd..f8da392 100644 --- a/ix-tests/src/observation.rs +++ b/ix-tests/src/observation.rs @@ -47,6 +47,16 @@ impl ClientLog { guard[start_index..].to_vec() } + /// Takes the next update in the order it came in + /// Removes and returns it from the log + pub fn consume_next_update(&self) -> Option { + if self.entries.lock().unwrap().is_empty() { + None + } else { + Some(self.entries.lock().unwrap().remove(0)) + } + } + pub fn 
len(&self) -> usize { self.entries.lock().unwrap().len() } diff --git a/ix-tests/src/scenarios/dual_concurrent.rs b/ix-tests/src/scenarios/dual_concurrent.rs index c9d3b95..a9d75da 100644 --- a/ix-tests/src/scenarios/dual_concurrent.rs +++ b/ix-tests/src/scenarios/dual_concurrent.rs @@ -3,9 +3,7 @@ use anyhow::Context; use crate::accounts::NamedAccount; use crate::client::TestGrpcClient; use crate::context::ScenarioContext; -use crate::expectation::{ - CheckpointSpec, ClientCheckpoint, ClientCursor, ExpectedUpdate, -}; +use crate::expectation::{CheckpointSpec, ClientCheckpoint, ExpectedUpdate}; use crate::layout::ServiceInstance; use crate::scenarios::ScenarioFailure; use crate::service::{ManagedService, ServiceSpec}; @@ -15,17 +13,9 @@ pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { let spec_two = ServiceSpec::for_instance(ServiceInstance::Two); let mut services = Vec::new(); let mut clients = Vec::new(); - let mut cursors = Vec::new(); - - let result = run_inner( - ctx, - &spec_one, - &spec_two, - &mut services, - &mut clients, - &mut cursors, - ) - .await; + + let result = + run_inner(ctx, &spec_one, &spec_two, &mut services, &mut clients).await; if let Err(error) = result { return Err(ScenarioFailure { error, clients }); } @@ -45,7 +35,6 @@ async fn run_inner( spec_two: &ServiceSpec, services: &mut Vec, clients: &mut Vec, - cursors: &mut Vec, ) -> anyhow::Result<()> { services.push( ctx.service_controller @@ -79,10 +68,6 @@ async fn run_inner( }; client.replace_subscription(&subscription).await?; clients.push(client); - cursors.push(ClientCursor { - client_id: id, - next_index: 0, - }); } for id in 10..20 { @@ -102,10 +87,6 @@ async fn run_inner( }; client.replace_subscription(&subscription).await?; clients.push(client); - cursors.push(ClientCursor { - client_id: id, - next_index: 0, - }); } ctx.validator.fund_payer().await?; @@ -170,10 +151,10 @@ async fn run_inner( let checkpoint = CheckpointSpec { name: "dual-concurrent-routing", - 
clients: checkpoint_clients, + checkpoints: checkpoint_clients, }; ctx.checkpoint_runner - .wait_until_satisfied(&checkpoint, clients, cursors) + .wait_until_satisfied(&checkpoint, clients) .await } diff --git a/ix-tests/src/scenarios/dual_restart.rs b/ix-tests/src/scenarios/dual_restart.rs index d3c123f..0f2f785 100644 --- a/ix-tests/src/scenarios/dual_restart.rs +++ b/ix-tests/src/scenarios/dual_restart.rs @@ -1,11 +1,9 @@ -use anyhow::{Context, bail}; +use anyhow::{bail, Context}; use crate::accounts::{NamedAccount, ScenarioAccounts}; use crate::client::TestGrpcClient; use crate::context::ScenarioContext; -use crate::expectation::{ - CheckpointSpec, ClientCheckpoint, ClientCursor, ExpectedUpdate, -}; +use crate::expectation::{CheckpointSpec, ClientCheckpoint, ExpectedUpdate}; use crate::layout::ServiceInstance; use crate::observation::ClientLog; use crate::scenarios::ScenarioFailure; @@ -27,7 +25,6 @@ pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { .map_err(scenario_failure_without_clients)?, ); let mut active_clients = Vec::new(); - let mut cursors = Vec::new(); let result = run_inner( ctx, @@ -35,7 +32,6 @@ pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { &spec_two, &mut service_one, &mut active_clients, - &mut cursors, ) .await; if let Err(error) = result { @@ -63,19 +59,16 @@ async fn run_inner( spec_two: &ServiceSpec, service_one: &mut Option, active_clients: &mut Vec, - cursors: &mut Vec, ) -> anyhow::Result<()> { connect_service_one_clients( &ctx.accounts, active_clients, - cursors, &spec_one.endpoint, ) .await?; connect_service_two_clients( &ctx.accounts, active_clients, - cursors, &spec_two.endpoint, ) .await?; @@ -97,7 +90,7 @@ async fn run_inner( let pre_restart = CheckpointSpec { name: "pre-restart", - clients: vec![ + checkpoints: vec![ repeated_checkpoint( 0..5, expected_update( @@ -136,11 +129,10 @@ async fn run_inner( .collect(), }; ctx.checkpoint_runner - .wait_until_satisfied(&pre_restart, 
active_clients, cursors) + .wait_until_satisfied(&pre_restart, active_clients) .await?; - let parked_logs = - shutdown_service_one_clients(active_clients, cursors).await?; + let parked_logs = shutdown_service_one_clients(active_clients).await?; shutdown_service(&ctx.service_controller, service_one).await?; ctx.validator @@ -155,7 +147,7 @@ async fn run_inner( let during_restart = CheckpointSpec { name: "during-restart", - clients: vec![ + checkpoints: vec![ empty_checkpoints(10..15), repeated_checkpoint( 15..20, @@ -171,7 +163,7 @@ async fn run_inner( .collect(), }; ctx.checkpoint_runner - .wait_until_satisfied(&during_restart, active_clients, cursors) + .wait_until_satisfied(&during_restart, active_clients) .await?; *service_one = Some( @@ -182,7 +174,6 @@ async fn run_inner( connect_service_one_clients( &ctx.accounts, active_clients, - cursors, &spec_one.endpoint, ) .await?; @@ -198,7 +189,7 @@ async fn run_inner( let post_restart = CheckpointSpec { name: "post-restart", - clients: vec![ + checkpoints: vec![ repeated_checkpoint( 0..5, expected_update( @@ -230,14 +221,13 @@ async fn run_inner( .collect(), }; ctx.checkpoint_runner - .wait_until_satisfied(&post_restart, active_clients, cursors) + .wait_until_satisfied(&post_restart, active_clients) .await } async fn connect_service_one_clients( accounts: &ScenarioAccounts, active_clients: &mut Vec, - cursors: &mut Vec, endpoint: &str, ) -> anyhow::Result<()> { for id in 0..10 { @@ -260,7 +250,7 @@ async fn connect_service_one_clients( .map(|account| accounts.pubkey_b58(account)) .collect::>(); client.replace_subscription(&pubkeys).await?; - upsert_client(active_clients, cursors, client); + upsert_client(active_clients, client); } Ok(()) } @@ -268,7 +258,6 @@ async fn connect_service_one_clients( async fn connect_service_two_clients( accounts: &ScenarioAccounts, active_clients: &mut Vec, - cursors: &mut Vec, endpoint: &str, ) -> anyhow::Result<()> { for id in 10..20 { @@ -291,14 +280,13 @@ async fn 
connect_service_two_clients( .map(|account| accounts.pubkey_b58(account)) .collect::>(); client.replace_subscription(&pubkeys).await?; - upsert_client(active_clients, cursors, client); + upsert_client(active_clients, client); } Ok(()) } fn upsert_client( active_clients: &mut Vec, - cursors: &mut Vec, client: TestGrpcClient, ) { let client_id = client.id; @@ -309,23 +297,10 @@ fn upsert_client( } else { active_clients.push(client); } - - if let Some(cursor) = cursors - .iter_mut() - .find(|cursor| cursor.client_id == client_id) - { - cursor.next_index = 0; - } else { - cursors.push(ClientCursor { - client_id, - next_index: 0, - }); - } } async fn shutdown_service_one_clients( active_clients: &mut Vec, - cursors: &mut Vec, ) -> anyhow::Result> { let mut parked = Vec::new(); let mut remaining = Vec::new(); @@ -346,7 +321,6 @@ async fn shutdown_service_one_clients( } *active_clients = remaining; - cursors.retain(|cursor| cursor.client_id >= 10); Ok(parked) } diff --git a/ix-tests/src/scenarios/single_basic.rs b/ix-tests/src/scenarios/single_basic.rs index c42a3a5..375a1ee 100644 --- a/ix-tests/src/scenarios/single_basic.rs +++ b/ix-tests/src/scenarios/single_basic.rs @@ -5,9 +5,7 @@ use tracing::debug; use crate::accounts::NamedAccount; use crate::client::TestGrpcClient; use crate::context::ScenarioContext; -use crate::expectation::{ - CheckpointSpec, ClientCheckpoint, ClientCursor, ExpectedUpdate, -}; +use crate::expectation::{CheckpointSpec, ClientCheckpoint, ExpectedUpdate}; use crate::layout::ServiceInstance; use crate::scenarios::ScenarioFailure; use crate::service::{ManagedService, ServiceSpec}; @@ -28,10 +26,8 @@ pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { .map_err(scenario_failure_without_clients)?, ); let mut clients = Vec::new(); - let mut cursors = Vec::new(); - let result = - run_inner(ctx, &spec.endpoint, &mut clients, &mut cursors).await; + let result = run_inner(ctx, &spec.endpoint, &mut clients).await; if let Err(error) = 
result { return Err(ScenarioFailure { error, clients }); } @@ -49,7 +45,6 @@ async fn run_inner( ctx: &ScenarioContext, endpoint: &str, clients: &mut Vec, - cursors: &mut Vec, ) -> anyhow::Result<()> { for id in 0..4 { let client = TestGrpcClient::connect( @@ -59,10 +54,6 @@ async fn run_inner( ) .await .with_context(|| format!("failed to connect client {id}"))?; - cursors.push(ClientCursor { - client_id: id, - next_index: 0, - }); clients.push(client); } @@ -94,7 +85,7 @@ async fn run_inner( // each account let empty_checkpoint = CheckpointSpec { name: "initial-empty-accounts", - clients: vec![ + checkpoints: vec![ lamport_client_checkpoint(0, simple_a.to_string(), 0, None), lamport_client_checkpoint(1, simple_b.to_string(), 0, None), lamport_client_checkpoint(2, simple_c.to_string(), 0, None), @@ -102,7 +93,7 @@ async fn run_inner( ], }; ctx.checkpoint_runner - .wait_until_satisfied(&empty_checkpoint, clients, cursors) + .wait_until_satisfied(&empty_checkpoint, clients) .await?; debug!("✅ initial empty accounts"); @@ -117,7 +108,7 @@ async fn run_inner( let basic_checkpoint = CheckpointSpec { name: "basic-lamports", - clients: vec![ + checkpoints: vec![ lamport_client_checkpoint( 0, simple_a.to_string(), @@ -139,7 +130,7 @@ async fn run_inner( ], }; ctx.checkpoint_runner - .wait_until_satisfied(&basic_checkpoint, clients, cursors) + .wait_until_satisfied(&basic_checkpoint, clients) .await?; debug!("✅ basic lamports updates"); @@ -167,13 +158,13 @@ async fn run_inner( }; let owner_data_checkpoint = CheckpointSpec { name: "owner-data-change", - clients: vec![ClientCheckpoint { + checkpoints: vec![ClientCheckpoint { client_id: 3, required: vec![owner_data_expected], }], }; ctx.checkpoint_runner - .wait_until_satisfied(&owner_data_checkpoint, clients, cursors) + .wait_until_satisfied(&owner_data_checkpoint, clients) .await?; debug!("✅ owner and data updates"); diff --git a/ix-tests/src/scenarios/single_load.rs b/ix-tests/src/scenarios/single_load.rs index 
4155c65..fb74191 100644 --- a/ix-tests/src/scenarios/single_load.rs +++ b/ix-tests/src/scenarios/single_load.rs @@ -3,9 +3,7 @@ use anyhow::Context; use crate::accounts::NamedAccount; use crate::client::TestGrpcClient; use crate::context::ScenarioContext; -use crate::expectation::{ - CheckpointSpec, ClientCheckpoint, ClientCursor, ExpectedUpdate, -}; +use crate::expectation::{CheckpointSpec, ClientCheckpoint, ExpectedUpdate}; use crate::layout::ServiceInstance; use crate::scenarios::ScenarioFailure; use crate::service::{ManagedService, ServiceSpec}; @@ -19,10 +17,8 @@ pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { .map_err(scenario_failure_without_clients)?, ); let mut clients = Vec::new(); - let mut cursors = Vec::new(); - let result = - run_inner(ctx, &spec.endpoint, &mut clients, &mut cursors).await; + let result = run_inner(ctx, &spec.endpoint, &mut clients).await; if let Err(error) = result { return Err(ScenarioFailure { error, clients }); } @@ -40,7 +36,6 @@ async fn run_inner( ctx: &ScenarioContext, endpoint: &str, clients: &mut Vec, - cursors: &mut Vec, ) -> anyhow::Result<()> { let shared_a = ctx.accounts.pubkey_b58(NamedAccount::SharedA); let shared_b = ctx.accounts.pubkey_b58(NamedAccount::SharedB); @@ -59,10 +54,6 @@ async fn run_inner( .with_context(|| { format!("failed to set subscriptions for client {id}") })?; - cursors.push(ClientCursor { - client_id: id, - next_index: 0, - }); clients.push(client); } @@ -96,10 +87,10 @@ async fn run_inner( .collect(); let checkpoint = CheckpointSpec { name: "single-load-fanout", - clients: client_specs, + checkpoints: client_specs, }; ctx.checkpoint_runner - .wait_until_satisfied(&checkpoint, clients, cursors) + .wait_until_satisfied(&checkpoint, clients) .await } From 3df81681d0c4bacf5f8d724b3eb101eb26b16392 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 28 Apr 2026 15:29:37 +0700 Subject: [PATCH 24/68] chore: task to reset kafka/ksql state --- kafka-setup/Makefile | 6 +- 
kafka-setup/README.md | 6 ++ .../01_recreate-account-updates-topic.sh | 61 +++++++++++++++++++ .../sh/ksql/03_reset-accounts-state.sh | 55 +++++++++++++++++ kafka-setup/sh/reset-state.sh | 21 +++++++ 5 files changed, 148 insertions(+), 1 deletion(-) create mode 100755 kafka-setup/sh/kafka/01_recreate-account-updates-topic.sh create mode 100755 kafka-setup/sh/ksql/03_reset-accounts-state.sh create mode 100755 kafka-setup/sh/reset-state.sh diff --git a/kafka-setup/Makefile b/kafka-setup/Makefile index 2c2bceb..d1c74b0 100644 --- a/kafka-setup/Makefile +++ b/kafka-setup/Makefile @@ -1,6 +1,6 @@ DC := $(shell if command -v docker-compose >/dev/null 2>&1; then echo docker-compose; else echo docker compose; fi) -.PHONY: help up down setup-stream create-table register-schema ready ui ui-down +.PHONY: help up down setup-stream create-table register-schema ready reset-state ui ui-down help: @echo "Available targets:" @@ -10,6 +10,7 @@ help: @echo " create-table - Create the accounts table (latest update per pubkey)" @echo " register-schema - Register the protobuf schema for Redpanda Console" @echo " ready - Start stack, setup stream/table, and register schema" + @echo " reset-state - Rebuild Kafka + ksqlDB state for an already-running stack" @echo " ui - Start Redpanda Console UI (http://localhost:8080)" @echo " ui-down - Stop Redpanda Console" @@ -30,6 +31,9 @@ create-table: ready: up setup-stream create-table register-schema +reset-state: + sh/reset-state.sh + ui: $(DC) up -d redpanda-console diff --git a/kafka-setup/README.md b/kafka-setup/README.md index 257140a..de7ae32 100644 --- a/kafka-setup/README.md +++ b/kafka-setup/README.md @@ -6,6 +6,12 @@ Available workflows: - `make up` - `make ready` +- `make reset-state` - `make down` - `make ui` - `make ui-down` + +`make reset-state` is the narrower option for an already-running +environment. 
It rebuilds the Kafka topic and the dependent ksqlDB +state without restarting Docker or re-running the broader `make ready` +workflow. diff --git a/kafka-setup/sh/kafka/01_recreate-account-updates-topic.sh b/kafka-setup/sh/kafka/01_recreate-account-updates-topic.sh new file mode 100755 index 0000000..635f02e --- /dev/null +++ b/kafka-setup/sh/kafka/01_recreate-account-updates-topic.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Delete and recreate the Kafka source topic used by the account updates flow. + +TOPIC="${TOPIC:-solana.testnet.account_updates}" +PARTITIONS="${PARTITIONS:-1}" +REPLICATION_FACTOR="${REPLICATION_FACTOR:-1}" +BOOTSTRAP_SERVER="${BOOTSTRAP_SERVER:-kafka:9092}" + +if command -v docker-compose >/dev/null 2>&1; then + DC="docker-compose" +else + DC="docker compose" +fi + +echo "Using compose command: $DC" +echo "Checking Kafka readiness..." +for i in $(seq 1 60); do + if $DC exec -T kafka kafka-topics --bootstrap-server "$BOOTSTRAP_SERVER" --list >/dev/null 2>&1; then + break + fi + sleep 1 + if [[ $i -eq 60 ]]; then + echo "Kafka not ready after 60 seconds" >&2 + exit 1 + fi +done +echo "Kafka is ready." + +if $DC exec -T kafka kafka-topics --bootstrap-server "$BOOTSTRAP_SERVER" --list | grep -Fxq "$TOPIC"; then + echo "Deleting Kafka topic '$TOPIC'..." + $DC exec -T kafka kafka-topics \ + --bootstrap-server "$BOOTSTRAP_SERVER" \ + --delete \ + --topic "$TOPIC" + + for i in $(seq 1 60); do + if ! $DC exec -T kafka kafka-topics --bootstrap-server "$BOOTSTRAP_SERVER" --list | grep -Fxq "$TOPIC"; then + break + fi + sleep 1 + if [[ $i -eq 60 ]]; then + echo "Kafka topic '$TOPIC' was not deleted after 60 seconds" >&2 + exit 1 + fi + done +else + echo "Kafka topic '$TOPIC' does not exist; skipping delete." +fi + +echo "Recreating Kafka topic '$TOPIC'..." 
+$DC exec -T kafka kafka-topics \ + --bootstrap-server "$BOOTSTRAP_SERVER" \ + --create \ + --if-not-exists \ + --topic "$TOPIC" \ + --replication-factor "$REPLICATION_FACTOR" \ + --partitions "$PARTITIONS" + +echo "Done recreating Kafka topic '$TOPIC'." diff --git a/kafka-setup/sh/ksql/03_reset-accounts-state.sh b/kafka-setup/sh/ksql/03_reset-accounts-state.sh new file mode 100755 index 0000000..64e3498 --- /dev/null +++ b/kafka-setup/sh/ksql/03_reset-accounts-state.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Tear down the ksqlDB objects that derive account state so they can be rebuilt +# from a fresh source topic. + +STREAM="${STREAM:-account_updates_stream}" +TABLE="${TABLE:-accounts}" +KSQL_SERVER_URL="${KSQL_SERVER_URL:-http://ksqldb-server:8088}" + +if command -v docker-compose >/dev/null 2>&1; then + DC="docker-compose" +else + DC="docker compose" +fi + +echo "Using compose command: $DC" +echo "Waiting for ksqlDB server to be ready (via CLI)..." +for i in $(seq 1 60); do + if $DC run --rm ksqldb-cli ksql "${KSQL_SERVER_URL}" -e 'SHOW STREAMS;' >/dev/null 2>&1; then + break + fi + sleep 1 + if [[ $i -eq 60 ]]; then + echo "ksqlDB not ready after 60 seconds" >&2 + exit 1 + fi +done +echo "ksqlDB is ready." + +QUERIES_OUTPUT="$($DC run --rm ksqldb-cli ksql "${KSQL_SERVER_URL}" -e "SHOW QUERIES;")" +if printf '%s\n' "$QUERIES_OUTPUT" | grep -q 'CTAS_' && printf '%s\n' "$QUERIES_OUTPUT" | grep -q 'ACCOUNTS'; then + echo "Terminating persistent ACCOUNTS query..." + $DC run --rm ksqldb-cli ksql "${KSQL_SERVER_URL}" -e "TERMINATE CTAS_ACCOUNTS_1;" +else + echo "No persistent ACCOUNTS query found; skipping terminate." +fi + +sleep 2 + +TABLES_OUTPUT="$($DC run --rm ksqldb-cli ksql "${KSQL_SERVER_URL}" -e "SHOW TABLES;")" +if printf '%s\n' "$TABLES_OUTPUT" | grep -q "ACCOUNTS"; then + echo "Dropping table '${TABLE}'..." 
+ $DC run --rm ksqldb-cli ksql "${KSQL_SERVER_URL}" -e "DROP TABLE ${TABLE} DELETE TOPIC;" +else + echo "Table '${TABLE}' does not exist; skipping drop." +fi + +STREAMS_OUTPUT="$($DC run --rm ksqldb-cli ksql "${KSQL_SERVER_URL}" -e "SHOW STREAMS;")" +if printf '%s\n' "$STREAMS_OUTPUT" | grep -q "ACCOUNT_UPDATES_STREAM"; then + echo "Dropping stream '${STREAM}'..." + $DC run --rm ksqldb-cli ksql "${KSQL_SERVER_URL}" -e "DROP STREAM ${STREAM};" +else + echo "Stream '${STREAM}' does not exist; skipping drop." +fi diff --git a/kafka-setup/sh/reset-state.sh b/kafka-setup/sh/reset-state.sh new file mode 100755 index 0000000..4ea655d --- /dev/null +++ b/kafka-setup/sh/reset-state.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Rebuild Kafka topic state and the dependent ksqlDB stream/table state without +# restarting the Docker stack. + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +echo "Starting Kafka reset workflow..." + +echo "Resetting ksqlDB state..." +"$SCRIPT_DIR/ksql/03_reset-accounts-state.sh" + +echo "Resetting Kafka source topic..." +"$SCRIPT_DIR/kafka/01_recreate-account-updates-topic.sh" + +echo "Recreating ksqlDB stream and table..." +"$SCRIPT_DIR/ksql/01_setup-streams.sh" +"$SCRIPT_DIR/ksql/02_create-accounts-table.sh" + +echo "Done resetting Kafka and ksqlDB state." 
From 75fd402204f303b6ff62bdb6660ec3001ff0d30a Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 28 Apr 2026 15:51:22 +0700 Subject: [PATCH 25/68] chore: shorter wait times --- ix-tests/configs/suite.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ix-tests/configs/suite.toml b/ix-tests/configs/suite.toml index 38e7d2b..f334906 100644 --- a/ix-tests/configs/suite.toml +++ b/ix-tests/configs/suite.toml @@ -2,5 +2,5 @@ service_binary = "target/debug/magigblock-grpc-service" validator_rpc_url = "http://127.0.0.1:8899" failure_artifact_root = "target/ix-tests/failures" service_start_timeout_ms = 10000 -checkpoint_timeout_ms = 20000 -transaction_timeout_ms = 20000 +checkpoint_timeout_ms = 2000 +transaction_timeout_ms = 2000 From e26f1990525590687ab8773a96a2db4d363ca94a Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 28 Apr 2026 15:51:44 +0700 Subject: [PATCH 26/68] chore: dumping logs on error --- ix-tests/src/expectation.rs | 3 +-- ix-tests/src/main.rs | 2 ++ ix-tests/src/scenarios/dual_restart.rs | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ix-tests/src/expectation.rs b/ix-tests/src/expectation.rs index 99b0700..458d15c 100644 --- a/ix-tests/src/expectation.rs +++ b/ix-tests/src/expectation.rs @@ -131,7 +131,6 @@ impl ExpectedUpdate { } } - #[allow(dead_code)] impl CheckpointRunner { pub fn new(config: &SuiteConfig) -> Self { @@ -210,7 +209,7 @@ mod tests { use crate::layout::ServiceInstance; use crate::observation::ObservedUpdate; - use super::{ExpectedUpdate}; + use super::ExpectedUpdate; fn observed_update() -> ObservedUpdate { ObservedUpdate { diff --git a/ix-tests/src/main.rs b/ix-tests/src/main.rs index b43befb..a79734d 100644 --- a/ix-tests/src/main.rs +++ b/ix-tests/src/main.rs @@ -83,6 +83,8 @@ async fn main() -> anyhow::Result<()> { ctx.artifacts .write_client_updates(*scenario, &failure.clients)?; } + ctx.artifacts + .dump_service_logs(layout::ServiceInstance::One)?; 
ctx.artifacts.persist_failure()?; return Err(failure.error); } diff --git a/ix-tests/src/scenarios/dual_restart.rs b/ix-tests/src/scenarios/dual_restart.rs index 0f2f785..4fbdf56 100644 --- a/ix-tests/src/scenarios/dual_restart.rs +++ b/ix-tests/src/scenarios/dual_restart.rs @@ -1,4 +1,4 @@ -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use crate::accounts::{NamedAccount, ScenarioAccounts}; use crate::client::TestGrpcClient; From 2210038d8e1aaee4247405814591a7b35ccfc318 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 14:07:23 +0700 Subject: [PATCH 27/68] chore: fix geyser plugin launch --- geyser-plugin/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/geyser-plugin/Makefile b/geyser-plugin/Makefile index 87ab5fc..4ccc6bd 100644 --- a/geyser-plugin/Makefile +++ b/geyser-plugin/Makefile @@ -1,5 +1,7 @@ .PHONY: help build build-plugin launch init-config clean +MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) + UNAME_S := $(shell uname -s) ifeq ($(UNAME_S),Linux) PLUGIN_EXT := so @@ -38,6 +40,7 @@ launch: build-plugin exit 1; \ fi; \ fi + cd "$(MAKEFILE_DIR)" && \ solana-test-validator --log --reset --geyser-plugin-config "$(VALIDATOR_CONFIG)" init-config: From d384b78edd5119f08899dc3b83f8bdde5b4e3805 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 15:00:46 +0700 Subject: [PATCH 28/68] feat: add rpc_url to ValidatorConfig Amp-Thread-ID: https://ampcode.com/threads/T-019de28c-6339-7789-b2b9-91574b2fb9dc Co-authored-by: Amp --- grpc-service/src/app.rs | 1 + grpc-service/src/config.rs | 9 +++++++++ ix-tests/configs/grpc-service/service-1.toml | 1 + ix-tests/configs/grpc-service/service-2.toml | 1 + 4 files changed, 12 insertions(+) diff --git a/grpc-service/src/app.rs b/grpc-service/src/app.rs index 5318204..695eccb 100644 --- a/grpc-service/src/app.rs +++ b/grpc-service/src/app.rs @@ -189,6 +189,7 @@ mod tests { validator: ValidatorConfig { accounts_filter_url: 
"http://localhost:3000/filters/accounts" .to_owned(), + rpc_url: "http://localhost:8899".to_owned(), }, grpc: GrpcConfig { bind_host: "127.0.0.1".to_owned(), diff --git a/grpc-service/src/config.rs b/grpc-service/src/config.rs index 8780252..a99d6fe 100644 --- a/grpc-service/src/config.rs +++ b/grpc-service/src/config.rs @@ -14,6 +14,7 @@ const DEFAULT_KSQL_URL: &str = "http://localhost:8088"; const DEFAULT_KSQL_TABLE: &str = "ACCOUNTS"; const DEFAULT_VALIDATOR_ACCOUNTS_FILTER_URL: &str = "http://localhost:3000/filters/accounts"; +const DEFAULT_VALIDATOR_RPC_URL: &str = "http://127.0.0.1:8899"; const DEFAULT_AUTO_OFFSET_RESET: &str = "latest"; const DEFAULT_GRPC_BIND_HOST: &str = "0.0.0.0"; const DEFAULT_GRPC_PORT: u16 = 50051; @@ -46,6 +47,8 @@ pub struct KsqlConfig { #[derive(Clone, Debug)] pub struct ValidatorConfig { pub accounts_filter_url: String, + #[allow(dead_code)] + pub rpc_url: String, } #[derive(Clone, Debug)] @@ -97,6 +100,8 @@ struct FileKsqlConfig { struct FileValidatorConfig { #[serde(default)] accounts_filter_url: Option, + #[serde(default)] + rpc_url: Option, } #[derive(Debug, Deserialize)] @@ -128,6 +133,7 @@ impl Config { }); let validator = file.validator.unwrap_or(FileValidatorConfig { accounts_filter_url: None, + rpc_url: None, }); let grpc = file.grpc.unwrap_or(FileGrpcConfig { bind_host: None, @@ -161,6 +167,9 @@ impl Config { .unwrap_or_else(|| { DEFAULT_VALIDATOR_ACCOUNTS_FILTER_URL.to_owned() }), + rpc_url: validator + .rpc_url + .unwrap_or_else(|| DEFAULT_VALIDATOR_RPC_URL.to_owned()), }, grpc: GrpcConfig { bind_host: grpc diff --git a/ix-tests/configs/grpc-service/service-1.toml b/ix-tests/configs/grpc-service/service-1.toml index 1f10478..4c537e7 100644 --- a/ix-tests/configs/grpc-service/service-1.toml +++ b/ix-tests/configs/grpc-service/service-1.toml @@ -10,6 +10,7 @@ table = "ACCOUNTS" [validator] accounts_filter_url = "http://localhost:3000/filters/accounts" +rpc_url = "http://127.0.0.1:8899" [grpc] bind_host = "0.0.0.0" diff 
--git a/ix-tests/configs/grpc-service/service-2.toml b/ix-tests/configs/grpc-service/service-2.toml index 934c23c..ae5a348 100644 --- a/ix-tests/configs/grpc-service/service-2.toml +++ b/ix-tests/configs/grpc-service/service-2.toml @@ -10,6 +10,7 @@ table = "ACCOUNTS" [validator] accounts_filter_url = "http://localhost:3000/filters/accounts" +rpc_url = "http://127.0.0.1:8899" [grpc] bind_host = "0.0.0.0" From f7331d26380e6df916f8d1bb147aaaa4804f8f61 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 15:02:21 +0700 Subject: [PATCH 29/68] feat: add ServiceReadiness primitive Amp-Thread-ID: https://ampcode.com/threads/T-019de28e-1889-70e1-8acd-8644a015c46e Co-authored-by: Amp --- grpc-service/src/grpc_service/mod.rs | 1 + grpc-service/src/grpc_service/readiness.rs | 78 ++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 grpc-service/src/grpc_service/readiness.rs diff --git a/grpc-service/src/grpc_service/mod.rs b/grpc-service/src/grpc_service/mod.rs index f05aeac..5917916 100644 --- a/grpc-service/src/grpc_service/mod.rs +++ b/grpc-service/src/grpc_service/mod.rs @@ -1,6 +1,7 @@ mod convert; mod dispatcher; mod init_subs; +mod readiness; mod runtime; mod service; mod sink; diff --git a/grpc-service/src/grpc_service/readiness.rs b/grpc-service/src/grpc_service/readiness.rs new file mode 100644 index 0000000..0f3aaca --- /dev/null +++ b/grpc-service/src/grpc_service/readiness.rs @@ -0,0 +1,78 @@ +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; + +/// Shared startup-readiness flag for the gRPC service. +/// +/// The service starts in the "not ready" state. Once startup +/// preflight has verified all required dependencies, the owner of the +/// state must call [`ServiceReadiness::mark_ready`]. The Ping handler +/// reads the state to decide whether to advertise the service as ready +/// to clients. 
+#[derive(Clone, Debug, Default)] +#[allow(dead_code)] +pub(crate) struct ServiceReadiness { + inner: Arc, +} + +impl ServiceReadiness { + /// Construct a new readiness flag in the "not ready" state. + #[allow(dead_code)] + pub(crate) fn new() -> Self { + Self { + inner: Arc::new(AtomicBool::new(false)), + } + } + + /// Test-only constructor that starts in the "ready" state. Used by + /// unit tests that want to bypass the preflight gate. + #[cfg(test)] + pub(crate) fn ready_for_test() -> Self { + Self { + inner: Arc::new(AtomicBool::new(true)), + } + } + + /// Flip the flag to `ready`. Idempotent; safe to call repeatedly. + #[allow(dead_code)] + pub(crate) fn mark_ready(&self) { + self.inner.store(true, Ordering::Release); + } + + /// Return whether the service has finished startup preflight. + #[allow(dead_code)] + pub(crate) fn is_ready(&self) -> bool { + self.inner.load(Ordering::Acquire) + } +} + +#[cfg(test)] +mod tests { + use super::ServiceReadiness; + + #[test] + fn test_new_is_not_ready() { + let r = ServiceReadiness::new(); + assert!(!r.is_ready()); + } + + #[test] + fn test_mark_ready_sets_flag() { + let r = ServiceReadiness::new(); + r.mark_ready(); + assert!(r.is_ready()); + } + + #[test] + fn test_clones_share_state() { + let r = ServiceReadiness::new(); + let r2 = r.clone(); + assert!(!r2.is_ready()); + r.mark_ready(); + assert!(r2.is_ready()); + } + + #[test] + fn test_ready_for_test_is_ready() { + assert!(ServiceReadiness::ready_for_test().is_ready()); + } +} From 5aec64f3123797ea3e72a9edb2e42351d742f321 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 15:04:05 +0700 Subject: [PATCH 30/68] feat: gate Ping on ServiceReadiness Amp-Thread-ID: https://ampcode.com/threads/T-019de28f-74be-74cc-9cf7-4fcab4ab4554 Co-authored-by: Amp --- grpc-service/src/grpc_service/runtime.rs | 10 +++++++ grpc-service/src/grpc_service/service.rs | 38 ++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git 
a/grpc-service/src/grpc_service/runtime.rs b/grpc-service/src/grpc_service/runtime.rs index 64e0175..38cf8b3 100644 --- a/grpc-service/src/grpc_service/runtime.rs +++ b/grpc-service/src/grpc_service/runtime.rs @@ -16,6 +16,7 @@ use tonic::transport::Server; use super::dispatcher::DispatcherHandle; use super::init_subs::InitSubsClient; +use super::readiness::ServiceReadiness; use super::service::GrpcSubscriptionService; use super::sink::GrpcSink; @@ -72,10 +73,12 @@ impl GrpcService { KsqlAccountSnapshotClient::new(config.ksql.clone())?; let validator_subscriptions = InitSubsClient::new(config.validator.accounts_filter_url.clone())?; + let readiness = ServiceReadiness::new(); let service = GrpcSubscriptionService::new( dispatcher, snapshot_store, validator_subscriptions, + readiness.clone(), ) .into_server(); let (shutdown_tx, shutdown_rx) = oneshot::channel(); @@ -94,6 +97,7 @@ impl GrpcService { Ok(GrpcServiceHandle { sink, + readiness, is_running, shutdown_tx: Some(shutdown_tx), task: Some(task), @@ -105,6 +109,7 @@ impl GrpcService { #[derive(Debug)] pub struct GrpcServiceHandle { sink: GrpcSink, + readiness: ServiceReadiness, is_running: Arc, shutdown_tx: Option>, task: Option>>, @@ -116,6 +121,11 @@ impl GrpcServiceHandle { self.sink.clone() } + #[allow(dead_code)] + pub fn readiness(&self) -> ServiceReadiness { + self.readiness.clone() + } + #[allow(dead_code)] pub fn local_addr(&self) -> SocketAddr { self.local_addr diff --git a/grpc-service/src/grpc_service/service.rs b/grpc-service/src/grpc_service/service.rs index 6926fb4..83af46b 100644 --- a/grpc-service/src/grpc_service/service.rs +++ b/grpc-service/src/grpc_service/service.rs @@ -18,6 +18,7 @@ use tracing::{debug, info, warn}; use super::convert::to_subscribe_update; use super::dispatcher::{DispatcherHandle, TargetedSendResult}; +use super::readiness::ServiceReadiness; use crate::domain::{AccountEvent, PubkeyFilter}; use crate::traits::{SnapshotStore, ValidatorSubscriptions}; @@ -37,6 +38,7 @@ 
pub(crate) struct GrpcSubscriptionService< dispatcher: DispatcherHandle, snapshot_store: P, validator_subscriptions: V, + readiness: ServiceReadiness, } impl< @@ -48,11 +50,13 @@ impl< dispatcher: DispatcherHandle, snapshot_store: P, validator_subscriptions: V, + readiness: ServiceReadiness, ) -> Self { Self { dispatcher, snapshot_store, validator_subscriptions, + readiness, } } @@ -423,6 +427,10 @@ impl< &self, request: Request, ) -> Result, Status> { + if !self.readiness.is_ready() { + debug!("ping rejected: service not ready"); + return Err(Status::unavailable("service not ready")); + } Ok(Response::new(PongResponse { count: request.into_inner().count, })) @@ -487,6 +495,7 @@ mod tests { use crate::domain::{AccountState, PubkeyFilter, bytes_to_base58}; use crate::errors::{GeykagError, GeykagResult}; use crate::grpc_service::dispatcher::DispatcherHandle; + use crate::grpc_service::readiness::ServiceReadiness; use crate::traits::{SnapshotStore, ValidatorSubscriptions}; fn pubkey_bytes(byte: u8) -> [u8; 32] { @@ -938,11 +947,36 @@ mod tests { let snapshot_store = MockSnapshotStore::new(HashMap::new()); let validator = MockValidatorSubscriptions::succeed(); - let service = - GrpcSubscriptionService::new(dispatcher, snapshot_store, validator); + let service = GrpcSubscriptionService::new( + dispatcher, + snapshot_store, + validator, + ServiceReadiness::ready_for_test(), + ); let response = service.ping(Request::new(PingRequest { count: 0 })).await; assert!(response.is_ok()); } + + #[tokio::test] + async fn test_ping_returns_unavailable_when_not_ready() { + use tonic::Code; + let dispatcher = DispatcherHandle::spawn(8, 8); + let snapshot_store = MockSnapshotStore::new(HashMap::new()); + let validator = MockValidatorSubscriptions::succeed(); + + let service = GrpcSubscriptionService::new( + dispatcher, + snapshot_store, + validator, + ServiceReadiness::new(), + ); + + let response = service + .ping(Request::new(PingRequest { count: 0 })) + .await + .unwrap_err(); + 
assert_eq!(response.code(), Code::Unavailable); + } } From 59ac0aa1a9740d66c1a121f38d045f0f3e6291f2 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 15:06:00 +0700 Subject: [PATCH 31/68] feat: add startup preflight probes Amp-Thread-ID: https://ampcode.com/threads/T-019de291-122a-721f-a10c-f4c25effd6c2 Co-authored-by: Amp --- grpc-service/src/errors.rs | 51 +++++++ grpc-service/src/kafka.rs | 36 +++++ grpc-service/src/main.rs | 1 + grpc-service/src/preflight.rs | 279 ++++++++++++++++++++++++++++++++++ 4 files changed, 367 insertions(+) create mode 100644 grpc-service/src/preflight.rs diff --git a/grpc-service/src/errors.rs b/grpc-service/src/errors.rs index 7736f0d..48413c1 100644 --- a/grpc-service/src/errors.rs +++ b/grpc-service/src/errors.rs @@ -69,6 +69,57 @@ pub enum GeykagError { #[source] source: reqwest::Error, }, + #[allow(dead_code)] + #[error("startup preflight HTTP client build failed")] + PreflightClientBuild { + #[source] + source: reqwest::Error, + }, + #[allow(dead_code)] + #[error("validator plugin admin probe to {url} failed")] + PreflightValidatorPluginRequest { + url: String, + #[source] + source: reqwest::Error, + }, + #[allow(dead_code)] + #[error( + "validator plugin admin probe to {url} returned non-success status" + )] + PreflightValidatorPluginStatus { + url: String, + #[source] + source: reqwest::Error, + }, + #[allow(dead_code)] + #[error("validator RPC probe to {url} failed")] + PreflightValidatorRpcRequest { + url: String, + #[source] + source: reqwest::Error, + }, + #[allow(dead_code)] + #[error("validator RPC probe to {url} returned non-success status")] + PreflightValidatorRpcStatus { + url: String, + #[source] + source: reqwest::Error, + }, + #[allow(dead_code)] + #[error("Kafka broker {broker} metadata probe failed")] + PreflightKafkaMetadata { + broker: String, + #[source] + source: KafkaError, + }, + #[allow(dead_code)] + #[error( + "startup preflight timed out after {elapsed_ms} ms; last failing probe: 
{probe}" + )] + PreflightTimeout { + probe: &'static str, + elapsed_ms: u128, + }, #[error("failed to parse ksqlDB response line as JSON: {line}")] KsqlJsonLine { line: String, diff --git a/grpc-service/src/kafka.rs b/grpc-service/src/kafka.rs index 6b0ea5d..c1425d8 100644 --- a/grpc-service/src/kafka.rs +++ b/grpc-service/src/kafka.rs @@ -21,6 +21,42 @@ impl KafkaAccountUpdateStream { Self { config } } + /// Verify that the configured Kafka broker is reachable by + /// requesting cluster metadata. Does not subscribe to the topic and + /// does not consume any messages. + #[allow(dead_code)] + pub fn probe(&self) -> GeykagResult<()> { + use rdkafka::consumer::BaseConsumer; + use rdkafka::consumer::Consumer as _; + use std::time::Duration; + + let mut client_config = ClientConfig::new(); + for (key, value) in &self.config.client { + client_config.set(key, value); + } + client_config + .set("bootstrap.servers", &self.config.bootstrap_servers) + .set("group.id", &self.config.group_id) + .set("auto.offset.reset", &self.config.auto_offset_reset) + .set("enable.auto.commit", "false"); + + let consumer: BaseConsumer = + client_config.create().map_err(|source| { + GeykagError::KafkaConsumerCreate { + broker: self.config.bootstrap_servers.clone(), + source, + } + })?; + + consumer + .fetch_metadata(None, Duration::from_secs(2)) + .map(|_| ()) + .map_err(|source| GeykagError::PreflightKafkaMetadata { + broker: self.config.bootstrap_servers.clone(), + source, + }) + } + pub async fn run( &self, filter: Option<&PubkeyFilter>, diff --git a/grpc-service/src/main.rs b/grpc-service/src/main.rs index 9d26052..77c7eef 100644 --- a/grpc-service/src/main.rs +++ b/grpc-service/src/main.rs @@ -6,6 +6,7 @@ mod grpc_service; mod kafka; mod ksql; mod output; +mod preflight; mod traits; use anyhow::Result; diff --git a/grpc-service/src/preflight.rs b/grpc-service/src/preflight.rs new file mode 100644 index 0000000..e0e8dd2 --- /dev/null +++ b/grpc-service/src/preflight.rs @@ -0,0 +1,279 @@ 
+use std::time::{Duration, Instant}; + +use reqwest::Client; +use serde_json::json; +use tracing::{debug, info, warn}; + +use crate::config::Config; +use crate::errors::{GeykagError, GeykagResult}; +use crate::kafka::KafkaAccountUpdateStream; + +const PROBE_INITIAL_BACKOFF: Duration = Duration::from_millis(250); +const PROBE_MAX_BACKOFF: Duration = Duration::from_secs(2); +const REQUEST_TIMEOUT: Duration = Duration::from_secs(2); + +/// Run startup preflight against all required dependencies. Returns +/// `Ok(())` once every probe has succeeded at least once. Returns a +/// `PreflightTimeout` error tagged with the last failing probe if the +/// total elapsed time exceeds `total_timeout`. +#[allow(dead_code)] +pub async fn wait_for_dependencies( + config: &Config, + total_timeout: Duration, +) -> GeykagResult<()> { + let started = Instant::now(); + let deadline = started + total_timeout; + let http = build_http_client()?; + + info!( + plugin = config.validator.accounts_filter_url, + rpc = config.validator.rpc_url, + kafka = config.kafka.bootstrap_servers, + "startup preflight: probing dependencies" + ); + + run_probe_with_retry("validator-plugin-admin", deadline, || async { + probe_validator_plugin_admin( + &http, + &config.validator.accounts_filter_url, + ) + .await + }) + .await?; + + run_probe_with_retry("validator-rpc", deadline, || async { + probe_validator_rpc_health(&http, &config.validator.rpc_url).await + }) + .await?; + + let kafka_stream = KafkaAccountUpdateStream::new(config.kafka.clone()); + run_probe_with_retry("kafka-metadata", deadline, || async { + kafka_stream.probe() + }) + .await?; + + let elapsed_ms = started.elapsed().as_millis(); + info!(elapsed_ms, "startup preflight: all dependencies ready"); + Ok(()) +} + +#[allow(dead_code)] +async fn run_probe_with_retry( + probe: &'static str, + deadline: Instant, + mut attempt: F, +) -> GeykagResult<()> +where + F: FnMut() -> Fut, + Fut: std::future::Future>, +{ + let started = Instant::now(); + let mut 
backoff = PROBE_INITIAL_BACKOFF; + loop { + debug!(probe, "startup preflight: attempting probe"); + match attempt().await { + Ok(()) => { + let elapsed_ms = started.elapsed().as_millis(); + info!(probe, elapsed_ms, "startup preflight: probe ok"); + return Ok(()); + } + Err(error) => { + if Instant::now() >= deadline { + let elapsed_ms = started.elapsed().as_millis(); + warn!( + probe, + elapsed_ms, + error = %error, + "startup preflight: probe deadline exceeded" + ); + return Err(GeykagError::PreflightTimeout { + probe, + elapsed_ms, + }); + } + debug!( + probe, + backoff_ms = backoff.as_millis() as u64, + error = %error, + "startup preflight: probe failed; will retry" + ); + tokio::time::sleep(backoff).await; + backoff = (backoff * 2).min(PROBE_MAX_BACKOFF); + } + } + } +} + +#[allow(dead_code)] +fn build_http_client() -> GeykagResult { + Client::builder() + .timeout(REQUEST_TIMEOUT) + .build() + .map_err(|source| GeykagError::PreflightClientBuild { source }) +} + +#[allow(dead_code)] +async fn probe_validator_plugin_admin( + http: &Client, + url: &str, +) -> GeykagResult<()> { + let body = r#"{"pubkeys":[]}"#; + let response = http + .post(url) + .header(reqwest::header::CONTENT_TYPE, "application/json") + .body(body) + .send() + .await + .map_err(|source| GeykagError::PreflightValidatorPluginRequest { + url: url.to_owned(), + source, + })?; + + response.error_for_status().map(|_| ()).map_err(|source| { + GeykagError::PreflightValidatorPluginStatus { + url: url.to_owned(), + source, + } + }) +} + +#[allow(dead_code)] +async fn probe_validator_rpc_health( + http: &Client, + url: &str, +) -> GeykagResult<()> { + let body = json!({ + "jsonrpc": "2.0", + "id": 1, + "method": "getHealth", + }); + let response = http + .post(url) + .header(reqwest::header::CONTENT_TYPE, "application/json") + .json(&body) + .send() + .await + .map_err(|source| GeykagError::PreflightValidatorRpcRequest { + url: url.to_owned(), + source, + })?; + + response.error_for_status().map(|_| 
()).map_err(|source| { + GeykagError::PreflightValidatorRpcStatus { + url: url.to_owned(), + source, + } + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use axum::{Router, http::StatusCode, routing::post}; + use std::sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, + }; + + async fn spawn_test_server(router: Router) -> String { + let listener = + tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + tokio::spawn(async move { + axum::serve(listener, router).await.unwrap(); + }); + format!("http://{addr}") + } + + #[tokio::test] + async fn test_probe_validator_plugin_admin_succeeds_on_2xx() { + let app = Router::new() + .route("/filters/accounts", post(|| async { StatusCode::OK })); + let base = spawn_test_server(app).await; + let url = format!("{base}/filters/accounts"); + let http = build_http_client().unwrap(); + + probe_validator_plugin_admin(&http, &url).await.unwrap(); + } + + #[tokio::test] + async fn test_probe_validator_plugin_admin_fails_on_5xx() { + let app = Router::new().route( + "/filters/accounts", + post(|| async { StatusCode::INTERNAL_SERVER_ERROR }), + ); + let base = spawn_test_server(app).await; + let url = format!("{base}/filters/accounts"); + let http = build_http_client().unwrap(); + + let err = probe_validator_plugin_admin(&http, &url).await.unwrap_err(); + assert!(matches!( + err, + GeykagError::PreflightValidatorPluginStatus { .. 
} + )); + } + + #[tokio::test] + async fn test_probe_validator_rpc_health_succeeds_on_2xx() { + let app = Router::new().route( + "/", + post(|| async { + ( + StatusCode::OK, + [("content-type", "application/json")], + r#"{"jsonrpc":"2.0","result":"ok","id":1}"#, + ) + }), + ); + let base = spawn_test_server(app).await; + let http = build_http_client().unwrap(); + + probe_validator_rpc_health(&http, &base).await.unwrap(); + } + + #[tokio::test] + async fn test_run_probe_with_retry_eventually_succeeds() { + let calls = Arc::new(AtomicUsize::new(0)); + let calls_inner = calls.clone(); + let deadline = Instant::now() + Duration::from_secs(5); + + run_probe_with_retry("test-probe", deadline, move || { + let calls_inner = calls_inner.clone(); + async move { + let n = calls_inner.fetch_add(1, Ordering::SeqCst); + if n < 2 { + Err(GeykagError::PreflightTimeout { + probe: "test", + elapsed_ms: 0, + }) + } else { + Ok(()) + } + } + }) + .await + .unwrap(); + assert_eq!(calls.load(Ordering::SeqCst), 3); + } + + #[tokio::test] + async fn test_run_probe_with_retry_returns_timeout() { + let deadline = Instant::now() + Duration::from_millis(50); + let err = run_probe_with_retry("test-probe", deadline, || async { + Err(GeykagError::PreflightTimeout { + probe: "noop", + elapsed_ms: 0, + }) + }) + .await + .unwrap_err(); + + assert!(matches!( + err, + GeykagError::PreflightTimeout { + probe: "test-probe", + .. 
+ } + )); + } +} From 881541a538216408508cb92198daf68b18700ce3 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 15:08:40 +0700 Subject: [PATCH 32/68] feat: orchestrate preflight and readiness in App::run Amp-Thread-ID: https://ampcode.com/threads/T-019de291-122a-721f-a10c-f4c25effd6c2 Co-authored-by: Amp --- grpc-service/src/app.rs | 31 +++++++++++++++++++++- grpc-service/src/grpc_service/mod.rs | 1 + grpc-service/src/grpc_service/readiness.rs | 10 +++---- grpc-service/src/grpc_service/runtime.rs | 1 - 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/grpc-service/src/app.rs b/grpc-service/src/app.rs index 695eccb..6d84207 100644 --- a/grpc-service/src/app.rs +++ b/grpc-service/src/app.rs @@ -1,13 +1,17 @@ use crate::config::Config; use crate::domain::AccountEvent; use crate::errors::GeykagResult; -use crate::grpc_service::{GrpcService, GrpcServiceHandle, GrpcSink}; +use crate::grpc_service::{ + GrpcService, GrpcServiceHandle, GrpcSink, ServiceReadiness, +}; use crate::kafka::KafkaAccountUpdateStream; use crate::ksql::KsqlAccountSnapshotClient; use crate::output::{ConsoleSink, TeeSink}; +use crate::preflight; use crate::traits::{ AccountSink, AccountUpdateSource, SnapshotStore, StatusSink, }; +use std::time::Duration; pub struct App< P: SnapshotStore, @@ -20,6 +24,7 @@ pub struct App< account_update_source: K, sink: A, status_sink: S, + readiness: ServiceReadiness, } impl @@ -42,6 +47,7 @@ impl account_update_source, ConsoleSink::new(), ConsoleSink::new(), + ServiceReadiness::new(), )) } } @@ -68,6 +74,7 @@ impl account_update_source, sink, ConsoleSink::new(), + grpc.readiness(), ); Ok((app, grpc)) @@ -97,6 +104,7 @@ impl account_update_source, sink, ConsoleSink::new(), + grpc.readiness(), ); Ok((app, grpc)) @@ -112,6 +120,7 @@ impl account_update_source: K, sink: A, status_sink: S, + readiness: ServiceReadiness, ) -> Self { Self { config, @@ -119,6 +128,7 @@ impl account_update_source, sink, status_sink, + readiness, } } @@ -145,6 
+155,17 @@ impl } } + // Startup preflight gates client-visible readiness. + if !self.readiness.is_ready() { + preflight::wait_for_dependencies( + &self.config, + Duration::from_secs(60), + ) + .await?; + self.readiness.mark_ready(); + tracing::info!("service marked as ready"); + } + self.account_update_source .run(self.config.pubkey_filter.as_ref(), |message| { let event = AccountEvent::Live(message); @@ -168,6 +189,7 @@ mod tests { bytes_to_base58, }; use crate::errors::{GeykagError, GeykagResult}; + use crate::grpc_service::ServiceReadiness; use crate::kafka::StreamMessage; use crate::traits::{ AccountSink, AccountUpdateSource, SnapshotStore, StatusSink, @@ -416,6 +438,7 @@ mod tests { update_source.clone(), sink.clone(), status_sink.clone(), + ServiceReadiness::ready_for_test(), ); app.run().await.unwrap(); @@ -440,6 +463,7 @@ mod tests { update_source.clone(), RecordingSink::new(false, false), status_sink.clone(), + ServiceReadiness::ready_for_test(), ); app.run().await.unwrap(); @@ -464,6 +488,7 @@ mod tests { update_source.clone(), RecordingSink::new(false, false), status_sink.clone(), + ServiceReadiness::ready_for_test(), ); app.run().await.unwrap(); @@ -491,6 +516,7 @@ mod tests { update_source.clone(), RecordingSink::new(false, false), RecordingStatusSink::new(), + ServiceReadiness::ready_for_test(), ); let error = app.run().await.unwrap_err(); @@ -511,6 +537,7 @@ mod tests { update_source.clone(), RecordingSink::new(true, false), RecordingStatusSink::new(), + ServiceReadiness::ready_for_test(), ); let error = app.run().await.unwrap_err(); @@ -530,6 +557,7 @@ mod tests { update_source.clone(), RecordingSink::new(false, true), RecordingStatusSink::new(), + ServiceReadiness::ready_for_test(), ); let error = app.run().await.unwrap_err(); @@ -549,6 +577,7 @@ mod tests { update_source.clone(), RecordingSink::new(false, false), RecordingStatusSink::new(), + ServiceReadiness::ready_for_test(), ); let error = app.run().await.unwrap_err(); diff --git 
a/grpc-service/src/grpc_service/mod.rs b/grpc-service/src/grpc_service/mod.rs index 5917916..77eccb3 100644 --- a/grpc-service/src/grpc_service/mod.rs +++ b/grpc-service/src/grpc_service/mod.rs @@ -7,5 +7,6 @@ mod service; mod sink; mod utils; +pub use readiness::ServiceReadiness; pub use runtime::{GrpcService, GrpcServiceHandle}; pub use sink::GrpcSink; diff --git a/grpc-service/src/grpc_service/readiness.rs b/grpc-service/src/grpc_service/readiness.rs index 0f3aaca..df1609f 100644 --- a/grpc-service/src/grpc_service/readiness.rs +++ b/grpc-service/src/grpc_service/readiness.rs @@ -10,14 +10,14 @@ use std::sync::atomic::{AtomicBool, Ordering}; /// to clients. #[derive(Clone, Debug, Default)] #[allow(dead_code)] -pub(crate) struct ServiceReadiness { +pub struct ServiceReadiness { inner: Arc, } impl ServiceReadiness { /// Construct a new readiness flag in the "not ready" state. #[allow(dead_code)] - pub(crate) fn new() -> Self { + pub fn new() -> Self { Self { inner: Arc::new(AtomicBool::new(false)), } @@ -26,7 +26,7 @@ impl ServiceReadiness { /// Test-only constructor that starts in the "ready" state. Used by /// unit tests that want to bypass the preflight gate. #[cfg(test)] - pub(crate) fn ready_for_test() -> Self { + pub fn ready_for_test() -> Self { Self { inner: Arc::new(AtomicBool::new(true)), } @@ -34,13 +34,13 @@ impl ServiceReadiness { /// Flip the flag to `ready`. Idempotent; safe to call repeatedly. #[allow(dead_code)] - pub(crate) fn mark_ready(&self) { + pub fn mark_ready(&self) { self.inner.store(true, Ordering::Release); } /// Return whether the service has finished startup preflight. 
#[allow(dead_code)] - pub(crate) fn is_ready(&self) -> bool { + pub fn is_ready(&self) -> bool { self.inner.load(Ordering::Acquire) } } diff --git a/grpc-service/src/grpc_service/runtime.rs b/grpc-service/src/grpc_service/runtime.rs index 38cf8b3..57ffbd5 100644 --- a/grpc-service/src/grpc_service/runtime.rs +++ b/grpc-service/src/grpc_service/runtime.rs @@ -121,7 +121,6 @@ impl GrpcServiceHandle { self.sink.clone() } - #[allow(dead_code)] pub fn readiness(&self) -> ServiceReadiness { self.readiness.clone() } From 30cb19bc3414230fa87aa72829845e65f0e5713b Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 15:09:39 +0700 Subject: [PATCH 33/68] refactor: distinguish preflight-pending from connection errors Amp-Thread-ID: https://ampcode.com/threads/T-019de295-4872-75c9-91b2-29f52aaffe88 Co-authored-by: Amp --- ix-tests/src/service.rs | 53 +++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/ix-tests/src/service.rs b/ix-tests/src/service.rs index 4b25f33..ba5f128 100644 --- a/ix-tests/src/service.rs +++ b/ix-tests/src/service.rs @@ -142,23 +142,46 @@ impl ServiceController { log_paths: &crate::artifacts::ServiceLogPaths, ) -> anyhow::Result<()> { let deadline = tokio::time::Instant::now() + self.service_start_timeout; + let mut announced_waiting = false; loop { - if let Ok(mut client) = - GeyserClient::connect(endpoint.to_owned()).await - && client - .ping(PingRequest { count: 1 }) - .await - .inspect_err(|err| { - warn!( - "failed to ping grpc-service at {}: {err}", - endpoint - ) - }) - .is_ok() - { - info!(endpoint, "grpc-service is ready"); - return Ok(()); + match GeyserClient::connect(endpoint.to_owned()).await { + Ok(mut client) => { + match client.ping(PingRequest { count: 1 }).await { + Ok(_) => { + info!(endpoint, "grpc-service is ready"); + return Ok(()); + } + Err(err) if err.code() == tonic::Code::Unavailable => { + if !announced_waiting { + info!( + endpoint, + message = %err.message(), 
+ "grpc-service listening but not yet ready; waiting for startup preflight" + ); + announced_waiting = true; + } else { + debug!( + endpoint, + message = %err.message(), + "grpc-service still preflight-pending" + ); + } + } + Err(err) => { + warn!( + endpoint, + "ping returned non-readiness error: {err}" + ); + } + } + } + Err(err) => { + debug!( + endpoint, + "grpc-service not yet accepting connections: {err}" + ); + } } if tokio::time::Instant::now() >= deadline { From 32475cd0a13aab451290308087dd6b653227ac79 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 15:26:19 +0700 Subject: [PATCH 34/68] chore: convenience make tasks --- Makefile | 3 +++ kafka-setup/Makefile | 9 +++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index d64f43f..829b2f0 100644 --- a/Makefile +++ b/Makefile @@ -67,6 +67,9 @@ kafka-ui: kafka-ui-down: $(MAKE) -C kafka-setup ui-down +kafka-reset-state: + $(MAKE) -C kafka-setup reset-state + ix-tests-build: cargo build -p magigblock-grpc-service cargo build -p ix-tests diff --git a/kafka-setup/Makefile b/kafka-setup/Makefile index d1c74b0..4275266 100644 --- a/kafka-setup/Makefile +++ b/kafka-setup/Makefile @@ -1,3 +1,4 @@ +MAKEFILE_DIR=$(dir $(abspath $(lastword $(MAKEFILE_LIST)))) DC := $(shell if command -v docker-compose >/dev/null 2>&1; then echo docker-compose; else echo docker compose; fi) .PHONY: help up down setup-stream create-table register-schema ready reset-state ui ui-down @@ -21,18 +22,18 @@ down: $(DC) down -v register-schema: - sh/redpanda/01_register-proto-schema.sh + $(MAKEFILE_DIR)sh/redpanda/01_register-proto-schema.sh setup-stream: - sh/ksql/01_setup-streams.sh + $(MAKEFILE_DIR)sh/ksql/01_setup-streams.sh create-table: - sh/ksql/02_create-accounts-table.sh + $(MAKEFILE_DIR)sh/ksql/02_create-accounts-table.sh ready: up setup-stream create-table register-schema reset-state: - sh/reset-state.sh + $(MAKEFILE_DIR)sh/reset-state.sh ui: $(DC) up -d redpanda-console From 
6353c8c01c685284efd4ffe5ddb03bc7f4263c94 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 15:26:35 +0700 Subject: [PATCH 35/68] chore: increase checkpoint timeout --- ix-tests/configs/suite.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ix-tests/configs/suite.toml b/ix-tests/configs/suite.toml index f334906..a66abbe 100644 --- a/ix-tests/configs/suite.toml +++ b/ix-tests/configs/suite.toml @@ -2,5 +2,5 @@ service_binary = "target/debug/magigblock-grpc-service" validator_rpc_url = "http://127.0.0.1:8899" failure_artifact_root = "target/ix-tests/failures" service_start_timeout_ms = 10000 -checkpoint_timeout_ms = 2000 +checkpoint_timeout_ms = 8000 transaction_timeout_ms = 2000 From d22a1365c2d7f90d43d039dd820432c351d2f1b0 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 16:16:17 +0700 Subject: [PATCH 36/68] refactor: encode ix-test service ownership --- ix-tests/src/scenarios/dual_concurrent.rs | 6 +- ix-tests/src/scenarios/dual_restart.rs | 6 +- ix-tests/src/scenarios/single_basic.rs | 4 +- ix-tests/src/scenarios/single_load.rs | 4 +- ix-tests/src/service.rs | 71 +++++++++++++++-------- 5 files changed, 56 insertions(+), 35 deletions(-) diff --git a/ix-tests/src/scenarios/dual_concurrent.rs b/ix-tests/src/scenarios/dual_concurrent.rs index a9d75da..c27e13f 100644 --- a/ix-tests/src/scenarios/dual_concurrent.rs +++ b/ix-tests/src/scenarios/dual_concurrent.rs @@ -6,7 +6,7 @@ use crate::context::ScenarioContext; use crate::expectation::{CheckpointSpec, ClientCheckpoint, ExpectedUpdate}; use crate::layout::ServiceInstance; use crate::scenarios::ScenarioFailure; -use crate::service::{ManagedService, ServiceSpec}; +use crate::service::{ServiceHandle, ServiceSpec}; pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { let spec_one = ServiceSpec::for_instance(ServiceInstance::One); @@ -33,7 +33,7 @@ async fn run_inner( ctx: &ScenarioContext, spec_one: &ServiceSpec, spec_two: &ServiceSpec, - 
services: &mut Vec, + services: &mut Vec, clients: &mut Vec, ) -> anyhow::Result<()> { services.push( @@ -177,7 +177,7 @@ async fn shutdown_clients(clients: Vec) -> anyhow::Result<()> { async fn shutdown_services( controller: &crate::service::ServiceController, - services: Vec, + services: Vec, ) -> anyhow::Result<()> { for service in services { controller.shutdown(service).await?; diff --git a/ix-tests/src/scenarios/dual_restart.rs b/ix-tests/src/scenarios/dual_restart.rs index 4fbdf56..e91189a 100644 --- a/ix-tests/src/scenarios/dual_restart.rs +++ b/ix-tests/src/scenarios/dual_restart.rs @@ -7,7 +7,7 @@ use crate::expectation::{CheckpointSpec, ClientCheckpoint, ExpectedUpdate}; use crate::layout::ServiceInstance; use crate::observation::ClientLog; use crate::scenarios::ScenarioFailure; -use crate::service::{ManagedService, ServiceSpec}; +use crate::service::{ServiceHandle, ServiceSpec}; pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { let spec_one = ServiceSpec::for_instance(ServiceInstance::One); @@ -57,7 +57,7 @@ async fn run_inner( ctx: &ScenarioContext, spec_one: &ServiceSpec, spec_two: &ServiceSpec, - service_one: &mut Option, + service_one: &mut Option, active_clients: &mut Vec, ) -> anyhow::Result<()> { connect_service_one_clients( @@ -381,7 +381,7 @@ async fn shutdown_clients(clients: Vec) -> anyhow::Result<()> { async fn shutdown_service( controller: &crate::service::ServiceController, - service: &mut Option, + service: &mut Option, ) -> anyhow::Result<()> { if let Some(service) = service.take() { controller.shutdown(service).await?; diff --git a/ix-tests/src/scenarios/single_basic.rs b/ix-tests/src/scenarios/single_basic.rs index 375a1ee..8fc737b 100644 --- a/ix-tests/src/scenarios/single_basic.rs +++ b/ix-tests/src/scenarios/single_basic.rs @@ -8,7 +8,7 @@ use crate::context::ScenarioContext; use crate::expectation::{CheckpointSpec, ClientCheckpoint, ExpectedUpdate}; use crate::layout::ServiceInstance; use 
crate::scenarios::ScenarioFailure; -use crate::service::{ManagedService, ServiceSpec}; +use crate::service::{ServiceHandle, ServiceSpec}; const OWNER_DATA_SPACE: u64 = 64; const SYNTHETIC_OWNER_BYTES: [u8; 32] = [ @@ -199,7 +199,7 @@ async fn shutdown_clients(clients: Vec) -> anyhow::Result<()> { async fn shutdown_service( controller: &crate::service::ServiceController, - service: &mut Option, + service: &mut Option, ) -> anyhow::Result<()> { if let Some(service) = service.take() { controller.shutdown(service).await?; diff --git a/ix-tests/src/scenarios/single_load.rs b/ix-tests/src/scenarios/single_load.rs index fb74191..d6b6209 100644 --- a/ix-tests/src/scenarios/single_load.rs +++ b/ix-tests/src/scenarios/single_load.rs @@ -6,7 +6,7 @@ use crate::context::ScenarioContext; use crate::expectation::{CheckpointSpec, ClientCheckpoint, ExpectedUpdate}; use crate::layout::ServiceInstance; use crate::scenarios::ScenarioFailure; -use crate::service::{ManagedService, ServiceSpec}; +use crate::service::{ServiceHandle, ServiceSpec}; pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { let spec = ServiceSpec::for_instance(ServiceInstance::One); @@ -103,7 +103,7 @@ async fn shutdown_clients(clients: Vec) -> anyhow::Result<()> { async fn shutdown_service( controller: &crate::service::ServiceController, - service: &mut Option, + service: &mut Option, ) -> anyhow::Result<()> { if let Some(service) = service.take() { controller.shutdown(service).await?; diff --git a/ix-tests/src/service.rs b/ix-tests/src/service.rs index ba5f128..f6214c3 100644 --- a/ix-tests/src/service.rs +++ b/ix-tests/src/service.rs @@ -12,10 +12,27 @@ use crate::config::SuiteConfig; use crate::layout::ServiceInstance; #[allow(dead_code)] -pub struct ManagedService { +pub enum ServiceOwnership { + Owned(tokio::process::Child), + External, +} + +#[allow(dead_code)] +pub struct ServiceHandle { pub instance: ServiceInstance, pub endpoint: String, - child: tokio::process::Child, + pub 
ownership: ServiceOwnership, +} + +#[allow(dead_code)] +impl ServiceHandle { + pub fn is_owned(&self) -> bool { + matches!(self.ownership, ServiceOwnership::Owned(_)) + } + + pub fn is_external(&self) -> bool { + matches!(self.ownership, ServiceOwnership::External) + } } pub struct ServiceController { @@ -64,7 +81,7 @@ impl ServiceController { &self, spec: &ServiceSpec, artifacts: &RunArtifacts, - ) -> anyhow::Result { + ) -> anyhow::Result { let log_paths = artifacts.service_logs(spec.instance); let stdout_file = std::fs::File::create(&log_paths.stdout) @@ -103,36 +120,40 @@ impl ServiceController { ) })?; - let managed = ManagedService { + let handle = ServiceHandle { instance: spec.instance, endpoint: spec.endpoint.clone(), - child, + ownership: ServiceOwnership::Owned(child), }; self.wait_until_ready(&spec.endpoint, &log_paths).await?; - Ok(managed) + Ok(handle) } - pub async fn shutdown( - &self, - mut service: ManagedService, - ) -> anyhow::Result<()> { - info!( - endpoint = %service.endpoint, - "shutting down grpc-service" - ); - service.child.start_kill().context("failed to send kill")?; - let status = service - .child - .wait() - .await - .context("failed to wait for child")?; - debug!( - endpoint = %service.endpoint, - status = %status, - "grpc-service exited" - ); + pub async fn shutdown(&self, service: ServiceHandle) -> anyhow::Result<()> { + match service.ownership { + ServiceOwnership::Owned(mut child) => { + info!( + endpoint = %service.endpoint, + "shutting down grpc-service" + ); + child.start_kill().context("failed to send kill")?; + let status = + child.wait().await.context("failed to wait for child")?; + debug!( + endpoint = %service.endpoint, + status = %status, + "grpc-service exited" + ); + } + ServiceOwnership::External => { + info!( + endpoint = %service.endpoint, + "skipping shutdown for external grpc-service" + ); + } + } Ok(()) } From 5ed00268dfb761715a47198f8a67c3898f082f69 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 
2026 16:18:33 +0700 Subject: [PATCH 37/68] feat: attach to ready external grpc-service --- ix-tests/src/service.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/ix-tests/src/service.rs b/ix-tests/src/service.rs index f6214c3..041c618 100644 --- a/ix-tests/src/service.rs +++ b/ix-tests/src/service.rs @@ -82,6 +82,18 @@ impl ServiceController { spec: &ServiceSpec, artifacts: &RunArtifacts, ) -> anyhow::Result { + if self.probe_ready(&spec.endpoint).await { + info!( + endpoint = %spec.endpoint, + "harness is reusing an already-running external grpc-service" + ); + return Ok(ServiceHandle { + instance: spec.instance, + endpoint: spec.endpoint.clone(), + ownership: ServiceOwnership::External, + }); + } + let log_paths = artifacts.service_logs(spec.instance); let stdout_file = std::fs::File::create(&log_paths.stdout) @@ -150,13 +162,22 @@ impl ServiceController { ServiceOwnership::External => { info!( endpoint = %service.endpoint, - "skipping shutdown for external grpc-service" + "external grpc-service was left running intentionally" ); } } Ok(()) } + async fn probe_ready(&self, endpoint: &str) -> bool { + let Ok(mut client) = GeyserClient::connect(endpoint.to_owned()).await + else { + return false; + }; + + client.ping(PingRequest { count: 1 }).await.is_ok() + } + async fn wait_until_ready( &self, endpoint: &str, From 3200a687362e2409af06c7700d789fc7c97011b1 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 16:20:08 +0700 Subject: [PATCH 38/68] feat: add single triage scenario --- ix-tests/src/scenarios/single_triage.rs | 118 ++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 ix-tests/src/scenarios/single_triage.rs diff --git a/ix-tests/src/scenarios/single_triage.rs b/ix-tests/src/scenarios/single_triage.rs new file mode 100644 index 0000000..be9edce --- /dev/null +++ b/ix-tests/src/scenarios/single_triage.rs @@ -0,0 +1,118 @@ +use anyhow::Context; +use solana_keypair::Keypair; 
+use tracing::debug; + +use crate::client::TestGrpcClient; +use crate::context::ScenarioContext; +use crate::expectation::{CheckpointSpec, ClientCheckpoint, ExpectedUpdate}; +use crate::layout::ServiceInstance; +use crate::scenarios::ScenarioFailure; +use crate::service::{ServiceHandle, ServiceSpec}; + +pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { + let spec = ServiceSpec::for_instance(ServiceInstance::One); + let mut service = Some( + ctx.service_controller + .start(&spec, &ctx.artifacts) + .await + .map_err(scenario_failure_without_clients)?, + ); + let mut clients = Vec::new(); + + let result = run_inner(ctx, &spec.endpoint, &mut clients).await; + if let Err(error) = result { + return Err(ScenarioFailure { error, clients }); + } + + shutdown_clients(clients) + .await + .map_err(scenario_failure_without_clients)?; + shutdown_service(&ctx.service_controller, &mut service) + .await + .map_err(scenario_failure_without_clients)?; + Ok(()) +} + +async fn run_inner( + ctx: &ScenarioContext, + endpoint: &str, + clients: &mut Vec, +) -> anyhow::Result<()> { + let client = TestGrpcClient::connect( + 0, + ServiceInstance::One, + endpoint.to_owned(), + ) + .await + .with_context(|| "failed to connect client 0")?; + clients.push(client); + + let random_pubkey = Keypair::new().pubkey(); + debug!("Client 0 subscribing to triage pubkey: {random_pubkey}"); + clients[0] + .replace_subscription(&[random_pubkey.to_string()]) + .await?; + + let bootstrap_checkpoint = CheckpointSpec { + name: "single-triage-bootstrap", + checkpoints: vec![ClientCheckpoint { + client_id: 0, + required: vec![ExpectedUpdate { + pubkey_b58: Some(random_pubkey.to_string()), + lamports: Some(0), + txn_signature_b58: Some(None), + ..Default::default() + }], + }], + }; + ctx.checkpoint_runner + .wait_until_satisfied(&bootstrap_checkpoint, clients) + .await?; + + ctx.validator.fund_payer().await?; + + let airdrop_signature = ctx + .validator + .airdrop(&random_pubkey, 1_000_000) + 
.await?; + + let airdrop_checkpoint = CheckpointSpec { + name: "single-triage-airdrop", + checkpoints: vec![ClientCheckpoint { + client_id: 0, + required: vec![ExpectedUpdate { + pubkey_b58: Some(random_pubkey.to_string()), + lamports: Some(1_000_000), + txn_signature_b58: Some(Some(airdrop_signature)), + ..Default::default() + }], + }], + }; + ctx.checkpoint_runner + .wait_until_satisfied(&airdrop_checkpoint, clients) + .await +} + +async fn shutdown_clients(clients: Vec) -> anyhow::Result<()> { + for client in clients { + client.shutdown().await?; + } + Ok(()) +} + +async fn shutdown_service( + controller: &crate::service::ServiceController, + service: &mut Option, +) -> anyhow::Result<()> { + if let Some(service) = service.take() { + controller.shutdown(service).await?; + } + Ok(()) +} + +fn scenario_failure_without_clients(error: anyhow::Error) -> ScenarioFailure { + ScenarioFailure { + error, + clients: Vec::new(), + } +} From 6c835dec4af64efad7dd5065708fd13a3f353d72 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 16:22:45 +0700 Subject: [PATCH 39/68] feat: wire single triage scenario --- ix-tests/src/accounts.rs | 1 + ix-tests/src/layout.rs | 4 ++++ ix-tests/src/runner.rs | 1 + ix-tests/src/scenario.rs | 3 +++ ix-tests/src/scenarios/mod.rs | 2 ++ ix-tests/src/scenarios/single_triage.rs | 19 +++++++------------ 6 files changed, 18 insertions(+), 12 deletions(-) diff --git a/ix-tests/src/accounts.rs b/ix-tests/src/accounts.rs index 73ef63b..886dc39 100644 --- a/ix-tests/src/accounts.rs +++ b/ix-tests/src/accounts.rs @@ -201,6 +201,7 @@ const BASE_SEEDS: [[u8; 32]; ACCOUNT_COUNT] = [ fn scenario_xor_byte(scenario: ScenarioName) -> u8 { match scenario { ScenarioName::All => unreachable!("All is expanded before accounts"), + ScenarioName::SingleTriage => 0xEE, ScenarioName::SingleBasic => 0xAA, ScenarioName::SingleLoad => 0xBB, ScenarioName::DualConcurrent => 0xCC, diff --git a/ix-tests/src/layout.rs b/ix-tests/src/layout.rs index 
fb9f570..d2e0da0 100644 --- a/ix-tests/src/layout.rs +++ b/ix-tests/src/layout.rs @@ -19,6 +19,10 @@ pub struct ScenarioLayout { impl ScenarioLayout { pub fn for_scenario(name: ScenarioName) -> Self { match name { + ScenarioName::SingleTriage => Self { + services: vec![ServiceInstance::One], + client_count: 1, + }, ScenarioName::SingleBasic => Self { services: vec![ServiceInstance::One], client_count: 4, diff --git a/ix-tests/src/runner.rs b/ix-tests/src/runner.rs index 0ed4864..612e0cd 100644 --- a/ix-tests/src/runner.rs +++ b/ix-tests/src/runner.rs @@ -3,6 +3,7 @@ use crate::scenario::ScenarioName; pub fn ordered_scenarios(requested: ScenarioName) -> Vec { match requested { ScenarioName::All => vec![ + ScenarioName::SingleTriage, ScenarioName::SingleBasic, ScenarioName::SingleLoad, ScenarioName::DualConcurrent, diff --git a/ix-tests/src/scenario.rs b/ix-tests/src/scenario.rs index 4d6504b..d9c324b 100644 --- a/ix-tests/src/scenario.rs +++ b/ix-tests/src/scenario.rs @@ -3,6 +3,7 @@ use anyhow::bail; #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ScenarioName { All, + SingleTriage, SingleBasic, SingleLoad, DualConcurrent, @@ -13,6 +14,7 @@ impl ScenarioName { pub fn parse(input: &str) -> anyhow::Result { match input { "all" => Ok(Self::All), + "single-triage" => Ok(Self::SingleTriage), "single-basic" => Ok(Self::SingleBasic), "single-load" => Ok(Self::SingleLoad), "dual-concurrent" => Ok(Self::DualConcurrent), @@ -24,6 +26,7 @@ impl ScenarioName { pub fn as_str(&self) -> &'static str { match self { Self::All => "all", + Self::SingleTriage => "single-triage", Self::SingleBasic => "single-basic", Self::SingleLoad => "single-load", Self::DualConcurrent => "dual-concurrent", diff --git a/ix-tests/src/scenarios/mod.rs b/ix-tests/src/scenarios/mod.rs index faf8016..638f23a 100644 --- a/ix-tests/src/scenarios/mod.rs +++ b/ix-tests/src/scenarios/mod.rs @@ -2,6 +2,7 @@ mod dual_concurrent; mod dual_restart; mod single_basic; mod single_load; +mod single_triage; use 
anyhow::anyhow; @@ -19,6 +20,7 @@ pub async fn run_scenario( ctx: &ScenarioContext, ) -> Result<(), ScenarioFailure> { match name { + ScenarioName::SingleTriage => single_triage::run(ctx).await, ScenarioName::SingleBasic => single_basic::run(ctx).await, ScenarioName::SingleLoad => single_load::run(ctx).await, ScenarioName::DualConcurrent => dual_concurrent::run(ctx).await, diff --git a/ix-tests/src/scenarios/single_triage.rs b/ix-tests/src/scenarios/single_triage.rs index be9edce..093d128 100644 --- a/ix-tests/src/scenarios/single_triage.rs +++ b/ix-tests/src/scenarios/single_triage.rs @@ -1,5 +1,5 @@ use anyhow::Context; -use solana_keypair::Keypair; +use solana_keypair::{Keypair, Signer}; use tracing::debug; use crate::client::TestGrpcClient; @@ -38,13 +38,10 @@ async fn run_inner( endpoint: &str, clients: &mut Vec, ) -> anyhow::Result<()> { - let client = TestGrpcClient::connect( - 0, - ServiceInstance::One, - endpoint.to_owned(), - ) - .await - .with_context(|| "failed to connect client 0")?; + let client = + TestGrpcClient::connect(0, ServiceInstance::One, endpoint.to_owned()) + .await + .with_context(|| "failed to connect client 0")?; clients.push(client); let random_pubkey = Keypair::new().pubkey(); @@ -71,10 +68,8 @@ async fn run_inner( ctx.validator.fund_payer().await?; - let airdrop_signature = ctx - .validator - .airdrop(&random_pubkey, 1_000_000) - .await?; + let airdrop_signature = + ctx.validator.airdrop(&random_pubkey, 1_000_000).await?; let airdrop_checkpoint = CheckpointSpec { name: "single-triage-airdrop", From a2adbc20a8ff5b3375f69b06ae0a49a492b3e3d0 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Fri, 1 May 2026 16:24:11 +0700 Subject: [PATCH 40/68] feat: add triage logging --- ix-tests/src/scenarios/single_triage.rs | 43 +++++++++++++++++++------ 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/ix-tests/src/scenarios/single_triage.rs b/ix-tests/src/scenarios/single_triage.rs index 093d128..c1c8255 100644 --- 
a/ix-tests/src/scenarios/single_triage.rs +++ b/ix-tests/src/scenarios/single_triage.rs @@ -1,6 +1,6 @@ use anyhow::Context; use solana_keypair::{Keypair, Signer}; -use tracing::debug; +use tracing::info; use crate::client::TestGrpcClient; use crate::context::ScenarioContext; @@ -11,12 +11,25 @@ use crate::service::{ServiceHandle, ServiceSpec}; pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { let spec = ServiceSpec::for_instance(ServiceInstance::One); - let mut service = Some( - ctx.service_controller - .start(&spec, &ctx.artifacts) - .await - .map_err(scenario_failure_without_clients)?, - ); + let service = ctx + .service_controller + .start(&spec, &ctx.artifacts) + .await + .map_err(scenario_failure_without_clients)?; + + if service.is_external() { + info!( + endpoint = %service.endpoint, + "single-triage attached to already-running external grpc-service" + ); + } else { + info!( + endpoint = %service.endpoint, + "single-triage launched managed grpc-service" + ); + } + + let mut service = Some(service); let mut clients = Vec::new(); let result = run_inner(ctx, &spec.endpoint, &mut clients).await; @@ -38,6 +51,8 @@ async fn run_inner( endpoint: &str, clients: &mut Vec, ) -> anyhow::Result<()> { + info!(endpoint = %endpoint, "single-triage targeting endpoint"); + let client = TestGrpcClient::connect(0, ServiceInstance::One, endpoint.to_owned()) .await @@ -45,7 +60,7 @@ async fn run_inner( clients.push(client); let random_pubkey = Keypair::new().pubkey(); - debug!("Client 0 subscribing to triage pubkey: {random_pubkey}"); + info!(pubkey = %random_pubkey, "single-triage generated random pubkey"); clients[0] .replace_subscription(&[random_pubkey.to_string()]) .await?; @@ -65,6 +80,10 @@ async fn run_inner( ctx.checkpoint_runner .wait_until_satisfied(&bootstrap_checkpoint, clients) .await?; + info!( + pubkey = %random_pubkey, + "single-triage bootstrap lamports=0 checkpoint passed" + ); ctx.validator.fund_payer().await?; @@ -85,7 +104,13 @@ 
async fn run_inner( }; ctx.checkpoint_runner .wait_until_satisfied(&airdrop_checkpoint, clients) - .await + .await?; + info!( + pubkey = %random_pubkey, + "single-triage post-airdrop checkpoint passed" + ); + + Ok(()) } async fn shutdown_clients(clients: Vec) -> anyhow::Result<()> { From 8be8b762439fdbec1326a3fe0bf603915d3ec020 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Sun, 3 May 2026 16:36:01 +0700 Subject: [PATCH 41/68] feat: scope grpc service configs per run --- ix-tests/src/artifacts.rs | 23 ++++++++++++++++ ix-tests/src/service.rs | 57 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/ix-tests/src/artifacts.rs b/ix-tests/src/artifacts.rs index 84e621c..4740edf 100644 --- a/ix-tests/src/artifacts.rs +++ b/ix-tests/src/artifacts.rs @@ -18,6 +18,7 @@ pub struct ServiceLogPaths { #[allow(dead_code)] pub struct RunArtifacts { run_dir: PathBuf, + run_id: String, failure_root: PathBuf, persist_on_failure: bool, } @@ -33,17 +34,39 @@ impl RunArtifacts { scenario.as_str(), pid )); + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis(); + let run_id = format!("{}-{}", pid, timestamp); std::fs::create_dir_all(&run_dir).with_context(|| { format!("failed to create run dir: {}", run_dir.display()) })?; Ok(Self { run_dir, + run_id, failure_root: config.failure_artifact_root.clone(), persist_on_failure: true, }) } + pub fn run_id(&self) -> &str { + &self.run_id + } + + pub fn generated_service_config_path( + &self, + instance: ServiceInstance, + ) -> PathBuf { + let label = match instance { + ServiceInstance::One => "service-1", + ServiceInstance::Two => "service-2", + }; + + self.run_dir.join(format!("{label}.generated.toml")) + } + pub fn service_logs(&self, instance: ServiceInstance) -> ServiceLogPaths { let label = match instance { ServiceInstance::One => "service-1", diff --git a/ix-tests/src/service.rs b/ix-tests/src/service.rs index 041c618..93eb7ea 100644 
--- a/ix-tests/src/service.rs +++ b/ix-tests/src/service.rs @@ -77,6 +77,49 @@ impl ServiceController { } } + fn write_generated_config( + &self, + spec: &ServiceSpec, + artifacts: &RunArtifacts, + ) -> anyhow::Result { + let base_group_id = match spec.instance { + ServiceInstance::One => "ix-tests-service-1", + ServiceInstance::Two => "ix-tests-service-2", + }; + let run_scoped_group_id = + format!("{base_group_id}-{}", artifacts.run_id()); + let base_group_id_line = format!("group_id = \"{base_group_id}\""); + let generated_group_id_line = + format!("group_id = \"{run_scoped_group_id}\""); + let config_text = std::fs::read_to_string(&spec.config_path) + .with_context(|| { + format!( + "failed to read service config template: {}", + spec.config_path.display() + ) + })?; + if config_text.matches(&base_group_id_line).count() != 1 { + bail!( + "expected exactly one `{}` entry in {}", + base_group_id_line, + spec.config_path.display() + ); + } + let generated_config_text = + config_text.replace(&base_group_id_line, &generated_group_id_line); + let generated_config_path = + artifacts.generated_service_config_path(spec.instance); + std::fs::write(&generated_config_path, generated_config_text) + .with_context(|| { + format!( + "failed to write generated service config: {}", + generated_config_path.display() + ) + })?; + + Ok(generated_config_path) + } + pub async fn start( &self, spec: &ServiceSpec, @@ -94,6 +137,8 @@ impl ServiceController { }); } + let generated_config_path = + self.write_generated_config(spec, artifacts)?; let log_paths = artifacts.service_logs(spec.instance); let stdout_file = std::fs::File::create(&log_paths.stdout) @@ -111,16 +156,24 @@ impl ServiceController { ) })?; + let base_group_id = match spec.instance { + ServiceInstance::One => "ix-tests-service-1", + ServiceInstance::Two => "ix-tests-service-2", + }; + let run_scoped_group_id = + format!("{base_group_id}-{}", artifacts.run_id()); + info!( binary = %self.service_binary.display(), - config 
= %spec.config_path.display(), + config = %generated_config_path.display(), endpoint = %spec.endpoint, + group_id = %run_scoped_group_id, "starting grpc-service" ); let child = Command::new(&self.service_binary) .arg("--config") - .arg(&spec.config_path) + .arg(&generated_config_path) .stdout(stdout_file) .stderr(stderr_file) .kill_on_drop(true) From 6242cda40b387e7d15c4ab6683e6364cbfab7c9f Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Sun, 3 May 2026 16:54:16 +0700 Subject: [PATCH 42/68] feat: generate run-scoped grpc service configs --- ix-tests/src/artifacts.rs | 28 ++++++++++++++-------------- ix-tests/src/service.rs | 27 +++++++++++++++++---------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/ix-tests/src/artifacts.rs b/ix-tests/src/artifacts.rs index 4740edf..99e57f9 100644 --- a/ix-tests/src/artifacts.rs +++ b/ix-tests/src/artifacts.rs @@ -29,15 +29,15 @@ impl RunArtifacts { scenario: ScenarioName, ) -> anyhow::Result { let pid = std::process::id(); + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis(); let run_dir = PathBuf::from(format!( "target/ix-tests/tmp/{}-{}", scenario.as_str(), pid )); - let timestamp = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_millis(); let run_id = format!("{}-{}", pid, timestamp); std::fs::create_dir_all(&run_dir).with_context(|| { format!("failed to create run dir: {}", run_dir.display()) @@ -59,25 +59,25 @@ impl RunArtifacts { &self, instance: ServiceInstance, ) -> PathBuf { - let label = match instance { - ServiceInstance::One => "service-1", - ServiceInstance::Two => "service-2", - }; - - self.run_dir.join(format!("{label}.generated.toml")) + self.run_dir + .join(format!("{}.generated.toml", Self::service_label(instance))) } pub fn service_logs(&self, instance: ServiceInstance) -> ServiceLogPaths { - let label = match instance { - ServiceInstance::One => "service-1", - ServiceInstance::Two => "service-2", - }; + let 
label = Self::service_label(instance); ServiceLogPaths { stdout: self.run_dir.join(format!("{label}.stdout.log")), stderr: self.run_dir.join(format!("{label}.stderr.log")), } } + fn service_label(instance: ServiceInstance) -> &'static str { + match instance { + ServiceInstance::One => "service-1", + ServiceInstance::Two => "service-2", + } + } + pub fn dump_service_logs_at(paths: &ServiceLogPaths) -> anyhow::Result<()> { for path in &[&paths.stdout, &paths.stderr] { if path.exists() { diff --git a/ix-tests/src/service.rs b/ix-tests/src/service.rs index 93eb7ea..e541445 100644 --- a/ix-tests/src/service.rs +++ b/ix-tests/src/service.rs @@ -82,12 +82,9 @@ impl ServiceController { spec: &ServiceSpec, artifacts: &RunArtifacts, ) -> anyhow::Result { - let base_group_id = match spec.instance { - ServiceInstance::One => "ix-tests-service-1", - ServiceInstance::Two => "ix-tests-service-2", - }; + let base_group_id = Self::base_group_id(spec.instance); let run_scoped_group_id = - format!("{base_group_id}-{}", artifacts.run_id()); + Self::run_scoped_group_id(spec.instance, artifacts); let base_group_id_line = format!("group_id = \"{base_group_id}\""); let generated_group_id_line = format!("group_id = \"{run_scoped_group_id}\""); @@ -120,6 +117,20 @@ impl ServiceController { Ok(generated_config_path) } + fn base_group_id(instance: ServiceInstance) -> &'static str { + match instance { + ServiceInstance::One => "ix-tests-service-1", + ServiceInstance::Two => "ix-tests-service-2", + } + } + + fn run_scoped_group_id( + instance: ServiceInstance, + artifacts: &RunArtifacts, + ) -> String { + format!("{}-{}", Self::base_group_id(instance), artifacts.run_id()) + } + pub async fn start( &self, spec: &ServiceSpec, @@ -156,12 +167,8 @@ impl ServiceController { ) })?; - let base_group_id = match spec.instance { - ServiceInstance::One => "ix-tests-service-1", - ServiceInstance::Two => "ix-tests-service-2", - }; let run_scoped_group_id = - format!("{base_group_id}-{}", 
artifacts.run_id()); + Self::run_scoped_group_id(spec.instance, artifacts); info!( binary = %self.service_binary.display(), From 4d4910b7b87440da13ed35fa8b681db08f6bbf98 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Sun, 3 May 2026 16:58:21 +0700 Subject: [PATCH 43/68] feat: add grpc-service cooperative shutdown --- Cargo.lock | 2 + grpc-service/Cargo.toml | 1 + grpc-service/src/app.rs | 53 ++++++++++++++++---- grpc-service/src/kafka.rs | 91 +++++++++++++++++++++++------------ grpc-service/src/main.rs | 38 +++++++++++++-- grpc-service/src/preflight.rs | 8 ++- 6 files changed, 148 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 966177c..59fe556 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2442,6 +2442,7 @@ dependencies = [ "thiserror 2.0.18", "tokio", "tokio-stream", + "tokio-util", "toml 0.9.12+spec-1.1.0", "tonic", "tracing", @@ -7391,6 +7392,7 @@ dependencies = [ "bytes", "futures-core", "futures-sink", + "futures-util", "pin-project-lite", "tokio", ] diff --git a/grpc-service/Cargo.toml b/grpc-service/Cargo.toml index a2790b4..45ddcb8 100644 --- a/grpc-service/Cargo.toml +++ b/grpc-service/Cargo.toml @@ -23,6 +23,7 @@ serde_json = "1.0" toml = "0.9.12" tokio = { version = "1.47", features = ["macros", "rt-multi-thread", "signal"] } tokio-stream = { version = "0.1", features = ["net"] } +tokio-util = { version = "0.7", features = ["rt"] } tonic = { version = "0.12", features = ["transport"] } tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } diff --git a/grpc-service/src/app.rs b/grpc-service/src/app.rs index 6d84207..9ffdb91 100644 --- a/grpc-service/src/app.rs +++ b/grpc-service/src/app.rs @@ -12,6 +12,7 @@ use crate::traits::{ AccountSink, AccountUpdateSource, SnapshotStore, StatusSink, }; use std::time::Duration; +use tokio_util::sync::CancellationToken; pub struct App< P: SnapshotStore, @@ -25,6 +26,7 @@ pub struct App< sink: A, status_sink: S, readiness: ServiceReadiness, + shutdown: 
CancellationToken, } impl @@ -39,15 +41,21 @@ impl pub fn new(config: Config) -> GeykagResult { let snapshot_store = KsqlAccountSnapshotClient::new(config.ksql.clone())?; - let account_update_source = - KafkaAccountUpdateStream::new(config.kafka.clone()); + let readiness = ServiceReadiness::new(); + let shutdown = CancellationToken::new(); + let account_update_source = KafkaAccountUpdateStream::new( + config.kafka.clone(), + readiness.clone(), + shutdown.clone(), + ); Ok(Self::build( config, snapshot_store, account_update_source, ConsoleSink::new(), ConsoleSink::new(), - ServiceReadiness::new(), + readiness, + shutdown, )) } } @@ -61,20 +69,28 @@ impl > { #[allow(dead_code)] - pub fn new_grpc(config: Config) -> GeykagResult<(Self, GrpcServiceHandle)> { + pub fn new_grpc( + config: Config, + shutdown: CancellationToken, + ) -> GeykagResult<(Self, GrpcServiceHandle)> { let grpc = GrpcService::start(&config)?; let sink = grpc.sink(); let snapshot_store = KsqlAccountSnapshotClient::new(config.ksql.clone())?; - let account_update_source = - KafkaAccountUpdateStream::new(config.kafka.clone()); + let readiness = grpc.readiness(); + let account_update_source = KafkaAccountUpdateStream::new( + config.kafka.clone(), + readiness.clone(), + shutdown.clone(), + ); let app = Self::build( config, snapshot_store, account_update_source, sink, ConsoleSink::new(), - grpc.readiness(), + readiness, + shutdown, ); Ok((app, grpc)) @@ -91,20 +107,26 @@ impl { pub fn new_grpc_with_console( config: Config, + shutdown: CancellationToken, ) -> GeykagResult<(Self, GrpcServiceHandle)> { let grpc = GrpcService::start(&config)?; let sink = TeeSink::new(grpc.sink(), ConsoleSink::new()); let snapshot_store = KsqlAccountSnapshotClient::new(config.ksql.clone())?; - let account_update_source = - KafkaAccountUpdateStream::new(config.kafka.clone()); + let readiness = grpc.readiness(); + let account_update_source = KafkaAccountUpdateStream::new( + config.kafka.clone(), + readiness.clone(), + 
shutdown.clone(), + ); let app = Self::build( config, snapshot_store, account_update_source, sink, ConsoleSink::new(), - grpc.readiness(), + readiness, + shutdown, ); Ok((app, grpc)) @@ -121,6 +143,7 @@ impl sink: A, status_sink: S, readiness: ServiceReadiness, + shutdown: CancellationToken, ) -> Self { Self { config, @@ -129,6 +152,7 @@ impl sink, status_sink, readiness, + shutdown, } } @@ -166,6 +190,7 @@ impl tracing::info!("service marked as ready"); } + let _ = &self.shutdown; self.account_update_source .run(self.config.pubkey_filter.as_ref(), |message| { let event = AccountEvent::Live(message); @@ -194,6 +219,7 @@ mod tests { use crate::traits::{ AccountSink, AccountUpdateSource, SnapshotStore, StatusSink, }; + use tokio_util::sync::CancellationToken; fn config(pubkey_filter: Option) -> Config { Config { @@ -439,6 +465,7 @@ mod tests { sink.clone(), status_sink.clone(), ServiceReadiness::ready_for_test(), + CancellationToken::new(), ); app.run().await.unwrap(); @@ -464,6 +491,7 @@ mod tests { RecordingSink::new(false, false), status_sink.clone(), ServiceReadiness::ready_for_test(), + CancellationToken::new(), ); app.run().await.unwrap(); @@ -489,6 +517,7 @@ mod tests { RecordingSink::new(false, false), status_sink.clone(), ServiceReadiness::ready_for_test(), + CancellationToken::new(), ); app.run().await.unwrap(); @@ -517,6 +546,7 @@ mod tests { RecordingSink::new(false, false), RecordingStatusSink::new(), ServiceReadiness::ready_for_test(), + CancellationToken::new(), ); let error = app.run().await.unwrap_err(); @@ -538,6 +568,7 @@ mod tests { RecordingSink::new(true, false), RecordingStatusSink::new(), ServiceReadiness::ready_for_test(), + CancellationToken::new(), ); let error = app.run().await.unwrap_err(); @@ -558,6 +589,7 @@ mod tests { RecordingSink::new(false, true), RecordingStatusSink::new(), ServiceReadiness::ready_for_test(), + CancellationToken::new(), ); let error = app.run().await.unwrap_err(); @@ -578,6 +610,7 @@ mod tests { 
RecordingSink::new(false, false), RecordingStatusSink::new(), ServiceReadiness::ready_for_test(), + CancellationToken::new(), ); let error = app.run().await.unwrap_err(); diff --git a/grpc-service/src/kafka.rs b/grpc-service/src/kafka.rs index c1425d8..f407942 100644 --- a/grpc-service/src/kafka.rs +++ b/grpc-service/src/kafka.rs @@ -6,19 +6,31 @@ use prost::Message; use rdkafka::Message as KafkaMessage; use rdkafka::config::ClientConfig; use rdkafka::consumer::{Consumer, StreamConsumer}; +use tokio_util::sync::CancellationToken; use tracing::{error, info, warn}; use crate::config::KafkaConfig; use crate::domain::{AccountUpdate, PubkeyFilter, bytes_to_base58}; use crate::errors::{GeykagError, GeykagResult}; +use crate::grpc_service::ServiceReadiness; use crate::traits::AccountUpdateSource; pub struct KafkaAccountUpdateStream { config: KafkaConfig, + readiness: ServiceReadiness, + shutdown: CancellationToken, } impl KafkaAccountUpdateStream { - pub fn new(config: KafkaConfig) -> Self { - Self { config } + pub fn new( + config: KafkaConfig, + readiness: ServiceReadiness, + shutdown: CancellationToken, + ) -> Self { + Self { + config, + readiness, + shutdown, + } } /// Verify that the configured Kafka broker is reachable by @@ -100,43 +112,60 @@ impl KafkaAccountUpdateStream { "listening for Kafka messages" ); + let _ = &self.readiness; let mut stream = consumer.stream(); - while let Some(message) = stream.next().await { - match message { - Ok(msg) => { - let Some(payload) = msg.payload() else { - warn!( - partition = msg.partition(), - offset = msg.offset(), - "skipping empty payload" - ); - continue; + loop { + tokio::select! 
{ + _ = self.shutdown.cancelled() => { + info!( + group_id = self.config.group_id, + topic = self.config.topic, + "Kafka consumer shutdown requested" + ); + break; + } + message = stream.next() => { + let Some(message) = message else { + break; }; - match decode_account_update(payload) { - Ok(account) => { - if !account.matches_filter(filter) { + match message { + Ok(msg) => { + let Some(payload) = msg.payload() else { + warn!( + partition = msg.partition(), + offset = msg.offset(), + "skipping empty payload" + ); continue; + }; + + match decode_account_update(payload) { + Ok(account) => { + if !account.matches_filter(filter) { + continue; + } + + handler(StreamMessage { + account, + partition: msg.partition(), + offset: msg.offset(), + timestamp: format!("{:?}", msg.timestamp()), + })?; + } + Err(err) => { + warn!( + partition = msg.partition(), + offset = msg.offset(), + error = %err, + "failed to decode message payload" + ); + } } - - handler(StreamMessage { - account, - partition: msg.partition(), - offset: msg.offset(), - timestamp: format!("{:?}", msg.timestamp()), - })?; - } - Err(err) => { - warn!( - partition = msg.partition(), - offset = msg.offset(), - error = %err, - "failed to decode message payload" - ); } + Err(err) => error!(error = %err, "Kafka consumer error"), } } - Err(err) => error!(error = %err, "Kafka consumer error"), } } diff --git a/grpc-service/src/main.rs b/grpc-service/src/main.rs index 77c7eef..d4fa2a3 100644 --- a/grpc-service/src/main.rs +++ b/grpc-service/src/main.rs @@ -10,6 +10,7 @@ mod preflight; mod traits; use anyhow::Result; +use tokio_util::sync::CancellationToken; use crate::app::App; use crate::config::Config; @@ -18,10 +19,41 @@ use crate::config::Config; async fn main() -> Result<()> { init_tracing(); let config = Config::load()?; - let (app, grpc_handle) = App::new_grpc_with_console(config)?; - let result = app.run().await; + let shutdown = CancellationToken::new(); + let (app, grpc_handle) = + 
App::new_grpc_with_console(config, shutdown.clone())?; + let mut app_task = tokio::spawn(async move { app.run().await }); + + let app_result = tokio::select! { + result = &mut app_task => result?, + _ = shutdown_signal() => { + tracing::info!("shutdown requested"); + shutdown.cancel(); + app_task.await? + } + }; + grpc_handle.shutdown().await?; - Ok(result?) + Ok(app_result?) +} + +async fn shutdown_signal() { + #[cfg(unix)] + { + use tokio::signal::unix::{SignalKind, signal}; + + let mut terminate = signal(SignalKind::terminate()) + .expect("failed to install SIGTERM handler"); + tokio::select! { + _ = tokio::signal::ctrl_c() => {} + _ = terminate.recv() => {} + } + } + + #[cfg(not(unix))] + { + let _ = tokio::signal::ctrl_c().await; + } } fn init_tracing() { diff --git a/grpc-service/src/preflight.rs b/grpc-service/src/preflight.rs index e0e8dd2..00ee008 100644 --- a/grpc-service/src/preflight.rs +++ b/grpc-service/src/preflight.rs @@ -6,7 +6,9 @@ use tracing::{debug, info, warn}; use crate::config::Config; use crate::errors::{GeykagError, GeykagResult}; +use crate::grpc_service::ServiceReadiness; use crate::kafka::KafkaAccountUpdateStream; +use tokio_util::sync::CancellationToken; const PROBE_INITIAL_BACKOFF: Duration = Duration::from_millis(250); const PROBE_MAX_BACKOFF: Duration = Duration::from_secs(2); @@ -46,7 +48,11 @@ pub async fn wait_for_dependencies( }) .await?; - let kafka_stream = KafkaAccountUpdateStream::new(config.kafka.clone()); + let kafka_stream = KafkaAccountUpdateStream::new( + config.kafka.clone(), + ServiceReadiness::new(), + CancellationToken::new(), + ); run_probe_with_retry("kafka-metadata", deadline, || async { kafka_stream.probe() }) From 69ea2a078079a1751804c73eccb892ff720711d1 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Sun, 3 May 2026 17:01:05 +0700 Subject: [PATCH 44/68] feat: request graceful ix-test service shutdown --- Cargo.lock | 13 +++++++ ix-tests/Cargo.toml | 1 + ix-tests/src/service.rs | 76 
++++++++++++++++++++++++++++++++++++----- 3 files changed, 81 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 59fe556..5ad399a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2214,6 +2214,7 @@ dependencies = [ "bs58", "futures", "helius-laserstream", + "nix", "reqwest 0.12.28", "serde", "serde_json", @@ -2547,6 +2548,18 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags 2.11.1", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" diff --git a/ix-tests/Cargo.toml b/ix-tests/Cargo.toml index c1071e3..4177cb1 100644 --- a/ix-tests/Cargo.toml +++ b/ix-tests/Cargo.toml @@ -8,6 +8,7 @@ anyhow = "1.0" bs58 = "0.5" futures = "0.3" helius-laserstream = { git = "https://github.com/magicblock-labs/laserstream-sdk", rev = "fe205cb2b85864d1821027d663813d66160285dc" } +nix = { version = "0.30", features = ["signal"] } reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" diff --git a/ix-tests/src/service.rs b/ix-tests/src/service.rs index e541445..5defba9 100644 --- a/ix-tests/src/service.rs +++ b/ix-tests/src/service.rs @@ -4,6 +4,8 @@ use std::time::Duration; use anyhow::{Context, bail}; use helius_laserstream::grpc::PingRequest; use helius_laserstream::grpc::geyser_client::GeyserClient; +use nix::sys::signal::{self, Signal}; +use nix::unistd::Pid; use tokio::process::Command; use tracing::{debug, info, warn}; @@ -11,6 +13,8 @@ use crate::artifacts::RunArtifacts; use crate::config::SuiteConfig; use crate::layout::ServiceInstance; +const GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5); + #[allow(dead_code)] pub enum ServiceOwnership { Owned(tokio::process::Child), @@ -206,17 +210,71 @@ impl 
ServiceController { pub async fn shutdown(&self, service: ServiceHandle) -> anyhow::Result<()> { match service.ownership { ServiceOwnership::Owned(mut child) => { - info!( - endpoint = %service.endpoint, - "shutting down grpc-service" - ); - child.start_kill().context("failed to send kill")?; - let status = - child.wait().await.context("failed to wait for child")?; - debug!( + let pid = child.id(); + + if let Some(pid) = pid { + match signal::kill( + Pid::from_raw(pid as i32), + Signal::SIGTERM, + ) { + Ok(()) => { + info!( + endpoint = %service.endpoint, + pid, + "graceful shutdown requested for grpc-service" + ); + + match tokio::time::timeout( + GRACEFUL_SHUTDOWN_TIMEOUT, + child.wait(), + ) + .await + { + Ok(wait_result) => { + let status = wait_result.context( + "failed to wait for child after SIGTERM", + )?; + info!( + endpoint = %service.endpoint, + pid, + status = %status, + "grpc-service shut down gracefully" + ); + return Ok(()); + } + Err(_) => { + warn!( + endpoint = %service.endpoint, + pid, + timeout = ?GRACEFUL_SHUTDOWN_TIMEOUT, + "graceful shutdown timed out; forcing grpc-service kill" + ); + } + } + } + Err(err) => { + warn!( + endpoint = %service.endpoint, + pid, + error = %err, + "failed to request graceful shutdown; forcing grpc-service kill" + ); + } + } + } + + child + .start_kill() + .context("failed to send forced kill to grpc-service")?; + let status = child + .wait() + .await + .context("failed to wait for child after forced kill")?; + warn!( endpoint = %service.endpoint, + pid = pid.unwrap_or_default(), status = %status, - "grpc-service exited" + "grpc-service shutdown was forced" ); } ServiceOwnership::External => { From 583a05b998fe7a1b71145332cd873c948d94b742 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Sun, 3 May 2026 17:04:02 +0700 Subject: [PATCH 45/68] feat: gate grpc readiness on kafka assignment --- grpc-service/src/app.rs | 8 +- grpc-service/src/grpc_service/readiness.rs | 86 ++++++++++++++++------ 
grpc-service/src/kafka.rs | 62 ++++++++++++++-- 3 files changed, 122 insertions(+), 34 deletions(-) diff --git a/grpc-service/src/app.rs b/grpc-service/src/app.rs index 9ffdb91..1330a6a 100644 --- a/grpc-service/src/app.rs +++ b/grpc-service/src/app.rs @@ -186,11 +186,13 @@ impl Duration::from_secs(60), ) .await?; - self.readiness.mark_ready(); - tracing::info!("service marked as ready"); + self.readiness.mark_preflight_ready(); + tracing::info!( + "startup preflight complete; waiting for Kafka consumer assignment before advertising readiness" + ); } - let _ = &self.shutdown; + let _shutdown = &self.shutdown; self.account_update_source .run(self.config.pubkey_filter.as_ref(), |message| { let event = AccountEvent::Live(message); diff --git a/grpc-service/src/grpc_service/readiness.rs b/grpc-service/src/grpc_service/readiness.rs index df1609f..48684fa 100644 --- a/grpc-service/src/grpc_service/readiness.rs +++ b/grpc-service/src/grpc_service/readiness.rs @@ -1,47 +1,58 @@ use std::sync::Arc; use std::sync::atomic::{AtomicBool, Ordering}; -/// Shared startup-readiness flag for the gRPC service. +#[derive(Debug, Default)] +struct ServiceReadinessInner { + preflight_ready: AtomicBool, + kafka_ready: AtomicBool, +} + +/// Shared startup-readiness state for the gRPC service. /// -/// The service starts in the "not ready" state. Once startup -/// preflight has verified all required dependencies, the owner of the -/// state must call [`ServiceReadiness::mark_ready`]. The Ping handler -/// reads the state to decide whether to advertise the service as ready -/// to clients. +/// The service starts in the "not ready" state. It becomes ready only +/// after startup preflight has verified all required dependencies and +/// the Kafka consumer has received a partition assignment. #[derive(Clone, Debug, Default)] #[allow(dead_code)] pub struct ServiceReadiness { - inner: Arc, + inner: Arc, } impl ServiceReadiness { - /// Construct a new readiness flag in the "not ready" state. 
#[allow(dead_code)] pub fn new() -> Self { Self { - inner: Arc::new(AtomicBool::new(false)), + inner: Arc::new(ServiceReadinessInner::default()), } } - /// Test-only constructor that starts in the "ready" state. Used by - /// unit tests that want to bypass the preflight gate. #[cfg(test)] pub fn ready_for_test() -> Self { - Self { - inner: Arc::new(AtomicBool::new(true)), - } + let readiness = Self::new(); + readiness.mark_preflight_ready(); + readiness.mark_kafka_ready(); + readiness } - /// Flip the flag to `ready`. Idempotent; safe to call repeatedly. #[allow(dead_code)] - pub fn mark_ready(&self) { - self.inner.store(true, Ordering::Release); + pub fn mark_preflight_ready(&self) { + self.inner.preflight_ready.store(true, Ordering::Release); + } + + #[allow(dead_code)] + pub fn mark_kafka_ready(&self) { + self.inner.kafka_ready.store(true, Ordering::Release); + } + + #[allow(dead_code)] + pub fn mark_kafka_not_ready(&self) { + self.inner.kafka_ready.store(false, Ordering::Release); } - /// Return whether the service has finished startup preflight. 
#[allow(dead_code)] pub fn is_ready(&self) -> bool { - self.inner.load(Ordering::Acquire) + self.inner.preflight_ready.load(Ordering::Acquire) + && self.inner.kafka_ready.load(Ordering::Acquire) } } @@ -56,19 +67,48 @@ mod tests { } #[test] - fn test_mark_ready_sets_flag() { + fn test_preflight_alone_does_not_make_service_ready() { + let r = ServiceReadiness::new(); + r.mark_preflight_ready(); + assert!(!r.is_ready()); + } + + #[test] + fn test_kafka_alone_does_not_make_service_ready() { let r = ServiceReadiness::new(); - r.mark_ready(); + r.mark_kafka_ready(); + assert!(!r.is_ready()); + } + + #[test] + fn test_service_becomes_ready_only_after_both_flags() { + let r = ServiceReadiness::new(); + r.mark_preflight_ready(); + r.mark_kafka_ready(); + assert!(r.is_ready()); + } + + #[test] + fn test_mark_kafka_not_ready_clears_readiness() { + let r = ServiceReadiness::new(); + r.mark_preflight_ready(); + r.mark_kafka_ready(); assert!(r.is_ready()); + + r.mark_kafka_not_ready(); + assert!(!r.is_ready()); } #[test] fn test_clones_share_state() { let r = ServiceReadiness::new(); let r2 = r.clone(); - assert!(!r2.is_ready()); - r.mark_ready(); + r.mark_preflight_ready(); + r.mark_kafka_ready(); assert!(r2.is_ready()); + + r2.mark_kafka_not_ready(); + assert!(!r.is_ready()); } #[test] diff --git a/grpc-service/src/kafka.rs b/grpc-service/src/kafka.rs index f407942..8a28f79 100644 --- a/grpc-service/src/kafka.rs +++ b/grpc-service/src/kafka.rs @@ -4,8 +4,9 @@ use magigblock_event_proto::{ }; use prost::Message; use rdkafka::Message as KafkaMessage; +use rdkafka::client::ClientContext; use rdkafka::config::ClientConfig; -use rdkafka::consumer::{Consumer, StreamConsumer}; +use rdkafka::consumer::{Consumer, ConsumerContext, Rebalance, StreamConsumer}; use tokio_util::sync::CancellationToken; use tracing::{error, info, warn}; @@ -14,6 +15,50 @@ use crate::domain::{AccountUpdate, PubkeyFilter, bytes_to_base58}; use crate::errors::{GeykagError, GeykagResult}; use 
crate::grpc_service::ServiceReadiness; use crate::traits::AccountUpdateSource; + +struct ReadinessConsumerContext { + readiness: ServiceReadiness, + group_id: String, +} + +impl ClientContext for ReadinessConsumerContext {} + +impl ConsumerContext for ReadinessConsumerContext { + fn post_rebalance( + &self, + _base_consumer: &rdkafka::consumer::BaseConsumer, + rebalance: &Rebalance<'_>, + ) { + match rebalance { + Rebalance::Assign(assignment) if assignment.count() > 0 => { + self.readiness.mark_kafka_ready(); + info!( + group_id = self.group_id, + partition_count = assignment.count(), + "Kafka consumer received partition assignment" + ); + } + Rebalance::Assign(_) => {} + Rebalance::Revoke(partitions) => { + self.readiness.mark_kafka_not_ready(); + info!( + group_id = self.group_id, + partition_count = partitions.count(), + "Kafka consumer partitions revoked" + ); + } + Rebalance::Error(err) => { + self.readiness.mark_kafka_not_ready(); + warn!( + group_id = self.group_id, + error = %err, + "Kafka consumer lost readiness during rebalance" + ); + } + } + } +} + pub struct KafkaAccountUpdateStream { config: KafkaConfig, readiness: ServiceReadiness, @@ -87,12 +132,14 @@ impl KafkaAccountUpdateStream { .set("auto.offset.reset", &self.config.auto_offset_reset) .set("enable.auto.commit", "true"); - let consumer: StreamConsumer = - client_config.create().map_err(|source| { - GeykagError::KafkaConsumerCreate { - broker: self.config.bootstrap_servers.clone(), - source, - } + let consumer: StreamConsumer = client_config + .create_with_context(ReadinessConsumerContext { + readiness: self.readiness.clone(), + group_id: self.config.group_id.clone(), + }) + .map_err(|source| GeykagError::KafkaConsumerCreate { + broker: self.config.bootstrap_servers.clone(), + source, })?; consumer @@ -112,7 +159,6 @@ impl KafkaAccountUpdateStream { "listening for Kafka messages" ); - let _ = &self.readiness; let mut stream = consumer.stream(); loop { tokio::select! 
{ From 263a272c23db9023d6ad98a3c317a9701d3a348a Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Sun, 3 May 2026 17:06:54 +0700 Subject: [PATCH 46/68] feat(grpc-service): add startup delay observability logs --- grpc-service/src/grpc_service/init_subs.rs | 19 +++++++++- grpc-service/src/grpc_service/service.rs | 41 ++++++++++++++++++-- grpc-service/src/kafka.rs | 44 +++++++++++++++++----- grpc-service/src/main.rs | 1 + 4 files changed, 90 insertions(+), 15 deletions(-) diff --git a/grpc-service/src/grpc_service/init_subs.rs b/grpc-service/src/grpc_service/init_subs.rs index da82962..914a7b1 100644 --- a/grpc-service/src/grpc_service/init_subs.rs +++ b/grpc-service/src/grpc_service/init_subs.rs @@ -1,3 +1,5 @@ +use tracing::{debug, info}; + use crate::errors::{GeykagError, GeykagResult}; use crate::traits::ValidatorSubscriptions; @@ -34,6 +36,12 @@ impl InitSubsClient { .join(","); let body = format!(r#"{{"pubkeys":[{pubkeys_json}]}}"#); + debug!( + url = %self.accounts_filter_url, + pubkey_count = pubkeys.len(), + "validator whitelist HTTP POST starting" + ); + self.http .post(&self.accounts_filter_url) .header(reqwest::header::CONTENT_TYPE, "application/json") @@ -42,8 +50,15 @@ impl InitSubsClient { .await .map_err(|source| GeykagError::InitSubsRequest { source })? 
.error_for_status() - .map(|_| ()) - .map_err(|source| GeykagError::InitSubsRequestStatus { source }) + .map_err(|source| GeykagError::InitSubsRequestStatus { source })?; + + info!( + url = %self.accounts_filter_url, + pubkey_count = pubkeys.len(), + "validator whitelist HTTP POST completed" + ); + + Ok(()) } } diff --git a/grpc-service/src/grpc_service/service.rs b/grpc-service/src/grpc_service/service.rs index 83af46b..b173107 100644 --- a/grpc-service/src/grpc_service/service.rs +++ b/grpc-service/src/grpc_service/service.rs @@ -105,6 +105,12 @@ async fn bootstrap_new_pubkeys_impl< // will publish one of two Kafka updates: // - the current account update if the account exists // - a MissingAccount update if the account does not exist + debug!( + client_id, + pubkey = %pubkey_b58, + "fetching snapshot bootstrap for pubkey" + ); + let snapshot = match snapshot_store.fetch_one_by_pubkey(&pubkey).await { Ok(snapshot) => snapshot, Err(error) => { @@ -143,7 +149,13 @@ async fn bootstrap_new_pubkeys_impl< }; match dispatcher.send_to_client(client_id, update).await { - Ok(TargetedSendResult::Delivered) => {} + Ok(TargetedSendResult::Delivered) => { + info!( + client_id, + pubkey = %pubkey_b58, + "snapshot bootstrap dispatched" + ); + } Ok(TargetedSendResult::ClientNotFound) => { warn!( client_id, @@ -187,7 +199,8 @@ async fn bootstrap_new_pubkeys_impl< info!( client_id, pubkey_count = pubkeys_to_whitelist.len(), - "whitelisting ksql-missing pubkeys with validator" + pubkeys = ?pubkeys_to_whitelist, + "validator whitelist request starting" ); if let Err(error) = validator_subscriptions @@ -200,6 +213,13 @@ async fn bootstrap_new_pubkeys_impl< error = %error, "failed to whitelist pubkeys with validator" ); + } else { + info!( + client_id, + pubkey_count = pubkeys_to_whitelist.len(), + pubkeys = ?pubkeys_to_whitelist, + "validator whitelist request completed" + ); } } @@ -264,6 +284,15 @@ fn parse_pubkey_list(accounts: &[String]) -> Result, Status> { Ok(set) } +fn 
normalized_pubkeys(pubkeys: &HashSet<[u8; 32]>) -> Vec { + let mut normalized = pubkeys + .iter() + .map(|pubkey| bs58::encode(pubkey).into_string()) + .collect::>(); + normalized.sort(); + normalized +} + enum FilterOp { Replace(HashSet<[u8; 32]>), Patch { @@ -313,9 +342,15 @@ impl< })?; let initial_filter = parse_accounts_filter(&first_req)?; + let initial_pubkeys = normalized_pubkeys(&initial_filter); info!( filter_size = initial_filter.len(), - "new gRPC subscriber connected" + pubkeys = tracing::field::debug(if initial_pubkeys.is_empty() { + None::<&Vec> + } else { + Some(&initial_pubkeys) + }), + "gRPC subscribe request received" ); // 2. Register with dispatcher using parsed filter diff --git a/grpc-service/src/kafka.rs b/grpc-service/src/kafka.rs index 8a28f79..2f2134d 100644 --- a/grpc-service/src/kafka.rs +++ b/grpc-service/src/kafka.rs @@ -7,8 +7,9 @@ use rdkafka::Message as KafkaMessage; use rdkafka::client::ClientContext; use rdkafka::config::ClientConfig; use rdkafka::consumer::{Consumer, ConsumerContext, Rebalance, StreamConsumer}; +use rdkafka::topic_partition_list::TopicPartitionList; use tokio_util::sync::CancellationToken; -use tracing::{error, info, warn}; +use tracing::{debug, error, info, warn}; use crate::config::KafkaConfig; use crate::domain::{AccountUpdate, PubkeyFilter, bytes_to_base58}; @@ -34,8 +35,8 @@ impl ConsumerContext for ReadinessConsumerContext { self.readiness.mark_kafka_ready(); info!( group_id = self.group_id, - partition_count = assignment.count(), - "Kafka consumer received partition assignment" + partitions = ?topic_partition_list(assignment), + "Kafka partitions assigned" ); } Rebalance::Assign(_) => {} @@ -43,8 +44,8 @@ impl ConsumerContext for ReadinessConsumerContext { self.readiness.mark_kafka_not_ready(); info!( group_id = self.group_id, - partition_count = partitions.count(), - "Kafka consumer partitions revoked" + partitions = ?topic_partition_list(partitions), + "Kafka partitions revoked" ); } 
Rebalance::Error(err) => { @@ -59,6 +60,16 @@ impl ConsumerContext for ReadinessConsumerContext { } } +fn topic_partition_list(partitions: &TopicPartitionList) -> Vec { + partitions + .elements() + .iter() + .map(|partition| { + format!("{}:{}", partition.topic(), partition.partition()) + }) + .collect() +} + pub struct KafkaAccountUpdateStream { config: KafkaConfig, readiness: ServiceReadiness, @@ -142,6 +153,14 @@ impl KafkaAccountUpdateStream { source, })?; + info!( + broker = self.config.bootstrap_servers, + topic = self.config.topic, + group_id = self.config.group_id, + auto_offset_reset = self.config.auto_offset_reset, + "Kafka consumer created" + ); + consumer .subscribe(&[&self.config.topic]) .map_err(|source| GeykagError::KafkaSubscribe { @@ -150,13 +169,9 @@ impl KafkaAccountUpdateStream { })?; info!( - broker = self.config.bootstrap_servers, topic = self.config.topic, group_id = self.config.group_id, - auto_offset_reset = self.config.auto_offset_reset, - pubkey_filter = - filter.map(PubkeyFilter::as_str).unwrap_or("(none)"), - "listening for Kafka messages" + "Kafka subscribe issued" ); let mut stream = consumer.stream(); @@ -192,6 +207,15 @@ impl KafkaAccountUpdateStream { continue; } + debug!( + group_id = self.config.group_id, + partition = msg.partition(), + offset = msg.offset(), + pubkey = %account.pubkey_b58, + write_version = account.write_version, + "Kafka message consumed" + ); + handler(StreamMessage { account, partition: msg.partition(), diff --git a/grpc-service/src/main.rs b/grpc-service/src/main.rs index d4fa2a3..66258be 100644 --- a/grpc-service/src/main.rs +++ b/grpc-service/src/main.rs @@ -18,6 +18,7 @@ use crate::config::Config; #[tokio::main] async fn main() -> Result<()> { init_tracing(); + tracing::info!("grpc-service process starting"); let config = Config::load()?; let shutdown = CancellationToken::new(); let (app, grpc_handle) = From 328592d8e73b25a3f0d1114f86f4f27df32c2496 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: 
Sun, 3 May 2026 17:07:55 +0700 Subject: [PATCH 47/68] feat(kafka-setup): tune local kafka rebalance timing --- ix-tests/configs/grpc-service/service-1.toml | 4 ++++ ix-tests/configs/grpc-service/service-2.toml | 4 ++++ kafka-setup/README.md | 4 ++++ kafka-setup/docker-compose.yml | 1 + 4 files changed, 13 insertions(+) diff --git a/ix-tests/configs/grpc-service/service-1.toml b/ix-tests/configs/grpc-service/service-1.toml index 4c537e7..c38cf54 100644 --- a/ix-tests/configs/grpc-service/service-1.toml +++ b/ix-tests/configs/grpc-service/service-1.toml @@ -4,6 +4,10 @@ topic = "solana.testnet.account_updates" group_id = "ix-tests-service-1" auto_offset_reset = "latest" +[kafka.client] +session.timeout.ms = "6000" +heartbeat.interval.ms = "2000" + [ksql] url = "http://localhost:8088" table = "ACCOUNTS" diff --git a/ix-tests/configs/grpc-service/service-2.toml b/ix-tests/configs/grpc-service/service-2.toml index ae5a348..6144ed2 100644 --- a/ix-tests/configs/grpc-service/service-2.toml +++ b/ix-tests/configs/grpc-service/service-2.toml @@ -4,6 +4,10 @@ topic = "solana.testnet.account_updates" group_id = "ix-tests-service-2" auto_offset_reset = "latest" +[kafka.client] +session.timeout.ms = "6000" +heartbeat.interval.ms = "2000" + [ksql] url = "http://localhost:8088" table = "ACCOUNTS" diff --git a/kafka-setup/README.md b/kafka-setup/README.md index de7ae32..e540e12 100644 --- a/kafka-setup/README.md +++ b/kafka-setup/README.md @@ -15,3 +15,7 @@ Available workflows: environment. It rebuilds the Kafka topic and the dependent ksqlDB state without restarting Docker or re-running the broader `make ready` workflow. + +The local compose stack sets `KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0` +to reduce Kafka consumer cold-start delay during development and +integration tests. 
diff --git a/kafka-setup/docker-compose.yml b/kafka-setup/docker-compose.yml index df9a5eb..78a2eb6 100644 --- a/kafka-setup/docker-compose.yml +++ b/kafka-setup/docker-compose.yml @@ -14,6 +14,7 @@ services: KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 + KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 networks: - ksqldb zookeeper: From 20fc740548638eedad0ff4cf7551268a9220f755 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 5 May 2026 18:17:29 +0800 Subject: [PATCH 48/68] fix: align grpc probe and bind ports --- ix-tests/configs/grpc-service/service-1.toml | 2 +- ix-tests/configs/grpc-service/service-2.toml | 2 +- ix-tests/src/service.rs | 77 +++++++++++++++++++- 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/ix-tests/configs/grpc-service/service-1.toml b/ix-tests/configs/grpc-service/service-1.toml index c38cf54..2c573f1 100644 --- a/ix-tests/configs/grpc-service/service-1.toml +++ b/ix-tests/configs/grpc-service/service-1.toml @@ -18,5 +18,5 @@ rpc_url = "http://127.0.0.1:8899" [grpc] bind_host = "0.0.0.0" -port = 51051 +port = 50051 dispatcher_capacity = 4096 diff --git a/ix-tests/configs/grpc-service/service-2.toml b/ix-tests/configs/grpc-service/service-2.toml index 6144ed2..223eeb8 100644 --- a/ix-tests/configs/grpc-service/service-2.toml +++ b/ix-tests/configs/grpc-service/service-2.toml @@ -18,5 +18,5 @@ rpc_url = "http://127.0.0.1:8899" [grpc] bind_host = "0.0.0.0" -port = 51052 +port = 50052 dispatcher_capacity = 4096 diff --git a/ix-tests/src/service.rs b/ix-tests/src/service.rs index 5defba9..54a675b 100644 --- a/ix-tests/src/service.rs +++ b/ix-tests/src/service.rs @@ -1,4 +1,7 @@ -use std::path::PathBuf; +use std::net::SocketAddr; +use std::path::{Path, PathBuf}; + +use serde::Deserialize; use std::time::Duration; use anyhow::{Context, bail}; @@ -14,6 +17,7 @@ use crate::config::SuiteConfig; use crate::layout::ServiceInstance; const 
GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5); +const DEFAULT_SERVICE_GRPC_PORT: u16 = 50051; #[allow(dead_code)] pub enum ServiceOwnership { @@ -44,6 +48,18 @@ pub struct ServiceController { service_start_timeout: Duration, } +#[derive(Debug, Deserialize)] +struct FileServiceConfig { + #[serde(default)] + grpc: Option, +} + +#[derive(Debug, Deserialize)] +struct FileServiceGrpcConfig { + #[serde(default)] + port: Option, +} + pub struct ServiceSpec { pub instance: ServiceInstance, pub config_path: PathBuf, @@ -58,20 +74,73 @@ impl ServiceSpec { config_path: PathBuf::from( "ix-tests/configs/grpc-service/service-1.toml", ), - endpoint: "http://127.0.0.1:51051".to_owned(), + endpoint: "http://127.0.0.1:50051".to_owned(), }, ServiceInstance::Two => Self { instance, config_path: PathBuf::from( "ix-tests/configs/grpc-service/service-2.toml", ), - endpoint: "http://127.0.0.1:51052".to_owned(), + endpoint: "http://127.0.0.1:50052".to_owned(), }, } } } impl ServiceController { + fn endpoint_port(endpoint: &str) -> anyhow::Result { + let socket_addr = endpoint + .strip_prefix("http://") + .ok_or_else(|| { + anyhow::anyhow!("unsupported endpoint format: {endpoint}") + })? 
+ .parse::() + .with_context(|| { + format!("failed to parse endpoint as host:port: {endpoint}") + })?; + Ok(socket_addr.port()) + } + + fn configured_grpc_port(config_path: &Path) -> anyhow::Result { + let config_text = + std::fs::read_to_string(config_path).with_context(|| { + format!( + "failed to read service config: {}", + config_path.display() + ) + })?; + let file: FileServiceConfig = toml::from_str(&config_text) + .with_context(|| { + format!( + "failed to parse service config: {}", + config_path.display() + ) + })?; + Ok(file + .grpc + .and_then(|grpc| grpc.port) + .unwrap_or(DEFAULT_SERVICE_GRPC_PORT)) + } + + fn validate_spec_matches_config( + &self, + spec: &ServiceSpec, + ) -> anyhow::Result<()> { + let endpoint_port = Self::endpoint_port(&spec.endpoint)?; + let config_port = Self::configured_grpc_port(&spec.config_path)?; + if endpoint_port != config_port { + bail!( + "refusing to continue for {:?}: probe endpoint {} uses port {}, but template {} binds port {}; probe/reuse and spawned-service bind port would diverge", + spec.instance, + spec.endpoint, + endpoint_port, + spec.config_path.display(), + config_port, + ); + } + Ok(()) + } + pub fn new(config: &SuiteConfig) -> Self { Self { service_binary: config.service_binary.clone(), @@ -140,6 +209,8 @@ impl ServiceController { spec: &ServiceSpec, artifacts: &RunArtifacts, ) -> anyhow::Result { + self.validate_spec_matches_config(spec)?; + if self.probe_ready(&spec.endpoint).await { info!( endpoint = %spec.endpoint, From 45fb673a8e63c628418fb18aa4844adb6b023b42 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 5 May 2026 18:37:26 +0800 Subject: [PATCH 49/68] fix: abort receive task on shutdown Amp-Thread-ID: https://ampcode.com/threads/T-019df7b6-44aa-768c-818c-87ade4cf3e44 Co-authored-by: Amp --- ix-tests/src/client.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/ix-tests/src/client.rs b/ix-tests/src/client.rs index b045514..b1b8566 100644 --- a/ix-tests/src/client.rs +++ 
b/ix-tests/src/client.rs @@ -216,6 +216,7 @@ impl TestGrpcClient { pub async fn shutdown(self) -> anyhow::Result<()> { drop(self.request_tx); + self.receive_task.abort(); match self.receive_task.await { Ok(result) => result, Err(e) if e.is_cancelled() => Ok(()), From 22ab4d788bff329309ffdbb849764770d1cfedc9 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 5 May 2026 18:50:39 +0800 Subject: [PATCH 50/68] fix: toml issues --- ix-tests/configs/grpc-service/service-1.toml | 4 ++-- ix-tests/configs/grpc-service/service-2.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ix-tests/configs/grpc-service/service-1.toml b/ix-tests/configs/grpc-service/service-1.toml index 2c573f1..d80b3b1 100644 --- a/ix-tests/configs/grpc-service/service-1.toml +++ b/ix-tests/configs/grpc-service/service-1.toml @@ -5,8 +5,8 @@ group_id = "ix-tests-service-1" auto_offset_reset = "latest" [kafka.client] -session.timeout.ms = "6000" -heartbeat.interval.ms = "2000" +"session.timeout.ms" = "6000" +"heartbeat.interval.ms" = "2000" [ksql] url = "http://localhost:8088" diff --git a/ix-tests/configs/grpc-service/service-2.toml b/ix-tests/configs/grpc-service/service-2.toml index 223eeb8..5d3bad7 100644 --- a/ix-tests/configs/grpc-service/service-2.toml +++ b/ix-tests/configs/grpc-service/service-2.toml @@ -5,8 +5,8 @@ group_id = "ix-tests-service-2" auto_offset_reset = "latest" [kafka.client] -session.timeout.ms = "6000" -heartbeat.interval.ms = "2000" +"session.timeout.ms" = "6000" +"heartbeat.interval.ms" = "2000" [ksql] url = "http://localhost:8088" From 1d80a3b54da451eb29778a4fcf855ed74314a7c9 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 5 May 2026 19:16:54 +0800 Subject: [PATCH 51/68] feat: switch ix-tests to random keypairs per run Amp-Thread-ID: https://ampcode.com/threads/T-019df7da-5102-7128-baa4-52698cee5051 Co-authored-by: Amp --- ix-tests/src/accounts.rs | 231 +++++---------------------------------- ix-tests/src/main.rs | 2 +- 2 files 
changed, 31 insertions(+), 202 deletions(-) diff --git a/ix-tests/src/accounts.rs b/ix-tests/src/accounts.rs index 886dc39..7dc828f 100644 --- a/ix-tests/src/accounts.rs +++ b/ix-tests/src/accounts.rs @@ -1,9 +1,9 @@ +use std::collections::HashMap; + use solana_keypair::Keypair; use solana_pubkey::Pubkey; use solana_signer::Signer; -use crate::scenario::ScenarioName; - #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub enum NamedAccount { SimpleA, @@ -27,221 +27,50 @@ pub enum NamedAccount { Hot09, } -impl NamedAccount { - fn all() -> &'static [NamedAccount] { - &[ - Self::SimpleA, - Self::SimpleB, - Self::SimpleC, - Self::SimpleD, - Self::SharedA, - Self::SharedB, - Self::RestartA, - Self::RestartB, - Self::OwnerData, - Self::Hot00, - Self::Hot01, - Self::Hot02, - Self::Hot03, - Self::Hot04, - Self::Hot05, - Self::Hot06, - Self::Hot07, - Self::Hot08, - Self::Hot09, - ] - } - - fn index(self) -> usize { - match self { - Self::SimpleA => 0, - Self::SimpleB => 1, - Self::SimpleC => 2, - Self::SimpleD => 3, - Self::SharedA => 4, - Self::SharedB => 5, - Self::RestartA => 6, - Self::RestartB => 7, - Self::OwnerData => 8, - Self::Hot00 => 9, - Self::Hot01 => 10, - Self::Hot02 => 11, - Self::Hot03 => 12, - Self::Hot04 => 13, - Self::Hot05 => 14, - Self::Hot06 => 15, - Self::Hot07 => 16, - Self::Hot08 => 17, - Self::Hot09 => 18, - } - } -} - -const ACCOUNT_COUNT: usize = 19; - -/// Base seeds — one unique 32-byte array per named account. -/// Each scenario derives its own seeds by XOR-ing with a per-scenario byte. 
-const BASE_SEEDS: [[u8; 32]; ACCOUNT_COUNT] = [ - // SimpleA - [ - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, - 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, - 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, - ], - // SimpleB - [ - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, - 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, - ], - // SimpleC - [ - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, - 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - ], - // SimpleD - [ - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, - 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, - ], - // SharedA - [ - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, - 0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, - 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, - ], - // SharedB - [ - 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, - 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, - 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, - ], - // RestartA - [ - 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, - 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - ], - // RestartB - [ - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, - 0xFC, 0xFD, 0xFE, 0xFF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, - ], - // OwnerData - [ - 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, - 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, - 0xB9, 0xBA, 0xBB, 
0xBC, 0xBD, 0xBE, 0xBF, 0xC0, - ], - // Hot00 - [ - 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, - 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, - 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, - ], - // Hot01 - [ - 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, - 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, - 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40, - ], - // Hot02 - [ - 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, - 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, - 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, - ], - // Hot03 - [ - 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, - 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, - 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, - ], - // Hot04 - [ - 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, - 0x8D, 0x8E, 0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, - 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 0xA0, - ], - // Hot05 - [ - 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, - 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, - 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, - ], - // Hot06 - [ - 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, - 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, - 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x00, - ], - // Hot07 - [ - 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, - 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, - 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, - ], - // Hot08 - [ - 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, - 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, - 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x41, - ], - 
// Hot09 - [ - 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, - 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, - 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, - ], +const ALL_NAMED_ACCOUNTS: &[NamedAccount] = &[ + NamedAccount::SimpleA, + NamedAccount::SimpleB, + NamedAccount::SimpleC, + NamedAccount::SimpleD, + NamedAccount::SharedA, + NamedAccount::SharedB, + NamedAccount::RestartA, + NamedAccount::RestartB, + NamedAccount::OwnerData, + NamedAccount::Hot00, + NamedAccount::Hot01, + NamedAccount::Hot02, + NamedAccount::Hot03, + NamedAccount::Hot04, + NamedAccount::Hot05, + NamedAccount::Hot06, + NamedAccount::Hot07, + NamedAccount::Hot08, + NamedAccount::Hot09, ]; -fn scenario_xor_byte(scenario: ScenarioName) -> u8 { - match scenario { - ScenarioName::All => unreachable!("All is expanded before accounts"), - ScenarioName::SingleTriage => 0xEE, - ScenarioName::SingleBasic => 0xAA, - ScenarioName::SingleLoad => 0xBB, - ScenarioName::DualConcurrent => 0xCC, - ScenarioName::DualRestart => 0xDD, - } -} - -fn derive_seed(scenario: ScenarioName, account: NamedAccount) -> [u8; 32] { - let base = BASE_SEEDS[account.index()]; - let xor = scenario_xor_byte(scenario); - let mut seed = [0u8; 32]; - for i in 0..32 { - seed[i] = base[i] ^ xor; - } - seed -} - pub struct ScenarioAccounts { - keypairs: Vec, + keypairs: HashMap, } #[allow(dead_code)] impl ScenarioAccounts { - pub fn for_scenario(name: ScenarioName) -> Self { - let keypairs = NamedAccount::all() + pub fn new() -> Self { + let keypairs = ALL_NAMED_ACCOUNTS .iter() - .map(|account| { - let seed = derive_seed(name, *account); - Keypair::new_from_array(seed) - }) + .map(|account| (*account, Keypair::new())) .collect(); Self { keypairs } } pub fn keypair(&self, account: NamedAccount) -> &Keypair { - &self.keypairs[account.index()] + self.keypairs + .get(&account) + .expect("ScenarioAccounts missing keypair for account") } pub fn pubkey(&self, account: NamedAccount) -> 
Pubkey { - self.keypairs[account.index()].pubkey() + self.keypair(account).pubkey() } pub fn pubkey_b58(&self, account: NamedAccount) -> String { diff --git a/ix-tests/src/main.rs b/ix-tests/src/main.rs index a79734d..8b96fce 100644 --- a/ix-tests/src/main.rs +++ b/ix-tests/src/main.rs @@ -70,7 +70,7 @@ async fn main() -> anyhow::Result<()> { service_controller: ServiceController::new(&config), validator: ValidatorDriver::new(&config), checkpoint_runner: CheckpointRunner::new(&config), - accounts: ScenarioAccounts::for_scenario(*scenario), + accounts: ScenarioAccounts::new(), }; match scenarios::run_scenario(*scenario, &ctx).await { From 0aff553d314a10f265aef273ba4253d260916e4b Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 5 May 2026 19:17:23 +0800 Subject: [PATCH 52/68] chore: add tracing::info! logging for generated NamedAccount pubkey mapping --- ix-tests/src/accounts.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ix-tests/src/accounts.rs b/ix-tests/src/accounts.rs index 7dc828f..7500b63 100644 --- a/ix-tests/src/accounts.rs +++ b/ix-tests/src/accounts.rs @@ -56,10 +56,20 @@ pub struct ScenarioAccounts { #[allow(dead_code)] impl ScenarioAccounts { pub fn new() -> Self { - let keypairs = ALL_NAMED_ACCOUNTS + let keypairs: HashMap = ALL_NAMED_ACCOUNTS .iter() .map(|account| (*account, Keypair::new())) .collect(); + + let mapping: Vec = ALL_NAMED_ACCOUNTS + .iter() + .map(|account| { + format!("{:?} → {}", account, keypairs[account].pubkey()) + }) + .collect(); + + tracing::info!(accounts = ?mapping, "generated random ScenarioAccounts pubkeys"); + Self { keypairs } } From c05a7e32281f0830c8ff926da12d2c8628235fc1 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 5 May 2026 21:24:23 +0800 Subject: [PATCH 53/68] chore: fix single basic scenario assert --- ix-tests/src/scenarios/single_basic.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git 
a/ix-tests/src/scenarios/single_basic.rs b/ix-tests/src/scenarios/single_basic.rs index 8fc737b..32b0f92 100644 --- a/ix-tests/src/scenarios/single_basic.rs +++ b/ix-tests/src/scenarios/single_basic.rs @@ -137,7 +137,24 @@ async fn run_inner( let rent_lamports = ctx.validator.rent_exempt_balance(OWNER_DATA_SPACE).await?; - ctx.validator.airdrop(&owner_data, rent_lamports).await?; + let owner_data_airdrop_sig = + ctx.validator.airdrop(&owner_data, rent_lamports).await?; + + let owner_data_funding_checkpoint = CheckpointSpec { + name: "owner-data-funding", + checkpoints: vec![ClientCheckpoint { + client_id: 3, + required: vec![ExpectedUpdate { + pubkey_b58: Some(owner_data.to_string()), + lamports: Some(rent_lamports), + txn_signature_b58: Some(Some(owner_data_airdrop_sig)), + ..Default::default() + }], + }], + }; + ctx.checkpoint_runner + .wait_until_satisfied(&owner_data_funding_checkpoint, clients) + .await?; let synthetic_owner = Pubkey::new_from_array(SYNTHETIC_OWNER_BYTES); let owner_data_sig = ctx @@ -152,6 +169,7 @@ async fn run_inner( let owner_data_expected = ExpectedUpdate { pubkey_b58: Some(owner_data.to_string()), owner_b58: Some(synthetic_owner.to_string()), + lamports: Some(rent_lamports), txn_signature_b58: Some(Some(owner_data_sig)), data: None, ..Default::default() From 6c02ca6a4f3a9129a9d827aeafd60ac2ee21121f Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Mon, 11 May 2026 15:37:14 +0800 Subject: [PATCH 54/68] fix: accounts need to be rent exempt --- ix-tests/src/scenarios/single_load.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ix-tests/src/scenarios/single_load.rs b/ix-tests/src/scenarios/single_load.rs index d6b6209..bc0ca2a 100644 --- a/ix-tests/src/scenarios/single_load.rs +++ b/ix-tests/src/scenarios/single_load.rs @@ -59,12 +59,14 @@ async fn run_inner( ctx.validator.fund_payer().await?; + let rent_exempt_lamports = ctx.validator.rent_exempt_balance(0).await?; + let mut expected_updates = Vec::new(); for 
index in 1..=25u64 { let (account, lamports) = if index % 2 == 1 { - (NamedAccount::SharedA, 10_000 + index) + (NamedAccount::SharedA, rent_exempt_lamports + 10_000 + index) } else { - (NamedAccount::SharedB, 20_000 + index) + (NamedAccount::SharedB, rent_exempt_lamports + 20_000 + index) }; let pubkey_b58 = ctx.accounts.pubkey_b58(account); let sig = ctx From 83532bda5008957d5a31231043e04cc4c5a65a76 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Mon, 11 May 2026 15:41:03 +0800 Subject: [PATCH 55/68] chore: parallel airdrops --- ix-tests/src/scenarios/dual_concurrent.rs | 18 ++++----- ix-tests/src/scenarios/dual_restart.rs | 43 ++++++++++---------- ix-tests/src/scenarios/single_basic.rs | 13 ++++-- ix-tests/src/scenarios/single_load.rs | 48 ++++++++++++++--------- ix-tests/src/validator.rs | 13 ++++++ 5 files changed, 82 insertions(+), 53 deletions(-) diff --git a/ix-tests/src/scenarios/dual_concurrent.rs b/ix-tests/src/scenarios/dual_concurrent.rs index c27e13f..9cc48d8 100644 --- a/ix-tests/src/scenarios/dual_concurrent.rs +++ b/ix-tests/src/scenarios/dual_concurrent.rs @@ -91,18 +91,16 @@ async fn run_inner( ctx.validator.fund_payer().await?; - let simple_a_sig = ctx + let sigs = ctx .validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::SimpleA), 1_111_111) - .await?; - let simple_b_sig = ctx - .validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::SimpleB), 2_222_222) - .await?; - let shared_a_sig = ctx - .validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::SharedA), 3_333_333) + .airdrops(vec![ + (ctx.accounts.pubkey(NamedAccount::SimpleA), 1_111_111), + (ctx.accounts.pubkey(NamedAccount::SimpleB), 2_222_222), + (ctx.accounts.pubkey(NamedAccount::SharedA), 3_333_333), + ]) .await?; + let [simple_a_sig, simple_b_sig, shared_a_sig]: [String; 3] = + sigs.try_into().expect("expected three airdrop signatures"); let simple_a_expected = ExpectedUpdate { pubkey_b58: Some(simple_a), diff --git a/ix-tests/src/scenarios/dual_restart.rs 
b/ix-tests/src/scenarios/dual_restart.rs index e91189a..d1843a1 100644 --- a/ix-tests/src/scenarios/dual_restart.rs +++ b/ix-tests/src/scenarios/dual_restart.rs @@ -75,18 +75,16 @@ async fn run_inner( ctx.validator.fund_payer().await?; - let restart_a_sig = ctx + let sigs = ctx .validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::RestartA), 4_444_444) - .await?; - let restart_b_sig = ctx - .validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::RestartB), 5_555_555) - .await?; - let shared_b_sig = ctx - .validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::SharedB), 6_666_666) + .airdrops(vec![ + (ctx.accounts.pubkey(NamedAccount::RestartA), 4_444_444), + (ctx.accounts.pubkey(NamedAccount::RestartB), 5_555_555), + (ctx.accounts.pubkey(NamedAccount::SharedB), 6_666_666), + ]) .await?; + let [restart_a_sig, restart_b_sig, shared_b_sig]: [String; 3] = + sigs.try_into().expect("expected three airdrop signatures"); let pre_restart = CheckpointSpec { name: "pre-restart", @@ -135,13 +133,15 @@ async fn run_inner( let parked_logs = shutdown_service_one_clients(active_clients).await?; shutdown_service(&ctx.service_controller, service_one).await?; - ctx.validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::RestartA), 7_777_777) - .await?; - let during_shared_b_sig = ctx + let sigs = ctx .validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::SharedB), 8_888_888) + .airdrops(vec![ + (ctx.accounts.pubkey(NamedAccount::RestartA), 7_777_777), + (ctx.accounts.pubkey(NamedAccount::SharedB), 8_888_888), + ]) .await?; + let [_during_restart_a_sig, during_shared_b_sig]: [String; 2] = + sigs.try_into().expect("expected two airdrop signatures"); assert_logs_unchanged(&parked_logs)?; @@ -178,14 +178,15 @@ async fn run_inner( ) .await?; - let post_restart_a_sig = ctx - .validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::RestartA), 9_999_999) - .await?; - let post_shared_b_sig = ctx + let sigs = ctx .validator - .airdrop(&ctx.accounts.pubkey(NamedAccount::SharedB), 
10_101_010) + .airdrops(vec![ + (ctx.accounts.pubkey(NamedAccount::RestartA), 9_999_999), + (ctx.accounts.pubkey(NamedAccount::SharedB), 10_101_010), + ]) .await?; + let [post_restart_a_sig, post_shared_b_sig]: [String; 2] = + sigs.try_into().expect("expected two airdrop signatures"); let post_restart = CheckpointSpec { name: "post-restart", diff --git a/ix-tests/src/scenarios/single_basic.rs b/ix-tests/src/scenarios/single_basic.rs index 32b0f92..eee12f5 100644 --- a/ix-tests/src/scenarios/single_basic.rs +++ b/ix-tests/src/scenarios/single_basic.rs @@ -102,9 +102,16 @@ async fn run_inner( // correct lamports and signatures ctx.validator.fund_payer().await?; - let simple_a_sig = ctx.validator.airdrop(&simple_a, 1_000_000).await?; - let simple_b_sig = ctx.validator.airdrop(&simple_b, 2_000_000).await?; - let simple_c_sig = ctx.validator.airdrop(&simple_c, 3_000_000).await?; + let sigs = ctx + .validator + .airdrops(vec![ + (simple_a, 1_000_000), + (simple_b, 2_000_000), + (simple_c, 3_000_000), + ]) + .await?; + let [simple_a_sig, simple_b_sig, simple_c_sig]: [String; 3] = + sigs.try_into().expect("expected three airdrop signatures"); let basic_checkpoint = CheckpointSpec { name: "basic-lamports", diff --git a/ix-tests/src/scenarios/single_load.rs b/ix-tests/src/scenarios/single_load.rs index bc0ca2a..27b6f26 100644 --- a/ix-tests/src/scenarios/single_load.rs +++ b/ix-tests/src/scenarios/single_load.rs @@ -61,25 +61,35 @@ async fn run_inner( let rent_exempt_lamports = ctx.validator.rent_exempt_balance(0).await?; - let mut expected_updates = Vec::new(); - for index in 1..=25u64 { - let (account, lamports) = if index % 2 == 1 { - (NamedAccount::SharedA, rent_exempt_lamports + 10_000 + index) - } else { - (NamedAccount::SharedB, rent_exempt_lamports + 20_000 + index) - }; - let pubkey_b58 = ctx.accounts.pubkey_b58(account); - let sig = ctx - .validator - .airdrop(&ctx.accounts.pubkey(account), lamports) - .await?; - expected_updates.push(ExpectedUpdate { - pubkey_b58: 
Some(pubkey_b58), - lamports: Some(lamports), - txn_signature_b58: Some(Some(sig)), - ..Default::default() - }); - } + let airdrop_specs = (1..=25u64) + .map(|index| { + let (account, lamports) = if index % 2 == 1 { + (NamedAccount::SharedA, rent_exempt_lamports + 10_000 + index) + } else { + (NamedAccount::SharedB, rent_exempt_lamports + 20_000 + index) + }; + (account, ctx.accounts.pubkey(account), lamports) + }) + .collect::>(); + let airdrop_requests = airdrop_specs + .iter() + .map(|(_, pubkey, lamports)| (*pubkey, *lamports)) + .collect(); + let sigs = ctx.validator.airdrops(airdrop_requests).await?; + + let expected_updates = airdrop_specs + .into_iter() + .zip(sigs) + .map(|(spec, sig)| { + let (account, _pubkey, lamports) = spec; + ExpectedUpdate { + pubkey_b58: Some(ctx.accounts.pubkey_b58(account)), + lamports: Some(lamports), + txn_signature_b58: Some(Some(sig)), + ..Default::default() + } + }) + .collect::>(); let client_specs = (0..100) .map(|client_id| ClientCheckpoint { diff --git a/ix-tests/src/validator.rs b/ix-tests/src/validator.rs index 085dc7f..f05e852 100644 --- a/ix-tests/src/validator.rs +++ b/ix-tests/src/validator.rs @@ -1,6 +1,7 @@ use std::time::Duration; use anyhow::Context; +use futures::future::try_join_all; use solana_keypair::Keypair; use solana_pubkey::Pubkey; use solana_rpc_client::{ @@ -66,6 +67,18 @@ impl ValidatorDriver { Ok(sig.to_string()) } + pub async fn airdrops( + &self, + requests: Vec<(Pubkey, u64)>, + ) -> anyhow::Result> { + try_join_all( + requests + .iter() + .map(|(pubkey, lamports)| self.airdrop(pubkey, *lamports)), + ) + .await + } + pub async fn transfer( &self, from: &Keypair, From ae878d25f67349a002e042a2d70ea9ad36da6b48 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Mon, 11 May 2026 15:44:47 +0800 Subject: [PATCH 56/68] fix: report failed tx status --- ix-tests/src/validator.rs | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/ix-tests/src/validator.rs 
b/ix-tests/src/validator.rs index f05e852..57272f9 100644 --- a/ix-tests/src/validator.rs +++ b/ix-tests/src/validator.rs @@ -164,11 +164,33 @@ impl ValidatorDriver { sig: &solana_signature::Signature, ) -> anyhow::Result<()> { let deadline = tokio::time::Instant::now() + self.transaction_timeout; + let mut last_status_error = None; loop { - if self.rpc.confirm_transaction(sig).await.unwrap_or(false) { - return Ok(()); + match self + .rpc + .get_signature_status_with_commitment( + sig, + CommitmentConfig::confirmed(), + ) + .await + { + Ok(Some(Ok(()))) => return Ok(()), + Ok(Some(Err(err))) => { + anyhow::bail!("transaction {sig} failed: {err:?}"); + } + Ok(None) => {} + Err(err) => { + last_status_error = Some(err.to_string()); + } } + if tokio::time::Instant::now() >= deadline { + if let Some(error) = last_status_error { + anyhow::bail!( + "transaction {sig} not confirmed within {:?}; last status check error: {error}", + self.transaction_timeout + ); + } anyhow::bail!( "transaction {sig} not confirmed within {:?}", self.transaction_timeout From a0028baaef0fbdaf36ffbc24fbd83cfc0f9f2955 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Mon, 11 May 2026 15:50:44 +0800 Subject: [PATCH 57/68] chore: confirm sig improvements --- ix-tests/src/expectation.rs | 93 ++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/ix-tests/src/expectation.rs b/ix-tests/src/expectation.rs index 458d15c..ccb6e6f 100644 --- a/ix-tests/src/expectation.rs +++ b/ix-tests/src/expectation.rs @@ -45,6 +45,18 @@ pub struct CheckpointRunner { #[allow(dead_code)] impl ExpectedUpdate { pub fn matches(&self, observed: &ObservedUpdate) -> bool { + let mismatches = self.mismatches(observed); + if !mismatches.is_empty() { + warn!("Mismatches:\n {}", mismatches.join("\n ")); + } + mismatches.is_empty() + } + + fn matches_quietly(&self, observed: &ObservedUpdate) -> bool { + self.mismatches(observed).is_empty() + } + + fn mismatches(&self, observed: 
&ObservedUpdate) -> Vec { let mut mismatches = Vec::new(); if let Some(expected) = &self.pubkey_b58 && observed.pubkey_b58 != *expected @@ -124,10 +136,7 @@ impl ExpectedUpdate { )); } - if !mismatches.is_empty() { - warn!("Mismatches:\n {}", mismatches.join("\n ")); - } - mismatches.is_empty() + mismatches } } @@ -159,43 +168,51 @@ impl CheckpointRunner { ) })?; - for (idx, expected) in check_point.required.iter().enumerate() { - 'retry: loop { - let client_state = client.log().consume_next_update(); - if let Some(observed) = client_state { - if expected.matches(&observed) { - trace!( - checkpoint = spec.name, - idx, - client_id = check_point.client_id, - "matched expected update: {:#?}", - expected - ); - break 'retry; - } else { - error!( - checkpoint = spec.name, - idx, - client_id = check_point.client_id, - "expected update did not match observed update.\nExpected: {:#?}\nObserved: {:#?}", - expected, - observed - ); - bail!( - "checkpoint '{}' idx: {} failed for client {}", - spec.name, - idx, - check_point.client_id - ); - } - } else if Instant::now() > deadline { - bail!( - "checkpoint '{}' idx: {} timed out waiting for client {}", - spec.name, + let mut matched = vec![false; check_point.required.len()]; + while matched.iter().any(|is_matched| !*is_matched) { + let client_state = client.log().consume_next_update(); + if let Some(observed) = client_state { + let matched_idx = + check_point.required.iter().enumerate().find_map( + |(idx, expected)| { + (!matched[idx] + && expected.matches_quietly(&observed)) + .then_some(idx) + }, + ); + + if let Some(idx) = matched_idx { + matched[idx] = true; + trace!( + checkpoint = spec.name, idx, - check_point.client_id + client_id = check_point.client_id, + "matched expected update: {:#?}", + check_point.required[idx] + ); + } else { + debug!( + checkpoint = spec.name, + client_id = check_point.client_id, + observed = ?observed, + "skipping non-required update while waiting for checkpoint" ); } + } else if Instant::now() 
> deadline { + let missing = matched + .iter() + .enumerate() + .filter_map(|(idx, is_matched)| { + (!*is_matched).then_some(idx) + }) + .collect::>(); + bail!( + "checkpoint '{}' timed out waiting for client {}; missing required update indexes {:?}", + spec.name, + check_point.client_id, + missing + ); + } else { sleep(Duration::from_millis(50)).await; } } From aa9682735a9f4c80e31f3d268f7df1368bc82886 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Mon, 11 May 2026 15:56:26 +0800 Subject: [PATCH 58/68] fix: relax load checkpoint matching --- ix-tests/src/expectation.rs | 37 +++++++++++++++++++------ ix-tests/src/scenarios/single_load.rs | 39 ++++++++++++++++++--------- 2 files changed, 55 insertions(+), 21 deletions(-) diff --git a/ix-tests/src/expectation.rs b/ix-tests/src/expectation.rs index ccb6e6f..43440e7 100644 --- a/ix-tests/src/expectation.rs +++ b/ix-tests/src/expectation.rs @@ -172,14 +172,11 @@ impl CheckpointRunner { while matched.iter().any(|is_matched| !*is_matched) { let client_state = client.log().consume_next_update(); if let Some(observed) = client_state { - let matched_idx = - check_point.required.iter().enumerate().find_map( - |(idx, expected)| { - (!matched[idx] - && expected.matches_quietly(&observed)) - .then_some(idx) - }, - ); + let matched_idx = next_required_match( + &check_point.required, + &matched, + &observed, + ); if let Some(idx) = matched_idx { matched[idx] = true; @@ -221,6 +218,30 @@ impl CheckpointRunner { } } +fn next_required_match( + required: &[ExpectedUpdate], + matched: &[bool], + observed: &ObservedUpdate, +) -> Option { + let next_for_observed_pubkey = + required.iter().enumerate().find(|(idx, expected)| { + !matched[*idx] + && expected.pubkey_b58.as_deref() + == Some(observed.pubkey_b58.as_str()) + }); + + if let Some((idx, expected)) = next_for_observed_pubkey { + return expected.matches_quietly(observed).then_some(idx); + } + + required.iter().enumerate().find_map(|(idx, expected)| { + (!matched[idx] + && 
expected.pubkey_b58.is_none() + && expected.matches_quietly(observed)) + .then_some(idx) + }) +} + #[cfg(test)] mod tests { use crate::layout::ServiceInstance; diff --git a/ix-tests/src/scenarios/single_load.rs b/ix-tests/src/scenarios/single_load.rs index 27b6f26..1d53ed0 100644 --- a/ix-tests/src/scenarios/single_load.rs +++ b/ix-tests/src/scenarios/single_load.rs @@ -75,21 +75,34 @@ async fn run_inner( .iter() .map(|(_, pubkey, lamports)| (*pubkey, *lamports)) .collect(); - let sigs = ctx.validator.airdrops(airdrop_requests).await?; + ctx.validator.airdrops(airdrop_requests).await?; - let expected_updates = airdrop_specs - .into_iter() - .zip(sigs) - .map(|(spec, sig)| { - let (account, _pubkey, lamports) = spec; - ExpectedUpdate { - pubkey_b58: Some(ctx.accounts.pubkey_b58(account)), - lamports: Some(lamports), - txn_signature_b58: Some(Some(sig)), - ..Default::default() + let (shared_a_balance, shared_b_balance) = airdrop_specs.iter().fold( + (0, 0), + |(shared_a_balance, shared_b_balance), (account, _pubkey, lamports)| { + match account { + NamedAccount::SharedA => { + (shared_a_balance + lamports, shared_b_balance) + } + NamedAccount::SharedB => { + (shared_a_balance, shared_b_balance + lamports) + } + _ => unreachable!("single-load only airdrops shared accounts"), } - }) - .collect::>(); + }, + ); + let expected_updates = vec![ + ExpectedUpdate { + pubkey_b58: Some(ctx.accounts.pubkey_b58(NamedAccount::SharedA)), + lamports: Some(shared_a_balance), + ..Default::default() + }, + ExpectedUpdate { + pubkey_b58: Some(ctx.accounts.pubkey_b58(NamedAccount::SharedB)), + lamports: Some(shared_b_balance), + ..Default::default() + }, + ]; let client_specs = (0..100) .map(|client_id| ClientCheckpoint { From 8b7ee7e2e10294f268c8fd8e1954e00c5e57cc64 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Mon, 11 May 2026 17:49:57 +0800 Subject: [PATCH 59/68] fix: name dual restart balances --- ix-tests/src/scenarios/dual_restart.rs | 87 +++++++++++++++++++++----- 1 file 
changed, 72 insertions(+), 15 deletions(-) diff --git a/ix-tests/src/scenarios/dual_restart.rs b/ix-tests/src/scenarios/dual_restart.rs index d1843a1..90643c0 100644 --- a/ix-tests/src/scenarios/dual_restart.rs +++ b/ix-tests/src/scenarios/dual_restart.rs @@ -9,6 +9,38 @@ use crate::observation::ClientLog; use crate::scenarios::ScenarioFailure; use crate::service::{ServiceHandle, ServiceSpec}; +const PRE_RESTART_A_AIRDROP_LAMPORTS: u64 = 4_444_444; +const PRE_RESTART_B_AIRDROP_LAMPORTS: u64 = 5_555_555; +const PRE_SHARED_B_AIRDROP_LAMPORTS: u64 = 6_666_666; + +const DURING_RESTART_A_AIRDROP_LAMPORTS: u64 = 7_777_777; +const DURING_SHARED_B_AIRDROP_LAMPORTS: u64 = 8_888_888; + +const POST_RESTART_A_AIRDROP_LAMPORTS: u64 = 9_999_999; +const POST_SHARED_B_AIRDROP_LAMPORTS: u64 = 10_101_010; + +// Account updates report the account's resulting balance, not the +// individual airdrop delta. The first airdrops below target fresh random +// accounts, so their resulting balances are equal to their airdrop amounts. +const PRE_RESTART_A_EXPECTED_BALANCE: u64 = PRE_RESTART_A_AIRDROP_LAMPORTS; +const PRE_RESTART_B_EXPECTED_BALANCE: u64 = PRE_RESTART_B_AIRDROP_LAMPORTS; +const PRE_SHARED_B_EXPECTED_BALANCE: u64 = PRE_SHARED_B_AIRDROP_LAMPORTS; + +// During restart, service one is offline but the validator still applies both +// airdrops. SharedB already has the pre-restart balance when this update is +// emitted, so the expected lamports are cumulative. +const DURING_RESTART_A_EXPECTED_BALANCE: u64 = + PRE_RESTART_A_EXPECTED_BALANCE + DURING_RESTART_A_AIRDROP_LAMPORTS; +const DURING_SHARED_B_EXPECTED_BALANCE: u64 = + PRE_SHARED_B_EXPECTED_BALANCE + DURING_SHARED_B_AIRDROP_LAMPORTS; + +// After service one restarts, live updates again carry full account balances. +// These expectations include all earlier airdrops to the same account. 
+const POST_RESTART_A_EXPECTED_BALANCE: u64 = + DURING_RESTART_A_EXPECTED_BALANCE + POST_RESTART_A_AIRDROP_LAMPORTS; +const POST_SHARED_B_EXPECTED_BALANCE: u64 = + DURING_SHARED_B_EXPECTED_BALANCE + POST_SHARED_B_AIRDROP_LAMPORTS; + pub async fn run(ctx: &ScenarioContext) -> Result<(), ScenarioFailure> { let spec_one = ServiceSpec::for_instance(ServiceInstance::One); let spec_two = ServiceSpec::for_instance(ServiceInstance::Two); @@ -78,9 +110,18 @@ async fn run_inner( let sigs = ctx .validator .airdrops(vec![ - (ctx.accounts.pubkey(NamedAccount::RestartA), 4_444_444), - (ctx.accounts.pubkey(NamedAccount::RestartB), 5_555_555), - (ctx.accounts.pubkey(NamedAccount::SharedB), 6_666_666), + ( + ctx.accounts.pubkey(NamedAccount::RestartA), + PRE_RESTART_A_AIRDROP_LAMPORTS, + ), + ( + ctx.accounts.pubkey(NamedAccount::RestartB), + PRE_RESTART_B_AIRDROP_LAMPORTS, + ), + ( + ctx.accounts.pubkey(NamedAccount::SharedB), + PRE_SHARED_B_AIRDROP_LAMPORTS, + ), ]) .await?; let [restart_a_sig, restart_b_sig, shared_b_sig]: [String; 3] = @@ -93,7 +134,7 @@ async fn run_inner( 0..5, expected_update( ctx.accounts.pubkey_b58(NamedAccount::RestartA), - 4_444_444, + PRE_RESTART_A_EXPECTED_BALANCE, restart_a_sig, ), ), @@ -101,7 +142,7 @@ async fn run_inner( 5..10, expected_update( ctx.accounts.pubkey_b58(NamedAccount::SharedB), - 6_666_666, + PRE_SHARED_B_EXPECTED_BALANCE, shared_b_sig.clone(), ), ), @@ -109,7 +150,7 @@ async fn run_inner( 10..15, expected_update( ctx.accounts.pubkey_b58(NamedAccount::RestartB), - 5_555_555, + PRE_RESTART_B_EXPECTED_BALANCE, restart_b_sig, ), ), @@ -117,7 +158,7 @@ async fn run_inner( 15..20, expected_update( ctx.accounts.pubkey_b58(NamedAccount::SharedB), - 6_666_666, + PRE_SHARED_B_EXPECTED_BALANCE, shared_b_sig, ), ), @@ -136,8 +177,14 @@ async fn run_inner( let sigs = ctx .validator .airdrops(vec![ - (ctx.accounts.pubkey(NamedAccount::RestartA), 7_777_777), - (ctx.accounts.pubkey(NamedAccount::SharedB), 8_888_888), + ( + 
ctx.accounts.pubkey(NamedAccount::RestartA), + DURING_RESTART_A_AIRDROP_LAMPORTS, + ), + ( + ctx.accounts.pubkey(NamedAccount::SharedB), + DURING_SHARED_B_AIRDROP_LAMPORTS, + ), ]) .await?; let [_during_restart_a_sig, during_shared_b_sig]: [String; 2] = @@ -145,6 +192,8 @@ async fn run_inner( assert_logs_unchanged(&parked_logs)?; + // SharedB remains subscribed on service two while service one is down; + // lamports are the cumulative balance (6_666_666 + 8_888_888), not just the second airdrop. let during_restart = CheckpointSpec { name: "during-restart", checkpoints: vec![ @@ -153,7 +202,7 @@ async fn run_inner( 15..20, expected_update( ctx.accounts.pubkey_b58(NamedAccount::SharedB), - 8_888_888, + DURING_SHARED_B_EXPECTED_BALANCE, during_shared_b_sig, ), ), @@ -181,13 +230,21 @@ async fn run_inner( let sigs = ctx .validator .airdrops(vec![ - (ctx.accounts.pubkey(NamedAccount::RestartA), 9_999_999), - (ctx.accounts.pubkey(NamedAccount::SharedB), 10_101_010), + ( + ctx.accounts.pubkey(NamedAccount::RestartA), + POST_RESTART_A_AIRDROP_LAMPORTS, + ), + ( + ctx.accounts.pubkey(NamedAccount::SharedB), + POST_SHARED_B_AIRDROP_LAMPORTS, + ), ]) .await?; let [post_restart_a_sig, post_shared_b_sig]: [String; 2] = sigs.try_into().expect("expected two airdrop signatures"); + // Reconnected live updates still report full balances: RestartA includes all + // three RestartA airdrops and SharedB includes all three SharedB airdrops. 
let post_restart = CheckpointSpec { name: "post-restart", checkpoints: vec![ @@ -195,7 +252,7 @@ async fn run_inner( 0..5, expected_update( ctx.accounts.pubkey_b58(NamedAccount::RestartA), - 9_999_999, + POST_RESTART_A_EXPECTED_BALANCE, post_restart_a_sig, ), ), @@ -203,7 +260,7 @@ async fn run_inner( 5..10, expected_update( ctx.accounts.pubkey_b58(NamedAccount::SharedB), - 10_101_010, + POST_SHARED_B_EXPECTED_BALANCE, post_shared_b_sig.clone(), ), ), @@ -212,7 +269,7 @@ async fn run_inner( 15..20, expected_update( ctx.accounts.pubkey_b58(NamedAccount::SharedB), - 10_101_010, + POST_SHARED_B_EXPECTED_BALANCE, post_shared_b_sig, ), ), From 057e4a97630361c0121a5cf723fddcfade5fee29 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 12 May 2026 16:14:29 +0800 Subject: [PATCH 60/68] feat: add path-aware plugin config preflight --- geyser-plugin/src/config.rs | 12 +- geyser-plugin/src/lib.rs | 2 + geyser-plugin/src/preflight.rs | 324 +++++++++++++++++++++++++++++++++ 3 files changed, 332 insertions(+), 6 deletions(-) create mode 100644 geyser-plugin/src/preflight.rs diff --git a/geyser-plugin/src/config.rs b/geyser-plugin/src/config.rs index d713b7b..7915d6e 100644 --- a/geyser-plugin/src/config.rs +++ b/geyser-plugin/src/config.rs @@ -77,10 +77,10 @@ pub struct PluginConfig { #[derive(Debug, Deserialize)] #[serde(deny_unknown_fields)] -struct ValidatorConfig { +pub(crate) struct ValidatorConfig { #[allow(dead_code)] - libpath: String, - config_file: PathBuf, + pub(crate) libpath: PathBuf, + pub(crate) config_file: PathBuf, } fn default_shutdown_timeout_ms() -> u64 { @@ -171,14 +171,14 @@ impl Config { } } - fn fill_defaults(&mut self) { + pub(crate) fn fill_defaults(&mut self) { self.set_default("request.required.acks", "1"); self.set_default("message.timeout.ms", "30000"); self.set_default("compression.type", "lz4"); self.set_default("partitioner", "murmur2_random"); } - fn validate(&self) -> PluginResult<()> { + pub(crate) fn validate(&self) -> PluginResult<()> 
{ if self.kafka.bootstrap_servers.trim().is_empty() { return Err(GeyserPluginError::ConfigFileReadError { msg: "missing required config field `kafka.bootstrap_servers`" @@ -257,7 +257,7 @@ fn read_to_string(path: &Path) -> PluginResult { Ok(contents) } -fn resolve_runtime_config_path( +pub(crate) fn resolve_runtime_config_path( wrapper_path: &Path, runtime_path: &Path, ) -> PathBuf { diff --git a/geyser-plugin/src/lib.rs b/geyser-plugin/src/lib.rs index beb1cbc..045952a 100644 --- a/geyser-plugin/src/lib.rs +++ b/geyser-plugin/src/lib.rs @@ -21,6 +21,8 @@ mod initial_account_backfill; mod ksql; mod metrics; mod plugin; +#[allow(dead_code)] +mod preflight; mod publisher; mod server; mod version; diff --git a/geyser-plugin/src/preflight.rs b/geyser-plugin/src/preflight.rs new file mode 100644 index 0000000..4364124 --- /dev/null +++ b/geyser-plugin/src/preflight.rs @@ -0,0 +1,324 @@ +// Copyright 2022 Blockdaemon Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use { + agave_geyser_plugin_interface::geyser_plugin_interface::GeyserPluginError, + std::{error::Error, fmt, fs, path::PathBuf}, +}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StartupError { + pub subsystem: &'static str, + pub field: Option<&'static str>, + pub target: Option, + pub cause: String, + pub action: String, +} + +impl StartupError { + pub fn new( + subsystem: &'static str, + field: Option<&'static str>, + target: Option>, + cause: impl Into, + action: impl Into, + ) -> Self { + Self { + subsystem, + field, + target: target.map(Into::into), + cause: cause.into(), + action: action.into(), + } + } +} + +impl fmt::Display for StartupError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "ERROR {} startup check failed", self.subsystem)?; + if let Some(field) = self.field { + writeln!(f, " field: {field}")?; + } + if let Some(target) = &self.target { + writeln!(f, " target: {target}")?; + } + writeln!(f, " cause: {}", self.cause)?; + write!(f, " action: {}", self.action) + } +} + +impl Error for StartupError {} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ValidatorConfigPaths { + pub wrapper_path: PathBuf, + pub libpath: PathBuf, + pub runtime_config_path: PathBuf, +} + +#[derive(Debug)] +pub struct LoadedPluginConfig { + pub paths: Option, + pub config: crate::config::Config, +} + +pub fn load_config_with_paths( + config_path: impl AsRef, +) -> Result { + let config_path = config_path.as_ref(); + let contents = fs::read_to_string(config_path).map_err(|error| { + StartupError::new( + "config", + None, + Some(config_path.display().to_string()), + format!("failed to read config file: {error}"), + "check that the file exists and is readable", + ) + })?; + + match serde_json::from_str::(&contents) { + Ok(wrapper) => { + let libpath = + resolve_wrapper_relative_path(config_path, &wrapper.libpath); + let runtime_config_path = + crate::config::resolve_runtime_config_path( + config_path, + &wrapper.config_file, + 
); + if !runtime_config_path.exists() { + return Err(StartupError::new( + "config", + Some("config_file"), + Some(runtime_config_path.display().to_string()), + "runtime TOML config does not exist", + "create the runtime TOML config or update config_file in the validator JSON wrapper", + )); + } + + let config = + read_parse_validate_runtime_config(&runtime_config_path)?; + Ok(LoadedPluginConfig { + paths: Some(ValidatorConfigPaths { + wrapper_path: config_path.to_path_buf(), + libpath, + runtime_config_path, + }), + config, + }) + } + Err(error) => { + let looks_like_json = config_path + .extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| ext.eq_ignore_ascii_case("json")) + || matches!( + contents.trim_start().as_bytes().first(), + Some(b'{') | Some(b'[') + ); + if looks_like_json { + return Err(StartupError::new( + "config", + None, + Some(config_path.display().to_string()), + format!("invalid validator config JSON: {error}"), + "fix the validator JSON wrapper; it must contain libpath and config_file", + )); + } + + let config = parse_validate_runtime_config(&contents, config_path)?; + Ok(LoadedPluginConfig { + paths: None, + config, + }) + } + } +} + +fn resolve_wrapper_relative_path( + wrapper_path: &std::path::Path, + path: &std::path::Path, +) -> PathBuf { + if path.is_absolute() { + path.to_path_buf() + } else { + wrapper_path + .parent() + .unwrap_or_else(|| std::path::Path::new(".")) + .join(path) + } +} + +fn read_parse_validate_runtime_config( + path: &std::path::Path, +) -> Result { + let contents = fs::read_to_string(path).map_err(|error| { + config_error_to_startup_error( + path, + GeyserPluginError::ConfigFileReadError { + msg: format!("failed to read runtime TOML config: {error}"), + }, + ) + })?; + parse_validate_runtime_config(&contents, path) +} + +fn parse_validate_runtime_config( + contents: &str, + path: &std::path::Path, +) -> Result { + let mut config: crate::config::Config = toml::from_str(contents) + .map_err(|error| 
toml_error_to_startup_error(path, error))?; + config.fill_defaults(); + config + .validate() + .map_err(|error| config_error_to_startup_error(path, error))?; + Ok(config) +} + +fn toml_error_to_startup_error( + path: &std::path::Path, + error: toml::de::Error, +) -> StartupError { + StartupError::new( + "config", + None, + Some(path.display().to_string()), + error.to_string(), + "fix the runtime TOML config", + ) +} + +fn config_error_to_startup_error( + path: &std::path::Path, + error: GeyserPluginError, +) -> StartupError { + let cause = match error { + GeyserPluginError::ConfigFileReadError { msg } => msg, + other => other.to_string(), + }; + StartupError::new( + "config", + None, + Some(path.display().to_string()), + cause, + "fix the runtime TOML config", + ) +} + +#[cfg(test)] +mod tests { + use super::load_config_with_paths; + use std::{ + fs, + path::PathBuf, + time::{SystemTime, UNIX_EPOCH}, + }; + + fn temp_dir(test_name: &str) -> PathBuf { + std::env::temp_dir().join(format!( + "geyser-plugin-preflight-{test_name}-{}", + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos() + )) + } + + fn valid_runtime_config() -> &'static str { + r#" +libpath = "target/release/libsolana_accountsdb_plugin_kafka.so" + +[kafka] +bootstrap_servers = "localhost:9092" +topic = "solana.testnet.account_updates" + +[plugin] +local_rpc_url = "http://127.0.0.1:8899" +admin = "127.0.0.1:8080" +"# + } + + #[test] + fn malformed_validator_json_reports_invalid_validator_config_json() { + let base = temp_dir("malformed-json"); + fs::create_dir_all(&base).unwrap(); + let wrapper_path = base.join("plugin-config.json"); + fs::write(&wrapper_path, r#"{ "libpath": "plugin.so", "#).unwrap(); + + let error = load_config_with_paths(&wrapper_path).unwrap_err(); + assert_eq!(error.subsystem, "config"); + assert!(error.cause.contains("invalid validator config JSON")); + + fs::remove_dir_all(&base).unwrap(); + } + + #[test] + fn 
missing_config_file_target_reports_config_file_field() { + let base = temp_dir("missing-config-file"); + fs::create_dir_all(&base).unwrap(); + let wrapper_path = base.join("plugin-config.json"); + fs::write( + &wrapper_path, + r#"{ + "libpath": "plugin.so", + "config_file": "missing.toml" +}"#, + ) + .unwrap(); + + let error = load_config_with_paths(&wrapper_path).unwrap_err(); + assert_eq!(error.subsystem, "config"); + assert_eq!(error.field, Some("config_file")); + assert!(error.target.as_deref().unwrap().ends_with("missing.toml")); + + fs::remove_dir_all(&base).unwrap(); + } + + #[test] + fn malformed_runtime_toml_reports_config_subsystem() { + let base = temp_dir("malformed-runtime-toml"); + fs::create_dir_all(&base).unwrap(); + let runtime_path = base.join("runtime.toml"); + fs::write(&runtime_path, "[kafka\nbootstrap_servers = nope").unwrap(); + let wrapper_path = base.join("plugin-config.json"); + fs::write( + &wrapper_path, + r#"{ + "libpath": "plugin.so", + "config_file": "runtime.toml" +}"#, + ) + .unwrap(); + + let error = load_config_with_paths(&wrapper_path).unwrap_err(); + assert_eq!(error.subsystem, "config"); + assert!(error.action.contains("fix the runtime TOML config")); + + fs::remove_dir_all(&base).unwrap(); + } + + #[test] + fn direct_toml_config_loads_successfully() { + let base = temp_dir("direct-toml"); + fs::create_dir_all(&base).unwrap(); + let runtime_path = base.join("runtime.toml"); + fs::write(&runtime_path, valid_runtime_config()).unwrap(); + + let loaded = load_config_with_paths(&runtime_path).unwrap(); + assert!(loaded.paths.is_none()); + assert_eq!(loaded.config.kafka.topic, "solana.testnet.account_updates"); + + fs::remove_dir_all(&base).unwrap(); + } +} From 2e3f84632f73abc34ede90d5edce00977e08e2d0 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 12 May 2026 16:21:41 +0800 Subject: [PATCH 61/68] feat: run static startup checks on plugin load --- geyser-plugin/src/config.rs | 2 + geyser-plugin/src/plugin/mod.rs | 27 
+++++++++--- geyser-plugin/src/preflight.rs | 77 ++++++++++++++++++++++++++++++++- 3 files changed, 98 insertions(+), 8 deletions(-) diff --git a/geyser-plugin/src/config.rs b/geyser-plugin/src/config.rs index 7915d6e..37be6a9 100644 --- a/geyser-plugin/src/config.rs +++ b/geyser-plugin/src/config.rs @@ -125,6 +125,7 @@ impl Default for KsqlConfig { impl Config { /// Read plugin config from either a validator JSON wrapper or a TOML runtime config. + #[allow(dead_code)] pub fn read_from>(config_path: P) -> PluginResult { let config_path = config_path.as_ref(); let contents = read_to_string(config_path)?; @@ -250,6 +251,7 @@ impl Config { } } +#[allow(dead_code)] fn read_to_string(path: &Path) -> PluginResult { let mut file = File::open(path)?; let mut contents = String::new(); diff --git a/geyser-plugin/src/plugin/mod.rs b/geyser-plugin/src/plugin/mod.rs index fa6a980..dbe9de5 100644 --- a/geyser-plugin/src/plugin/mod.rs +++ b/geyser-plugin/src/plugin/mod.rs @@ -88,17 +88,14 @@ impl GeyserPlugin for KafkaPlugin { self.name(), config_file ); - let config = Config::read_from(config_file)?; + let loaded = crate::preflight::run_static_startup_checks(config_file) + .map_err(startup_error_to_plugin_error)?; + let config = loaded.config; let (version_n, version_s) = get_rdkafka_version(); info!("rd_kafka_version: {:#08x}, {}", version_n, version_s); - let mut producer_config = ClientConfig::new(); - for (key, value) in &config.kafka.client { - producer_config.set(key, value); - } - producer_config - .set("bootstrap.servers", &config.kafka.bootstrap_servers); + let producer_config = build_producer_config(&config); let producer = rdkafka::producer::ThreadedProducer::from_config_and_context( &producer_config, @@ -264,6 +261,22 @@ impl KafkaPlugin { } } +fn build_producer_config(config: &Config) -> ClientConfig { + let mut producer_config = ClientConfig::new(); + for (key, value) in &config.kafka.client { + producer_config.set(key, value); + } + 
producer_config.set("bootstrap.servers", &config.kafka.bootstrap_servers); + producer_config +} + +fn startup_error_to_plugin_error( + error: crate::preflight::StartupError, +) -> PluginError { + error!("{error}"); + PluginError::Custom(Box::new(std::io::Error::other(error.to_string()))) +} + #[derive(Clone, Debug, Default, PartialEq, Eq)] struct RestoreTrackingSummary { deduplicated_count: usize, diff --git a/geyser-plugin/src/preflight.rs b/geyser-plugin/src/preflight.rs index 4364124..b02492d 100644 --- a/geyser-plugin/src/preflight.rs +++ b/geyser-plugin/src/preflight.rs @@ -73,6 +73,24 @@ pub struct LoadedPluginConfig { pub config: crate::config::Config, } +pub(crate) fn run_static_startup_checks( + config_path: impl AsRef, +) -> Result { + let loaded = load_config_with_paths(config_path)?; + if let Some(paths) = &loaded.paths + && !paths.libpath.exists() + { + return Err(StartupError::new( + "config", + Some("libpath"), + Some(paths.libpath.display().to_string()), + "plugin shared library does not exist", + "run make geyser-plugin-build or update libpath in the validator JSON wrapper", + )); + } + Ok(loaded) +} + pub fn load_config_with_paths( config_path: impl AsRef, ) -> Result { @@ -218,7 +236,7 @@ fn config_error_to_startup_error( #[cfg(test)] mod tests { - use super::load_config_with_paths; + use super::{load_config_with_paths, run_static_startup_checks}; use std::{ fs, path::PathBuf, @@ -321,4 +339,61 @@ admin = "127.0.0.1:8080" fs::remove_dir_all(&base).unwrap(); } + + #[test] + fn static_startup_checks_report_missing_libpath() { + let base = temp_dir("missing-libpath"); + fs::create_dir_all(&base).unwrap(); + let runtime_path = base.join("runtime.toml"); + fs::write(&runtime_path, valid_runtime_config()).unwrap(); + let wrapper_path = base.join("plugin-config.json"); + fs::write( + &wrapper_path, + r#"{ + "libpath": "missing-plugin.so", + "config_file": "runtime.toml" +}"#, + ) + .unwrap(); + + let error = 
run_static_startup_checks(&wrapper_path).unwrap_err(); + assert_eq!(error.subsystem, "config"); + assert_eq!(error.field, Some("libpath")); + assert!( + error + .target + .as_deref() + .unwrap() + .ends_with("missing-plugin.so") + ); + assert_eq!(error.cause, "plugin shared library does not exist"); + assert!(error.action.contains("make geyser-plugin-build")); + + fs::remove_dir_all(&base).unwrap(); + } + + #[test] + fn static_startup_checks_accept_existing_libpath() { + let base = temp_dir("existing-libpath"); + fs::create_dir_all(&base).unwrap(); + let runtime_path = base.join("runtime.toml"); + fs::write(&runtime_path, valid_runtime_config()).unwrap(); + let libpath = base.join("plugin.so"); + fs::write(&libpath, "").unwrap(); + let wrapper_path = base.join("plugin-config.json"); + fs::write( + &wrapper_path, + r#"{ + "libpath": "plugin.so", + "config_file": "runtime.toml" +}"#, + ) + .unwrap(); + + let loaded = run_static_startup_checks(&wrapper_path).unwrap(); + let paths = loaded.paths.unwrap(); + assert_eq!(paths.libpath, libpath); + + fs::remove_dir_all(&base).unwrap(); + } } From 4d2a4cee7921642c27c88bfe8eb781053461b461 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 12 May 2026 16:30:19 +0800 Subject: [PATCH 62/68] feat: add kafka startup readiness check --- geyser-plugin/src/plugin/mod.rs | 6 ++ geyser-plugin/src/preflight.rs | 134 +++++++++++++++++++++++++++++++- 2 files changed, 139 insertions(+), 1 deletion(-) diff --git a/geyser-plugin/src/plugin/mod.rs b/geyser-plugin/src/plugin/mod.rs index dbe9de5..658b0c1 100644 --- a/geyser-plugin/src/plugin/mod.rs +++ b/geyser-plugin/src/plugin/mod.rs @@ -95,6 +95,9 @@ impl GeyserPlugin for KafkaPlugin { let (version_n, version_s) = get_rdkafka_version(); info!("rd_kafka_version: {:#08x}, {}", version_n, version_s); + crate::preflight::check_kafka_readiness(&config) + .map_err(startup_error_to_plugin_error)?; + let producer_config = build_producer_config(&config); let producer = 
rdkafka::producer::ThreadedProducer::from_config_and_context( @@ -274,6 +277,9 @@ fn startup_error_to_plugin_error( error: crate::preflight::StartupError, ) -> PluginError { error!("{error}"); + if error.subsystem == "kafka" { + std::process::exit(1); + } PluginError::Custom(Box::new(std::io::Error::other(error.to_string()))) } diff --git a/geyser-plugin/src/preflight.rs b/geyser-plugin/src/preflight.rs index b02492d..2225a37 100644 --- a/geyser-plugin/src/preflight.rs +++ b/geyser-plugin/src/preflight.rs @@ -14,9 +14,16 @@ use { agave_geyser_plugin_interface::geyser_plugin_interface::GeyserPluginError, + rdkafka::{ + ClientConfig, + producer::{BaseProducer, Producer}, + }, std::{error::Error, fmt, fs, path::PathBuf}, }; +pub(crate) const STARTUP_CHECK_TIMEOUT: std::time::Duration = + std::time::Duration::from_secs(3); + #[derive(Debug, Clone, PartialEq, Eq)] pub struct StartupError { pub subsystem: &'static str, @@ -91,6 +98,96 @@ pub(crate) fn run_static_startup_checks( Ok(loaded) } +pub fn check_kafka_readiness( + config: &crate::config::Config, +) -> Result<(), StartupError> { + let mut producer_config = ClientConfig::new(); + for (key, value) in &config.kafka.client { + producer_config.set(key, value); + } + producer_config.set("bootstrap.servers", &config.kafka.bootstrap_servers); + + let producer: BaseProducer = producer_config.create().map_err(|error| { + StartupError::new( + "kafka", + Some("kafka.client"), + Some(config.kafka.bootstrap_servers.clone()), + format!( + "failed to create kafka producer for readiness check: {error}" + ), + "fix kafka.bootstrap_servers or kafka.client settings in geyser-plugin/plugin-config.toml", + ) + })?; + + let metadata = match producer + .client() + .fetch_metadata(Some(&config.kafka.topic), STARTUP_CHECK_TIMEOUT) + { + Ok(metadata) => metadata, + Err(error) => { + drop_readiness_producer(producer); + return Err(StartupError::new( + "kafka", + Some("kafka.bootstrap_servers"), + 
Some(config.kafka.bootstrap_servers.clone()), + format!("failed to fetch kafka metadata: {error}"), + "start Kafka with make kafka-ready or update kafka.bootstrap_servers in geyser-plugin/plugin-config.toml", + )); + } + }; + + drop_readiness_producer(producer); + + validate_topic_metadata(&metadata, &config.kafka.topic).map_err(|cause| { + StartupError::new( + "kafka", + Some("kafka.topic"), + Some(config.kafka.topic.clone()), + cause, + "create the topic or enable broker topic auto-creation before launching the validator", + ) + }) +} + +fn drop_readiness_producer(producer: BaseProducer) { + let _ = producer.flush(STARTUP_CHECK_TIMEOUT); + drop(producer); + std::thread::sleep(std::time::Duration::from_millis(100)); +} + +fn validate_topic_metadata( + metadata: &rdkafka::metadata::Metadata, + topic: &str, +) -> Result<(), String> { + validate_topic_entries( + metadata.topics().iter().map(|topic_metadata| { + ( + topic_metadata.name(), + topic_metadata.error().map(|error| format!("{error:?}")), + ) + }), + topic, + ) +} + +fn validate_topic_entries<'a>( + topic_entries: impl IntoIterator)>, + topic: &str, +) -> Result<(), String> { + let Some((_, error)) = topic_entries + .into_iter() + .find(|(topic_name, _)| *topic_name == topic) + else { + return Err("topic is not present in broker metadata".to_string()); + }; + + if let Some(error) = error { + return Err(format!("topic metadata error: {error}")); + } + + Ok(()) +} + pub fn load_config_with_paths( config_path: impl AsRef, ) -> Result { @@ -236,7 +333,10 @@ fn config_error_to_startup_error( #[cfg(test)] mod tests { - use super::{load_config_with_paths, run_static_startup_checks}; + use super::{ + load_config_with_paths, run_static_startup_checks, + validate_topic_entries, + }; use std::{ fs, path::PathBuf, @@ -396,4 +496,36 @@ admin = "127.0.0.1:8080" fs::remove_dir_all(&base).unwrap(); } + + #[test] + fn topic_metadata_validation_accepts_present_topic_without_error() { + let result = validate_topic_entries( + 
[("other", None), ("solana.testnet.account_updates", None)], + "solana.testnet.account_updates", + ); + + assert_eq!(result, Ok(())); + } + + #[test] + fn topic_metadata_validation_reports_missing_topic() { + let error = + validate_topic_entries([("other", None)], "missing").unwrap_err(); + + assert_eq!(error, "topic is not present in broker metadata"); + } + + #[test] + fn topic_metadata_validation_reports_topic_error() { + let error = validate_topic_entries( + [( + "solana.testnet.account_updates", + Some("unknown topic".into()), + )], + "solana.testnet.account_updates", + ) + .unwrap_err(); + + assert_eq!(error, "topic metadata error: unknown topic"); + } } From 3f543ddf7b468e9a0563c733b2396f20a9834d5e Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 12 May 2026 16:52:27 +0800 Subject: [PATCH 63/68] feat: split ksql startup restore into prefetch and restore Amp-Thread-ID: https://ampcode.com/threads/T-019e1b5e-3180-777e-9969-0d19e019fb98 Co-authored-by: Amp --- geyser-plugin/src/config.rs | 120 ++++++++++++++++++++++++++++++-- geyser-plugin/src/ksql.rs | 61 +--------------- geyser-plugin/src/plugin/mod.rs | 83 +++++++++++++++------- geyser-plugin/src/preflight.rs | 47 +++++++++++++ 4 files changed, 221 insertions(+), 90 deletions(-) diff --git a/geyser-plugin/src/config.rs b/geyser-plugin/src/config.rs index 37be6a9..66e40a1 100644 --- a/geyser-plugin/src/config.rs +++ b/geyser-plugin/src/config.rs @@ -239,18 +239,44 @@ impl Config { .to_owned(), }); } + } - if self.ksql.table.trim().is_empty() { - return Err(GeyserPluginError::ConfigFileReadError { - msg: "invalid config field `ksql.table`: table must not be empty".to_owned(), - }); + validate_ksql_identifier(&self.ksql.table).map_err(|error| { + GeyserPluginError::ConfigFileReadError { + msg: format!("invalid config field `ksql.table`: {error}"), } - } + })?; Ok(()) } } +/// Validates that `identifier` is a safe ksqlDB identifier suitable for +/// direct interpolation into a SQL statement. 
The identifier must start with +/// an ASCII letter or `_` and may otherwise contain only ASCII alphanumeric +/// characters or `_`. +pub(crate) fn validate_ksql_identifier( + identifier: &str, +) -> std::io::Result<&str> { + let mut chars = identifier.chars(); + let first = chars.next().ok_or_else(|| { + std::io::Error::other("ksql identifier must not be empty") + })?; + if !(first.is_ascii_alphabetic() || first == '_') { + return Err(std::io::Error::other(format!( + "invalid ksql identifier `{identifier}`: must start with an ASCII letter or `_`" + ))); + } + for c in chars { + if !(c.is_ascii_alphanumeric() || c == '_') { + return Err(std::io::Error::other(format!( + "invalid ksql identifier `{identifier}`: only ASCII alphanumeric characters and `_` are allowed" + ))); + } + } + Ok(identifier) +} + #[allow(dead_code)] fn read_to_string(path: &Path) -> PluginResult { let mut file = File::open(path)?; @@ -275,12 +301,45 @@ pub(crate) fn resolve_runtime_config_path( #[cfg(test)] mod tests { - use super::Config; + use super::{Config, validate_ksql_identifier}; use std::{ fs, time::{SystemTime, UNIX_EPOCH}, }; + #[test] + fn test_validates_simple_identifier() { + assert_eq!(validate_ksql_identifier("accounts").unwrap(), "accounts"); + assert_eq!(validate_ksql_identifier("_x").unwrap(), "_x"); + assert_eq!(validate_ksql_identifier("A1_b2").unwrap(), "A1_b2"); + } + + #[test] + fn test_rejects_empty_identifier() { + let error = validate_ksql_identifier("").unwrap_err().to_string(); + assert!(error.contains("must not be empty")); + } + + #[test] + fn test_rejects_identifier_starting_with_digit() { + let error = validate_ksql_identifier("1bad").unwrap_err().to_string(); + assert!(error.contains("must start with an ASCII letter")); + } + + #[test] + fn test_rejects_identifier_with_invalid_characters() { + let error = validate_ksql_identifier("accounts; DROP TABLE x") + .unwrap_err() + .to_string(); + assert!(error.contains("only ASCII alphanumeric")); + } + + #[test] + fn 
test_rejects_identifier_with_quote() { + let error = validate_ksql_identifier("a\"b").unwrap_err().to_string(); + assert!(error.contains("only ASCII alphanumeric")); + } + fn parse_config(toml: &str) -> Result { let mut config: Config = toml::from_str(toml).map_err(|error| error.to_string())?; @@ -608,6 +667,55 @@ admin = "127.0.0.1:8080" ); } + #[test] + fn test_rejects_invalid_ksql_table_identifier() { + let error = parse_config( + r#" +libpath = "target/release/libsolana_accountsdb_plugin_kafka.so" + +[kafka] +bootstrap_servers = "localhost:9092" +topic = "solana.testnet.account_updates" + +[ksql] +url = "http://127.0.0.1:8088" +table = "bad-name" + +[plugin] +local_rpc_url = "http://127.0.0.1:8899" +admin = "127.0.0.1:8080" +"#, + ) + .unwrap_err(); + + assert!(error.contains("invalid config field `ksql.table`")); + assert!(error.contains("only ASCII alphanumeric")); + } + + #[test] + fn test_rejects_ksql_table_starting_with_digit() { + let error = parse_config( + r#" +libpath = "target/release/libsolana_accountsdb_plugin_kafka.so" + +[kafka] +bootstrap_servers = "localhost:9092" +topic = "solana.testnet.account_updates" + +[ksql] +table = "1bad" + +[plugin] +local_rpc_url = "http://127.0.0.1:8899" +admin = "127.0.0.1:8080" +"#, + ) + .unwrap_err(); + + assert!(error.contains("invalid config field `ksql.table`")); + assert!(error.contains("must start with an ASCII letter")); + } + #[test] fn test_passes_through_kafka_client_overrides() { let config = parse_config( diff --git a/geyser-plugin/src/ksql.rs b/geyser-plugin/src/ksql.rs index e595a19..99d26dc 100644 --- a/geyser-plugin/src/ksql.rs +++ b/geyser-plugin/src/ksql.rs @@ -32,7 +32,7 @@ impl KsqlPubkeyRestoreClient { } pub(crate) fn fetch_pubkeys(&self) -> io::Result> { - let table = validate_ksql_identifier(&self.table)?; + let table = crate::config::validate_ksql_identifier(&self.table)?; let sql = format!("SELECT PUBKEY FROM {table};"); let query_url = format!("{}/query-stream", self.base_url); debug!( @@ 
-72,30 +72,6 @@ impl KsqlPubkeyRestoreClient { } } -/// Validates that `identifier` is a safe ksqlDB identifier suitable for -/// direct interpolation into a SQL statement. The identifier must start with -/// an ASCII letter or `_` and may otherwise contain only ASCII alphanumeric -/// characters or `_`. -pub(crate) fn validate_ksql_identifier(identifier: &str) -> io::Result<&str> { - let mut chars = identifier.chars(); - let first = chars - .next() - .ok_or_else(|| io::Error::other("ksql identifier must not be empty"))?; - if !(first.is_ascii_alphabetic() || first == '_') { - return Err(io::Error::other(format!( - "invalid ksql identifier `{identifier}`: must start with an ASCII letter or `_`" - ))); - } - for c in chars { - if !(c.is_ascii_alphanumeric() || c == '_') { - return Err(io::Error::other(format!( - "invalid ksql identifier `{identifier}`: only ASCII alphanumeric characters and `_` are allowed" - ))); - } - } - Ok(identifier) -} - pub(crate) fn parse_pubkeys_stream( reader: impl BufRead, ) -> io::Result> { @@ -172,7 +148,7 @@ pub(crate) fn parse_pubkeys_stream( #[cfg(test)] mod tests { - use super::{parse_pubkeys_stream, validate_ksql_identifier}; + use super::parse_pubkeys_stream; fn pubkey(byte: u8) -> [u8; 32] { [byte; 32] @@ -248,37 +224,4 @@ mod tests { assert!(error.contains("expected 32 decoded PUBKEY bytes")); } - - #[test] - fn test_validates_simple_identifier() { - assert_eq!(validate_ksql_identifier("accounts").unwrap(), "accounts"); - assert_eq!(validate_ksql_identifier("_x").unwrap(), "_x"); - assert_eq!(validate_ksql_identifier("A1_b2").unwrap(), "A1_b2"); - } - - #[test] - fn test_rejects_empty_identifier() { - let error = validate_ksql_identifier("").unwrap_err().to_string(); - assert!(error.contains("must not be empty")); - } - - #[test] - fn test_rejects_identifier_starting_with_digit() { - let error = validate_ksql_identifier("1bad").unwrap_err().to_string(); - assert!(error.contains("must start with an ASCII letter")); - } - - 
#[test] - fn test_rejects_identifier_with_invalid_characters() { - let error = validate_ksql_identifier("accounts; DROP TABLE x") - .unwrap_err() - .to_string(); - assert!(error.contains("only ASCII alphanumeric")); - } - - #[test] - fn test_rejects_identifier_with_quote() { - let error = validate_ksql_identifier("a\"b").unwrap_err().to_string(); - assert!(error.contains("only ASCII alphanumeric")); - } } diff --git a/geyser-plugin/src/plugin/mod.rs b/geyser-plugin/src/plugin/mod.rs index 658b0c1..3bd28c3 100644 --- a/geyser-plugin/src/plugin/mod.rs +++ b/geyser-plugin/src/plugin/mod.rs @@ -39,6 +39,7 @@ use { rdkafka::{ClientConfig, config::FromClientConfigAndContext}, std::{ fmt::{Debug, Formatter}, + io, sync::{Arc, Mutex, MutexGuard}, time::Duration, }, @@ -95,6 +96,8 @@ impl GeyserPlugin for KafkaPlugin { let (version_n, version_s) = get_rdkafka_version(); info!("rd_kafka_version: {:#08x}, {}", version_n, version_s); + let prefetched_restore = Self::fetch_tracking_from_ksql(&config)?; + crate::preflight::check_kafka_readiness(&config) .map_err(startup_error_to_plugin_error)?; @@ -120,8 +123,8 @@ impl GeyserPlugin for KafkaPlugin { config.plugin.local_rpc_url.clone(), ) .map_err(|error| PluginError::Custom(Box::new(error)))?; - Self::restore_tracking_from_ksql( - &config, + Self::restore_prefetched_tracking( + prefetched_restore, &self.account_subscriptions, &initial_account_backfill, )?; @@ -218,39 +221,68 @@ impl KafkaPlugin { .expect("update_account_topic is unavailable") } - fn restore_tracking_from_ksql( + fn fetch_tracking_from_ksql( config: &Config, - account_subscriptions: &AccountSubscriptions, - initial_account_backfill: &InitialAccountBackfill, - ) -> PluginResult<()> { + ) -> PluginResult> { let Some(raw_url) = config.ksql.url.as_deref() else { - return Ok(()); + return Ok(None); }; let url = raw_url.trim(); - - let table = &config.ksql.table; + let table = config.ksql.table.clone(); info!("Startup ksql restore enabled, url={}, table={}", url, 
table); - let client = KsqlPubkeyRestoreClient::new(url, table) - .map_err(|error| PluginError::Custom(Box::new(error)))?; - let pubkeys = client - .fetch_pubkeys() - .map_err(|error| PluginError::Custom(Box::new(error)))?; + let client = KsqlPubkeyRestoreClient::new(url, &table).map_err( + |error| { + error!( + "Startup ksql restore failed before plugin initialization completed: {error}" + ); + PluginError::Custom(Box::new(io::Error::other(format!( + "Startup ksql restore failed before plugin initialization completed: {error}" + )))) + }, + )?; + let pubkeys = client.fetch_pubkeys().map_err(|error| { + error!( + "Startup ksql restore failed before plugin initialization completed: {error}" + ); + PluginError::Custom(Box::new(io::Error::other(format!( + "Startup ksql restore failed before plugin initialization completed: {error}" + )))) + })?; let fetched_count = pubkeys.len(); info!( "Fetched {} pubkeys from ksql startup restore", fetched_count ); + Ok(Some(KsqlStartupRestore { + url: url.to_owned(), + table, + pubkeys, + })) + } + + fn restore_prefetched_tracking( + restore: Option, + account_subscriptions: &AccountSubscriptions, + initial_account_backfill: &InitialAccountBackfill, + ) -> PluginResult<()> { + let Some(restore) = restore else { + return Ok(()); + }; + + let fetched_count = restore.pubkeys.len(); let summary = restore_pubkeys_in_chunks( account_subscriptions, initial_account_backfill.handle_ref(), - pubkeys, + restore.pubkeys, ) .map_err(add_accounts_error_to_plugin_error)?; info!( - "Completed startup ksql restore, fetched_count={}, deduplicated_count={}, chunk_count={}, accepted_count={}, newly_added_count={}, retried_backfill_count={}, duplicate_count={}", + "Completed startup ksql restore, url={}, table={}, fetched_count={}, deduplicated_count={}, chunk_count={}, accepted_count={}, newly_added_count={}, retried_backfill_count={}, duplicate_count={}", + restore.url, + restore.table, fetched_count, summary.deduplicated_count, summary.chunk_count, 
@@ -264,6 +296,13 @@ impl KafkaPlugin { } } +#[derive(Debug, Default)] +struct KsqlStartupRestore { + url: String, + table: String, + pubkeys: Vec<[u8; 32]>, +} + fn build_producer_config(config: &Config) -> ClientConfig { let mut producer_config = ClientConfig::new(); for (key, value) in &config.kafka.client { @@ -393,19 +432,13 @@ mod tests { } #[test] - fn test_restore_tracking_from_ksql_is_noop_when_disabled() { + fn test_fetch_tracking_from_ksql_is_noop_when_disabled() { let config = Config::default(); let subs = AccountSubscriptions::new(); - let initial_account_backfill = - crate::initial_account_backfill::InitialAccountBackfill::default(); - let result = KafkaPlugin::restore_tracking_from_ksql( - &config, - &subs, - &initial_account_backfill, - ); + let result = KafkaPlugin::fetch_tracking_from_ksql(&config); - assert!(result.is_ok()); + assert!(matches!(result, Ok(None))); assert!(!subs.contains_sync(&pk(1))); } diff --git a/geyser-plugin/src/preflight.rs b/geyser-plugin/src/preflight.rs index 2225a37..47b497f 100644 --- a/geyser-plugin/src/preflight.rs +++ b/geyser-plugin/src/preflight.rs @@ -98,6 +98,53 @@ pub(crate) fn run_static_startup_checks( Ok(loaded) } +pub fn check_ksql_readiness( + config: &crate::config::Config, +) -> Result<(), StartupError> { + let Some(raw_url) = config.ksql.url.as_deref() else { + return Ok(()); + }; + let url = raw_url.trim(); + + crate::config::validate_ksql_identifier(&config.ksql.table).map_err( + |error| { + StartupError::new( + "ksql", + Some("ksql.table"), + Some(config.ksql.table.clone()), + format!("invalid ksql.table identifier: {error}"), + "fix ksql.table to be a valid SQL identifier", + ) + }, + )?; + + let client = crate::ksql::KsqlPubkeyRestoreClient::new( + url, + &config.ksql.table, + ) + .map_err(|error| { + StartupError::new( + "ksql", + Some("ksql.url"), + Some(url.to_owned()), + format!("failed to run startup restore query: {error}"), + "start ksqlDB with make kafka-ready, fix ksql.url/ksql.table, or 
remove ksql.url to disable startup restore", + ) + })?; + + client.fetch_pubkeys().map_err(|error| { + StartupError::new( + "ksql", + Some("ksql.url"), + Some(url.to_owned()), + format!("failed to run startup restore query: {error}"), + "start ksqlDB with make kafka-ready, fix ksql.url/ksql.table, or remove ksql.url to disable startup restore", + ) + })?; + + Ok(()) +} + pub fn check_kafka_readiness( config: &crate::config::Config, ) -> Result<(), StartupError> { From a60c0b09c6b00e956db26b895e2ba1a3506ca011 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 12 May 2026 16:57:27 +0800 Subject: [PATCH 64/68] feat: add admin bind startup check Amp-Thread-ID: https://ampcode.com/threads/T-019e1b63-c250-70ac-ab23-54c5a641b8ea Co-authored-by: Amp --- geyser-plugin/src/metrics.rs | 72 +++++++++++++++++--------------- geyser-plugin/src/plugin/mod.rs | 3 ++ geyser-plugin/src/preflight.rs | 59 +++++++++++++++++++++++++- geyser-plugin/src/server/mod.rs | 20 +++++++-- geyser-plugin/src/server/prom.rs | 10 +++-- 5 files changed, 124 insertions(+), 40 deletions(-) diff --git a/geyser-plugin/src/metrics.rs b/geyser-plugin/src/metrics.rs index 7cdcfbf..bd37770 100644 --- a/geyser-plugin/src/metrics.rs +++ b/geyser-plugin/src/metrics.rs @@ -6,7 +6,7 @@ use { producer::{DeliveryResult, ProducerContext}, statistics::Statistics, }, - std::sync::Once, + std::sync::OnceLock, }; lazy_static::lazy_static! { @@ -58,38 +58,44 @@ lazy_static::lazy_static! { ).unwrap(); } -pub fn register_metrics() { - static REGISTER: Once = Once::new(); - REGISTER.call_once(|| { - macro_rules! 
register { - ($collector:ident) => { - REGISTRY - .register(Box::new($collector.clone())) - .expect("collector can't be registered"); - }; - } - register!(VERSION); - register!(UPLOAD_ACCOUNTS_TOTAL); - register!(INITIAL_BACKFILL_REQUESTS_ENQUEUED_TOTAL); - register!(INITIAL_BACKFILL_PUBKEYS_ENQUEUED_TOTAL); - register!(INITIAL_BACKFILL_RPC_ATTEMPTS_TOTAL); - register!(INITIAL_BACKFILL_RPC_FAILURES_TOTAL); - register!(INITIAL_BACKFILL_SNAPSHOTS_TOTAL); - register!(INITIAL_BACKFILL_IN_FLIGHT); - register!(KAFKA_STATS); - - for (key, value) in &[ - ("version", VERSION_INFO.version), - ("solana", VERSION_INFO.solana), - ("git", VERSION_INFO.git), - ("rustc", VERSION_INFO.rustc), - ("buildts", VERSION_INFO.buildts), - ] { - VERSION - .with_label_values(&[key.to_string(), value.to_string()]) - .inc(); - } - }); +pub fn register_metrics() -> Result<(), String> { + static REGISTER_RESULT: OnceLock> = OnceLock::new(); + REGISTER_RESULT + .get_or_init(|| { + register_all_metrics().map_err(|error| error.to_string()) + }) + .clone() +} + +fn register_all_metrics() -> Result<(), prometheus::Error> { + macro_rules! 
register { + ($collector:ident) => { + REGISTRY.register(Box::new($collector.clone()))?; + }; + } + register!(VERSION); + register!(UPLOAD_ACCOUNTS_TOTAL); + register!(INITIAL_BACKFILL_REQUESTS_ENQUEUED_TOTAL); + register!(INITIAL_BACKFILL_PUBKEYS_ENQUEUED_TOTAL); + register!(INITIAL_BACKFILL_RPC_ATTEMPTS_TOTAL); + register!(INITIAL_BACKFILL_RPC_FAILURES_TOTAL); + register!(INITIAL_BACKFILL_SNAPSHOTS_TOTAL); + register!(INITIAL_BACKFILL_IN_FLIGHT); + register!(KAFKA_STATS); + + for (key, value) in &[ + ("version", VERSION_INFO.version), + ("solana", VERSION_INFO.solana), + ("git", VERSION_INFO.git), + ("rustc", VERSION_INFO.rustc), + ("buildts", VERSION_INFO.buildts), + ] { + VERSION + .with_label_values(&[key.to_string(), value.to_string()]) + .inc(); + } + + Ok(()) } #[derive(Debug, Default, Clone, Copy)] diff --git a/geyser-plugin/src/plugin/mod.rs b/geyser-plugin/src/plugin/mod.rs index 3bd28c3..0313a29 100644 --- a/geyser-plugin/src/plugin/mod.rs +++ b/geyser-plugin/src/plugin/mod.rs @@ -93,6 +93,9 @@ impl GeyserPlugin for KafkaPlugin { .map_err(startup_error_to_plugin_error)?; let config = loaded.config; + crate::preflight::check_admin_bind(&config) + .map_err(startup_error_to_plugin_error)?; + let (version_n, version_s) = get_rdkafka_version(); info!("rd_kafka_version: {:#08x}, {}", version_n, version_s); diff --git a/geyser-plugin/src/preflight.rs b/geyser-plugin/src/preflight.rs index 47b497f..2d2a268 100644 --- a/geyser-plugin/src/preflight.rs +++ b/geyser-plugin/src/preflight.rs @@ -145,6 +145,24 @@ pub fn check_ksql_readiness( Ok(()) } +pub fn check_admin_bind( + config: &crate::config::Config, +) -> Result<(), StartupError> { + match std::net::TcpListener::bind(config.plugin.admin) { + Ok(listener) => { + drop(listener); + Ok(()) + } + Err(error) => Err(StartupError::new( + "admin", + Some("plugin.admin"), + Some(config.plugin.admin.to_string()), + format!("failed to bind admin HTTP address: {error}"), + "choose a free plugin.admin port or stop the 
process currently using it", + )), + } +} + pub fn check_kafka_readiness( config: &crate::config::Config, ) -> Result<(), StartupError> { @@ -381,11 +399,12 @@ fn config_error_to_startup_error( #[cfg(test)] mod tests { use super::{ - load_config_with_paths, run_static_startup_checks, + check_admin_bind, load_config_with_paths, run_static_startup_checks, validate_topic_entries, }; use std::{ fs, + net::TcpListener, path::PathBuf, time::{SystemTime, UNIX_EPOCH}, }; @@ -575,4 +594,42 @@ admin = "127.0.0.1:8080" assert_eq!(error, "topic metadata error: unknown topic"); } + + #[test] + fn check_admin_bind_reports_address_in_use() { + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let local_addr = listener.local_addr().unwrap(); + + let mut config = crate::config::Config::default(); + config.plugin.admin = local_addr; + + let error = check_admin_bind(&config).unwrap_err(); + assert_eq!(error.subsystem, "admin"); + assert_eq!(error.field, Some("plugin.admin")); + assert_eq!( + error.target.as_deref(), + Some(local_addr.to_string().as_str()) + ); + assert!(error.cause.contains("failed to bind admin HTTP address")); + assert!(error.action.contains("free plugin.admin port")); + + drop(listener); + } + + #[test] + fn check_admin_bind_succeeds_on_free_port() { + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let local_addr = listener.local_addr().unwrap(); + drop(listener); + + let mut config = crate::config::Config::default(); + config.plugin.admin = local_addr; + + // It is possible (though unlikely) for the OS to assign the port to a + // different process between the drop and the bind here. In that case + // this test fails spuriously; the panic below reports the bind error.
+ if let Err(error) = check_admin_bind(&config) { + panic!("expected admin bind to succeed, got: {error}"); + } + } } diff --git a/geyser-plugin/src/server/mod.rs b/geyser-plugin/src/server/mod.rs index 4eaf21f..bb3f9ad 100644 --- a/geyser-plugin/src/server/mod.rs +++ b/geyser-plugin/src/server/mod.rs @@ -32,12 +32,26 @@ impl HttpService { initial_account_backfill: InitialAccountBackfillHandle, metrics_enabled: bool, ) -> IoResult { - if metrics_enabled { - register_metrics(); + if metrics_enabled && let Err(error) = register_metrics() { + let message = + format!("failed to register Prometheus metrics: {error}"); + error!("{message}"); + return Err(std::io::Error::other(message)); } let runtime = Runtime::new()?; - let listener = runtime.block_on(TcpListener::bind(address))?; + let listener = + runtime + .block_on(TcpListener::bind(address)) + .map_err(|error| { + let message = format!( + "Failed to bind admin HTTP API to {address}: {error}. \ + Choose a free plugin.admin port or stop the process \ + currently using it." 
+ ); + error!("{message}"); + std::io::Error::new(error.kind(), message) + })?; runtime.spawn(async move { loop { diff --git a/geyser-plugin/src/server/prom.rs b/geyser-plugin/src/server/prom.rs index 06bb0ae..a27d494 100644 --- a/geyser-plugin/src/server/prom.rs +++ b/geyser-plugin/src/server/prom.rs @@ -10,7 +10,11 @@ pub fn metrics_handler() -> Response> { error!("could not encode custom metrics: {}", error); String::new() }); - Response::builder() - .body(Full::new(Bytes::from(metrics))) - .unwrap() + match Response::builder().body(Full::new(Bytes::from(metrics))) { + Ok(response) => response, + Err(error) => { + error!("failed to build metrics response: {error}"); + Response::new(Full::new(Bytes::new())) + } + } } From f3fe5a84cda38b99bbb90828d1c9a9eb03d78755 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Tue, 12 May 2026 17:03:50 +0800 Subject: [PATCH 65/68] feat: validate local_rpc_url and improve backfill errors Amp-Thread-ID: https://ampcode.com/threads/T-019e1b68-4183-759b-8b06-bb7473bba060 Co-authored-by: Amp --- geyser-plugin/src/config.rs | 122 +++++++++++++++++- .../src/initial_account_backfill/mod.rs | 13 +- .../src/initial_account_backfill/rpc.rs | 48 ++++++- geyser-plugin/src/server/accounts.rs | 18 ++- 4 files changed, 191 insertions(+), 10 deletions(-) diff --git a/geyser-plugin/src/config.rs b/geyser-plugin/src/config.rs index 66e40a1..9dca5cf 100644 --- a/geyser-plugin/src/config.rs +++ b/geyser-plugin/src/config.rs @@ -193,10 +193,40 @@ impl Config { }); } - if self.plugin.local_rpc_url.trim().is_empty() { + let trimmed_local_rpc_url = self.plugin.local_rpc_url.trim(); + if trimmed_local_rpc_url.is_empty() { return Err(GeyserPluginError::ConfigFileReadError { - msg: "missing required config field `plugin.local_rpc_url`" - .to_owned(), + msg: + "invalid config field `plugin.local_rpc_url`: URL must not be empty" + .to_owned(), + }); + } + + let parsed_local_rpc_url = + Url::parse(trimmed_local_rpc_url).map_err(|error| { + 
GeyserPluginError::ConfigFileReadError { + msg: format!( + "invalid config field `plugin.local_rpc_url`: {error}" + ), + } + })?; + + match parsed_local_rpc_url.scheme() { + "http" | "https" => {} + scheme => { + return Err(GeyserPluginError::ConfigFileReadError { + msg: format!( + "invalid config field `plugin.local_rpc_url`: unsupported scheme `{scheme}`" + ), + }); + } + } + + if !parsed_local_rpc_url.has_host() { + return Err(GeyserPluginError::ConfigFileReadError { + msg: + "invalid config field `plugin.local_rpc_url`: host is required" + .to_owned(), }); } @@ -716,6 +746,92 @@ admin = "127.0.0.1:8080" assert!(error.contains("must start with an ASCII letter")); } + #[test] + fn test_rejects_empty_local_rpc_url() { + let error = parse_config( + r#" +libpath = "target/release/libsolana_accountsdb_plugin_kafka.so" + +[kafka] +bootstrap_servers = "localhost:9092" +topic = "solana.testnet.account_updates" + +[plugin] +local_rpc_url = " " +admin = "127.0.0.1:8080" +"#, + ) + .unwrap_err(); + + assert!(error.contains("invalid config field `plugin.local_rpc_url`")); + assert!(error.contains("URL must not be empty")); + } + + #[test] + fn test_rejects_local_rpc_url_without_scheme() { + let error = parse_config( + r#" +libpath = "target/release/libsolana_accountsdb_plugin_kafka.so" + +[kafka] +bootstrap_servers = "localhost:9092" +topic = "solana.testnet.account_updates" + +[plugin] +local_rpc_url = "127.0.0.1:8899" +admin = "127.0.0.1:8080" +"#, + ) + .unwrap_err(); + + assert!(error.contains("invalid config field `plugin.local_rpc_url`")); + assert!(error.contains("relative URL without a base")); + } + + #[test] + fn test_rejects_local_rpc_url_with_unsupported_scheme() { + let error = parse_config( + r#" +libpath = "target/release/libsolana_accountsdb_plugin_kafka.so" + +[kafka] +bootstrap_servers = "localhost:9092" +topic = "solana.testnet.account_updates" + +[plugin] +local_rpc_url = "ftp://127.0.0.1:8899" +admin = "127.0.0.1:8080" +"#, + ) + .unwrap_err(); + + 
assert!(error.contains("invalid config field `plugin.local_rpc_url`")); + assert!(error.contains("unsupported scheme `ftp`")); + } + + #[test] + fn test_rejects_local_rpc_url_without_host() { + let error = parse_config( + r#" +libpath = "target/release/libsolana_accountsdb_plugin_kafka.so" + +[kafka] +bootstrap_servers = "localhost:9092" +topic = "solana.testnet.account_updates" + +[plugin] +local_rpc_url = "http://:8899" +admin = "127.0.0.1:8080" +"#, + ) + .unwrap_err(); + + assert!(error.contains("invalid config field `plugin.local_rpc_url`")); + assert!( + error.contains("empty host") || error.contains("host is required") + ); + } + #[test] fn test_passes_through_kafka_client_overrides() { let config = parse_config( diff --git a/geyser-plugin/src/initial_account_backfill/mod.rs b/geyser-plugin/src/initial_account_backfill/mod.rs index 888abb0..7784fee 100644 --- a/geyser-plugin/src/initial_account_backfill/mod.rs +++ b/geyser-plugin/src/initial_account_backfill/mod.rs @@ -55,9 +55,10 @@ impl InitialAccountBackfill { update_account_topic, subscriptions, client: RpcClient::new_with_commitment( - local_rpc_url, + local_rpc_url.clone(), CommitmentConfig::confirmed(), ), + local_rpc_url, }); let handle = InitialAccountBackfillHandle { inner: inner.clone(), @@ -96,6 +97,7 @@ impl InitialAccountBackfill { String::new(), CommitmentConfig::confirmed(), ), + local_rpc_url: String::new(), }); Self { handle: InitialAccountBackfillHandle { inner }, @@ -254,11 +256,17 @@ struct InitialAccountBackfillInner { update_account_topic: Arc, subscriptions: AccountSubscriptions, client: RpcClient, + local_rpc_url: String, } impl InitialAccountBackfillInner { async fn process_request(&self, pubkeys: &[[u8; 32]]) { - match rpc::fetch_account_events_for_request(&self.client, pubkeys).await + match rpc::fetch_account_events_for_request( + &self.client, + &self.local_rpc_url, + pubkeys, + ) + .await { Ok(events) => { info!( @@ -399,6 +407,7 @@ mod tests { String::new(), 
CommitmentConfig::confirmed(), ), + local_rpc_url: String::new(), }), rx, ) diff --git a/geyser-plugin/src/initial_account_backfill/rpc.rs b/geyser-plugin/src/initial_account_backfill/rpc.rs index 2477ecc..a8de068 100644 --- a/geyser-plugin/src/initial_account_backfill/rpc.rs +++ b/geyser-plugin/src/initial_account_backfill/rpc.rs @@ -23,17 +23,22 @@ use { pub(crate) async fn fetch_account_events_for_request( client: &RpcClient, + local_rpc_url: &str, pubkeys: &[[u8; 32]], ) -> io::Result> { let mut events = Vec::with_capacity(pubkeys.len()); for chunk in pubkeys.chunks(INITIAL_BACKFILL_MAX_RPC_KEYS_PER_REQUEST) { - events.extend(fetch_account_events_for_chunk(client, chunk).await?); + events.extend( + fetch_account_events_for_chunk(client, local_rpc_url, chunk) + .await?, + ); } Ok(events) } async fn fetch_account_events_for_chunk( client: &RpcClient, + local_rpc_url: &str, pubkeys: &[[u8; 32]], ) -> io::Result> { let keys = pubkeys @@ -106,9 +111,10 @@ async fn fetch_account_events_for_chunk( .with_label_values(&["failed"]) .inc(); warn!( - "Initial account backfill RPC request failed for {} pubkeys, \ + "Initial account backfill RPC request failed for {} pubkeys via {}, \ attempt={}/{}: {error}", pubkeys.len(), + local_rpc_url, attempt, INITIAL_BACKFILL_MAX_ATTEMPTS ); @@ -123,7 +129,24 @@ async fn fetch_account_events_for_chunk( } } - Err(io::Error::other(last_error.unwrap())) + let last_error_message = last_error + .map(|error| error.to_string()) + .unwrap_or_else(|| "unknown error".to_owned()); + Err(io::Error::other(format_exhausted_error( + local_rpc_url, + INITIAL_BACKFILL_MAX_ATTEMPTS, + &last_error_message, + ))) +} + +pub(crate) fn format_exhausted_error( + local_rpc_url: &str, + max_attempts: usize, + last_error_message: &str, +) -> String { + format!( + "initial account backfill RPC failed after {max_attempts} attempts via {local_rpc_url}: {last_error_message}" + ) } pub(crate) const SYSTEM_PROGRAM_ID: Pubkey = @@ -169,3 +192,22 @@ pub(crate) fn 
map_missing_account( account_age: 0, } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_format_exhausted_error_includes_url_and_attempts() { + let message = format_exhausted_error( + "http://127.0.0.1:8899", + INITIAL_BACKFILL_MAX_ATTEMPTS, + "connection refused", + ); + + assert!(message.starts_with(&format!( + "initial account backfill RPC failed after {INITIAL_BACKFILL_MAX_ATTEMPTS} attempts via http://127.0.0.1:8899: " + ))); + assert!(message.contains("connection refused")); + } +} diff --git a/geyser-plugin/src/server/accounts.rs b/geyser-plugin/src/server/accounts.rs index c13878a..5b5e11f 100644 --- a/geyser-plugin/src/server/accounts.rs +++ b/geyser-plugin/src/server/accounts.rs @@ -190,15 +190,29 @@ pub async fn handle_post_accounts( } Err(AddAccountsError::QueueFull(outcome)) => json_response( StatusCode::SERVICE_UNAVAILABLE, - &AccountsResponse::from(outcome), + &BackfillUnavailableResponse { + error: "initial account backfill queue is full; retry the request after the validator RPC is available" + .to_owned(), + accounts: AccountsResponse::from(outcome), + }, ), Err(AddAccountsError::BackfillUnavailable(outcome)) => json_response( StatusCode::INTERNAL_SERVER_ERROR, - &AccountsResponse::from(outcome), + &BackfillUnavailableResponse { + error: "initial account backfill enqueue failed; retry the request after the validator RPC is available" + .to_owned(), + accounts: AccountsResponse::from(outcome), + }, ), } } +#[derive(serde::Serialize)] +struct BackfillUnavailableResponse { + error: String, + accounts: AccountsResponse, +} + impl From for AccountsResponse { fn from(outcome: AddAccountsOutcome) -> Self { Self { From c38d0c855fec315cf36b79734cb6ffc30ba7bddf Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Thu, 14 May 2026 11:15:19 +0800 Subject: [PATCH 66/68] docs: document safe-start startup checks --- geyser-plugin/README.md | 59 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git 
a/geyser-plugin/README.md b/geyser-plugin/README.md index 467584a..7257a20 100644 --- a/geyser-plugin/README.md +++ b/geyser-plugin/README.md @@ -123,6 +123,65 @@ admin = "127.0.0.1:3000" `kafka.bootstrap_servers`, `kafka.topic`, `plugin.local_rpc_url`, and `plugin.admin` are required. The `admin` bind address serves `POST /filters/accounts` and, when `plugin.metrics` is `true`, also `GET /metrics`. If `ksql.url` is set, it must be a valid absolute `http` or `https` base URL and startup will fail if the restore query cannot complete. Legacy filter arrays and legacy transaction, slot-status, block, and wrapping options are rejected during config parsing. +## Startup checks + +Startup checks run from `KafkaPlugin::on_load` during normal validator/plugin startup. They run whether the validator is launched through `make geyser-plugin-launch` or directly with `solana-test-validator --geyser-plugin-config ...`; no separate preflight binary or Makefile target is required. + +Startup validates: + +- validator JSON wrapper +- runtime TOML config +- plugin library path +- admin bind address +- ksqlDB startup restore when `ksql.url` is configured +- Kafka bootstrap/topic readiness +- local RPC URL syntax only, not local RPC liveness + +The local RPC endpoint is not required to be reachable during startup checks because it belongs to the validator being launched. 
+ +### Safe-start manual test matrix + +| Scenario | Temporary change | Expected prefix | +| --- | --- | --- | +| malformed validator JSON | invalid JSON in wrapper copy | `ERROR config startup check failed` | +| missing runtime TOML | JSON `config_file` points to missing TOML | `ERROR config startup check failed` | +| malformed runtime TOML | invalid TOML in runtime copy | `ERROR config startup check failed` | +| missing Kafka bootstrap | empty `kafka.bootstrap_servers` | `ERROR config startup check failed` | +| Kafka down | no Kafka on configured bootstrap | `ERROR kafka startup check failed` | +| ksqlDB down | `ksql.url = "http://127.0.0.1:1"` | `ERROR ksql startup check failed` | +| invalid ksql table | `table = "bad-name"` | `ERROR config startup check failed` | +| admin port in use | keep listener on configured port | `ERROR admin startup check failed` | +| malformed local RPC URL | `local_rpc_url = "127.0.0.1:8899"` | `ERROR config startup check failed` | + +Proof command for the original issue: + +```shell +make geyser-plugin-launch +``` + +Expected with no dependencies running: + +- exits non-zero +- prints a Kafka startup check error with action `make kafka-ready` +- validator/plugin startup exits gracefully +- does not print `Segmentation fault` + +Direct validator path that must receive the same checks: + +```shell +cd geyser-plugin +solana-test-validator --log --reset --geyser-plugin-config plugin-config.json +``` + +Expected failures and messages must match the Makefile path because the checks live in `KafkaPlugin::on_load`. 
+ +Success path: + +```shell +make kafka-ready +make geyser-plugin-launch +``` + ## Whitelist Management Account inclusion is managed through the HTTP API: From 458884b506288b70c4558c3078f676c46272c255 Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Thu, 14 May 2026 11:23:21 +0800 Subject: [PATCH 67/68] fix: complete safe-start verification cleanup --- geyser-plugin/README.md | 2 +- geyser-plugin/src/plugin/mod.rs | 17 ++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/geyser-plugin/README.md b/geyser-plugin/README.md index 7257a20..096291a 100644 --- a/geyser-plugin/README.md +++ b/geyser-plugin/README.md @@ -143,7 +143,7 @@ The local RPC endpoint is not required to be reachable during startup checks bec | Scenario | Temporary change | Expected prefix | | --- | --- | --- | -| malformed validator JSON | invalid JSON in wrapper copy | `ERROR config startup check failed` | +| malformed validator JSON | invalid JSON in wrapper copy | Agave rejects the wrapper before plugin load with `FailedToLoadPlugin`; no segfault | | missing runtime TOML | JSON `config_file` points to missing TOML | `ERROR config startup check failed` | | malformed runtime TOML | invalid TOML in runtime copy | `ERROR config startup check failed` | | missing Kafka bootstrap | empty `kafka.bootstrap_servers` | `ERROR config startup check failed` | diff --git a/geyser-plugin/src/plugin/mod.rs b/geyser-plugin/src/plugin/mod.rs index 0313a29..f3aa774 100644 --- a/geyser-plugin/src/plugin/mod.rs +++ b/geyser-plugin/src/plugin/mod.rs @@ -89,6 +89,9 @@ impl GeyserPlugin for KafkaPlugin { self.name(), config_file ); + let (version_n, version_s) = get_rdkafka_version(); + info!("rd_kafka_version: {:#08x}, {}", version_n, version_s); + let loaded = crate::preflight::run_static_startup_checks(config_file) .map_err(startup_error_to_plugin_error)?; let config = loaded.config; @@ -96,13 +99,12 @@ impl GeyserPlugin for KafkaPlugin { crate::preflight::check_admin_bind(&config) 
.map_err(startup_error_to_plugin_error)?; - let (version_n, version_s) = get_rdkafka_version(); - info!("rd_kafka_version: {:#08x}, {}", version_n, version_s); - - let prefetched_restore = Self::fetch_tracking_from_ksql(&config)?; - crate::preflight::check_kafka_readiness(&config) .map_err(startup_error_to_plugin_error)?; + crate::preflight::check_ksql_readiness(&config) + .map_err(startup_error_to_plugin_error)?; + + let prefetched_restore = Self::fetch_tracking_from_ksql(&config)?; let producer_config = build_producer_config(&config); let producer = @@ -319,10 +321,7 @@ fn startup_error_to_plugin_error( error: crate::preflight::StartupError, ) -> PluginError { error!("{error}"); - if error.subsystem == "kafka" { - std::process::exit(1); - } - PluginError::Custom(Box::new(std::io::Error::other(error.to_string()))) + std::process::exit(1); } #[derive(Clone, Debug, Default, PartialEq, Eq)] From bcded8a6fb6c6ae784794bf0b020f763f3c158dd Mon Sep 17 00:00:00 2001 From: Thorsten Lorenz Date: Thu, 14 May 2026 11:37:59 +0800 Subject: [PATCH 68/68] fix: generalize preflight error actions --- geyser-plugin/README.md | 2 +- geyser-plugin/src/preflight.rs | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/geyser-plugin/README.md b/geyser-plugin/README.md index 096291a..3d90e9e 100644 --- a/geyser-plugin/README.md +++ b/geyser-plugin/README.md @@ -162,7 +162,7 @@ make geyser-plugin-launch Expected with no dependencies running: - exits non-zero -- prints a Kafka startup check error with action `make kafka-ready` +- prints a Kafka startup check error with action `ensure Kafka is reachable at kafka.bootstrap_servers or update kafka.bootstrap_servers` - validator/plugin startup exits gracefully - does not print `Segmentation fault` diff --git a/geyser-plugin/src/preflight.rs b/geyser-plugin/src/preflight.rs index 2d2a268..2862bbf 100644 --- a/geyser-plugin/src/preflight.rs +++ b/geyser-plugin/src/preflight.rs @@ -92,7 +92,7 @@ pub(crate) fn 
run_static_startup_checks( Some("libpath"), Some(paths.libpath.display().to_string()), "plugin shared library does not exist", - "run make geyser-plugin-build or update libpath in the validator JSON wrapper", + "build the plugin shared library or update libpath in the validator JSON wrapper", )); } Ok(loaded) @@ -128,7 +128,7 @@ pub fn check_ksql_readiness( Some("ksql.url"), Some(url.to_owned()), format!("failed to run startup restore query: {error}"), - "start ksqlDB with make kafka-ready, fix ksql.url/ksql.table, or remove ksql.url to disable startup restore", + "ensure ksqlDB is reachable at ksql.url, fix ksql.url/ksql.table, or remove ksql.url to disable startup restore", ) })?; @@ -138,7 +138,7 @@ pub fn check_ksql_readiness( Some("ksql.url"), Some(url.to_owned()), format!("failed to run startup restore query: {error}"), - "start ksqlDB with make kafka-ready, fix ksql.url/ksql.table, or remove ksql.url to disable startup restore", + "ensure ksqlDB is reachable at ksql.url, fix ksql.url/ksql.table, or remove ksql.url to disable startup restore", ) })?; @@ -180,7 +180,7 @@ pub fn check_kafka_readiness( format!( "failed to create kafka producer for readiness check: {error}" ), - "fix kafka.bootstrap_servers or kafka.client settings in geyser-plugin/plugin-config.toml", + "fix kafka.bootstrap_servers or kafka.client settings", ) })?; @@ -196,7 +196,7 @@ pub fn check_kafka_readiness( Some("kafka.bootstrap_servers"), Some(config.kafka.bootstrap_servers.clone()), format!("failed to fetch kafka metadata: {error}"), - "start Kafka with make kafka-ready or update kafka.bootstrap_servers in geyser-plugin/plugin-config.toml", + "ensure Kafka is reachable at kafka.bootstrap_servers or update kafka.bootstrap_servers", )); } }; @@ -533,7 +533,7 @@ admin = "127.0.0.1:8080" .ends_with("missing-plugin.so") ); assert_eq!(error.cause, "plugin shared library does not exist"); - assert!(error.action.contains("make geyser-plugin-build")); + assert!(error.action.contains("build the 
plugin shared library")); fs::remove_dir_all(&base).unwrap(); }