diff --git a/.claude/skills/signet-storage/SKILL.md b/.claude/skills/signet-storage/SKILL.md index 119e3a2..daf8048 100644 --- a/.claude/skills/signet-storage/SKILL.md +++ b/.claude/skills/signet-storage/SKILL.md @@ -178,8 +178,9 @@ ColdStorageReadHandle -- Read-only handle - **`HistoryError`** -- `NonContiguousBlock`, `ParentHashMismatch`, `DbNotEmpty`, `EmptyRange`, `NoBlocks`, `HeightOutOfRange`, `Db(E)`, `IntList`. -- **`ColdStorageError`** -- `Backend(E)`, `NotFound`, `Cancelled`, - `Backpressure`, `TaskTerminated`. +- **`ColdStorageError`** -- `Backend(E)`, `NotFound`, `TooManyLogs`, + `DeadlineExceeded`, `StreamDeadlineExceeded`, `ReorgDetected`, + `TaskTerminated`. - **`StorageError`** -- `Hot(HistoryError)` | `Cold(ColdStorageError)`. diff --git a/CLAUDE.md b/CLAUDE.md index 4a7409c..f98bf99 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -21,7 +21,7 @@ indexed by block. Uses task-based async pattern with handles. ## Key Traits -- `ColdStorage`: Backend interface for cold storage +- `ColdStorageBackend`: Backend interface for cold storage - `HotKv`, `HotKvRead`, `HotKvWrite`: Hot storage abstractions - `HistoryRead`, `HistoryWrite`: Higher-level table operations diff --git a/Cargo.toml b/Cargo.toml index be09d71..fcad954 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,5 +74,6 @@ tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["rt"] } itertools = "0.14" lru = "0.16" +metrics = "0.24.2" sqlx = { version = "0.8", default-features = false } tracing = "0.1.44" diff --git a/crates/cold-mdbx/src/backend.rs b/crates/cold-mdbx/src/backend.rs index 4dfaf97..21be1e8 100644 --- a/crates/cold-mdbx/src/backend.rs +++ b/crates/cold-mdbx/src/backend.rs @@ -1,4 +1,4 @@ -//! MDBX backend implementation for [`ColdStorage`]. +//! MDBX backend implementation for [`ColdStorageBackend`]. //! //! This module provides an MDBX-based implementation of the cold storage //! backend. 
It uses the table definitions from this crate and the MDBX @@ -10,7 +10,7 @@ use crate::{ }; use alloy::{consensus::transaction::Recovered, primitives::BlockNumber}; use signet_cold::{ - BlockData, ColdReceipt, ColdResult, ColdStorage, ColdStorageError, ColdStorageRead, + BlockData, ColdReceipt, ColdResult, ColdStorageBackend, ColdStorageError, ColdStorageRead, ColdStorageWrite, Confirmed, Filter, HeaderSpecifier, ReceiptSpecifier, RpcLog, SignetEventsSpecifier, TransactionSpecifier, ZenithHeaderSpecifier, }; @@ -24,7 +24,15 @@ use signet_storage_types::{ ConfirmationMeta, DbSignetEvent, DbZenithHeader, IndexedReceipt, RecoveredTx, SealedHeader, TransactionSigned, TxLocation, }; -use std::path::Path; +use std::{ + path::Path, + time::{Duration, Instant}, +}; + +/// Default read deadline for MDBX read operations. +pub(crate) const DEFAULT_READ_TIMEOUT: Duration = Duration::from_millis(500); +/// Default advisory write deadline for MDBX write operations. +pub(crate) const DEFAULT_WRITE_TIMEOUT: Duration = Duration::from_secs(2); /// Write a single block's data into an open read-write transaction. /// @@ -117,7 +125,7 @@ fn produce_log_stream_blocking( for block_num in from..=to { // Check the deadline before starting each block so we // don't begin reading after the caller's timeout. - if std::time::Instant::now() > deadline { + if Instant::now() > deadline { let _ = sender.blocking_send(Err(ColdStorageError::StreamDeadlineExceeded)); return; } @@ -139,6 +147,12 @@ fn produce_log_stream_blocking( let mut block_count = 0usize; for result in iter { + // Per-receipt deadline check bounds iteration cost across + // blocks with many receipts. 
+ if Instant::now() > deadline { + let _ = sender.blocking_send(Err(ColdStorageError::StreamDeadlineExceeded)); + return; + } let (tx_idx, ir): (u64, IndexedReceipt) = try_stream!(sender, result); let tx_hash = ir.tx_hash; let first_log_index = ir.first_log_index; @@ -146,6 +160,15 @@ fn produce_log_stream_blocking( if !filter.matches(&log) { continue; } + // Per-log deadline check bounds stall time when the + // consumer is slow: `blocking_send` parks this thread, + // so without this check a single block with thousands + // of matching logs can run arbitrarily past the + // deadline. + if Instant::now() > deadline { + let _ = sender.blocking_send(Err(ColdStorageError::StreamDeadlineExceeded)); + return; + } // Enforce the global log limit across all blocks. block_count += 1; if block_count > remaining { @@ -183,12 +206,42 @@ fn produce_log_stream_blocking( /// MDBX-based cold storage backend. /// /// This backend stores historical blockchain data in an MDBX database. -/// It implements the [`ColdStorage`] trait for use with the cold storage +/// It implements the [`ColdStorageBackend`] trait for use with the cold storage /// task runner. +/// +/// # Timeout semantics +/// +/// - **Iterator reads** (`get_logs`, `get_signet_events` range, +/// `get_zenith_headers` range, `produce_log_stream`) enforce +/// `read_timeout` via in-body `Instant::now()` checks between +/// iteration steps. See [`with_read_timeout`](Self::with_read_timeout). +/// - **Point lookups** (`get_header`, `get_transaction`, `get_receipt`, +/// `get_zenith_header`, `get_latest_block`, and the per-block +/// `*_in_block` helpers) do **NOT** enforce a wall-clock deadline. +/// MDBX point reads are expected to be sub-millisecond on hot pages, +/// but may block on disk I/O for cold pages or while an adjacent +/// writer performs a page split. 
An unbounded stall here ties up one +/// `spawn_blocking` worker AND one `read_sem` permit on the handle +/// side — the handle does not wrap these calls in +/// `tokio::time::timeout`. Callers that need fail-fast behavior on +/// stuck I/O should apply their own timeout at the call site. +/// - **Writes** (`append_block`, `append_blocks`, `truncate_above`, +/// `drain_above`) are uninterruptible MDBX commits. The handle +/// measures end-to-end latency (including `write_sem` wait and the +/// read drain) against `write_timeout` and emits a `tracing::warn!` +/// on overrun via the `ColdStorageBackend::write_timeout` accessor; +/// `write_timeout` is an SLO/alerting signal only, not a hard abort. #[derive(Clone)] pub struct MdbxColdBackend { /// The MDBX environment. env: DatabaseEnv, + /// Wall-clock deadline for iterator read operations; checked between + /// per-block (and per-receipt / per-event) iteration steps. Point + /// lookups do NOT consult this deadline — see the type-level docs. + read_timeout: Duration, + /// Advisory deadline for write operations. Writes that exceed this are + /// logged via `tracing::warn!` but still report success. + write_timeout: Duration, } impl std::fmt::Debug for MdbxColdBackend { @@ -200,7 +253,42 @@ impl std::fmt::Debug for MdbxColdBackend { impl MdbxColdBackend { /// Create a new backend from an existing MDBX environment. const fn from_env(env: DatabaseEnv) -> Self { - Self { env } + Self { env, read_timeout: DEFAULT_READ_TIMEOUT, write_timeout: DEFAULT_WRITE_TIMEOUT } + } + + /// Set the read deadline for iterator reads. + /// + /// Applied between per-block and per-item iteration steps on iterator + /// reads (`get_logs`, range queries, `produce_log_stream`). Point + /// lookups (`get_header`, `get_transaction`, etc.) do NOT consult + /// this deadline — see the type-level docs on [`MdbxColdBackend`] + /// for the exemption rationale and its operational implications. 
+ /// + /// # Panics + /// + /// Panics if `read_timeout` is zero — a zero deadline is a + /// configuration mistake, not a "disable" signal, and the trait + /// contract requires a real bound. + #[must_use] + pub fn with_read_timeout(mut self, read_timeout: Duration) -> Self { + assert!(!read_timeout.is_zero(), "read_timeout must be non-zero"); + self.read_timeout = read_timeout; + self + } + + /// Set the advisory write deadline. Writes exceeding this threshold + /// emit a `tracing::warn!` but still report success to the caller; + /// MDBX commits are uninterruptible. + /// + /// # Panics + /// + /// Panics if `write_timeout` is zero. See + /// [`with_read_timeout`](Self::with_read_timeout). + #[must_use] + pub fn with_write_timeout(mut self, write_timeout: Duration) -> Self { + assert!(!write_timeout.is_zero(), "write_timeout must be non-zero"); + self.write_timeout = write_timeout; + self } /// Open an existing MDBX cold storage database in read-only mode. @@ -272,10 +360,10 @@ impl MdbxColdBackend { } fn get_header_inner( - &self, + env: &DatabaseEnv, spec: HeaderSpecifier, ) -> Result, MdbxColdError> { - let tx = self.env.tx()?; + let tx = env.tx()?; let block_num = match spec { HeaderSpecifier::Number(n) => n, HeaderSpecifier::Hash(h) => { @@ -289,31 +377,35 @@ impl MdbxColdBackend { } fn get_headers_inner( - &self, + env: &DatabaseEnv, specs: Vec, + deadline: Instant, + read_timeout: Duration, ) -> Result>, MdbxColdError> { - let tx = self.env.tx()?; - specs - .into_iter() - .map(|spec| { - let block_num = match spec { - HeaderSpecifier::Number(n) => Some(n), - HeaderSpecifier::Hash(h) => tx.traverse::()?.exact(&h)?, - }; - block_num - .map(|n| tx.traverse::()?.exact(&n)) - .transpose() - .map(Option::flatten) - }) - .collect::>() - .map_err(Into::into) + let tx = env.tx()?; + let mut out = Vec::with_capacity(specs.len()); + for spec in specs { + if Instant::now() > deadline { + return Err(MdbxColdError::Timeout(read_timeout)); + } + let block_num = match 
spec { + HeaderSpecifier::Number(n) => Some(n), + HeaderSpecifier::Hash(h) => tx.traverse::()?.exact(&h)?, + }; + let header = match block_num { + Some(n) => tx.traverse::()?.exact(&n)?, + None => None, + }; + out.push(header); + } + Ok(out) } fn get_transaction_inner( - &self, + env: &DatabaseEnv, spec: TransactionSpecifier, ) -> Result>, MdbxColdError> { - let tx = self.env.tx()?; + let tx = env.tx()?; let (block, index) = match spec { TransactionSpecifier::Hash(h) => { let Some(loc) = tx.traverse::()?.exact(&h)? else { @@ -346,10 +438,10 @@ impl MdbxColdBackend { } fn get_receipt_inner( - &self, + env: &DatabaseEnv, spec: ReceiptSpecifier, ) -> Result, MdbxColdError> { - let tx = self.env.tx()?; + let tx = env.tx()?; let (block, index) = match spec { ReceiptSpecifier::TxHash(h) => { let Some(loc) = tx.traverse::()?.exact(&h)? else { @@ -369,18 +461,18 @@ impl MdbxColdBackend { } fn get_zenith_header_by_number( - &self, + env: &DatabaseEnv, block: BlockNumber, ) -> Result, MdbxColdError> { - let tx = self.env.tx()?; + let tx = env.tx()?; Ok(tx.traverse::()?.exact(&block)?) } fn collect_transactions_in_block( - &self, + env: &DatabaseEnv, block: BlockNumber, ) -> Result, MdbxColdError> { - let tx = self.env.tx()?; + let tx = env.tx()?; tx.traverse_dual::()? .iter_k2(&block)? .zip(tx.traverse_dual::()?.iter_k2(&block)?) @@ -393,8 +485,11 @@ impl MdbxColdBackend { .collect() } - fn count_transactions_in_block(&self, block: BlockNumber) -> Result { - let tx = self.env.tx()?; + fn count_transactions_in_block( + env: &DatabaseEnv, + block: BlockNumber, + ) -> Result { + let tx = env.tx()?; let mut count = 0u64; for item in tx.traverse_dual::()?.iter_k2(&block)? { item?; @@ -404,10 +499,10 @@ impl MdbxColdBackend { } fn collect_receipts_in_block( - &self, + env: &DatabaseEnv, block: BlockNumber, ) -> Result, MdbxColdError> { - let tx = self.env.tx()?; + let tx = env.tx()?; let Some(sealed) = tx.traverse::()?.exact(&block)? 
else { return Ok(Vec::new()); }; @@ -421,10 +516,10 @@ impl MdbxColdBackend { } fn collect_signet_events_in_block( - &self, + env: &DatabaseEnv, block: BlockNumber, ) -> Result, MdbxColdError> { - let tx = self.env.tx()?; + let tx = env.tx()?; tx.traverse_dual::()? .iter_k2(&block)? .map(|item| item.map(|(_, v)| v)) @@ -433,15 +528,25 @@ impl MdbxColdBackend { } fn collect_signet_events_in_range( - &self, + env: &DatabaseEnv, start: BlockNumber, end: BlockNumber, + deadline: Instant, + read_timeout: Duration, ) -> Result, MdbxColdError> { - let tx = self.env.tx()?; + let tx = env.tx()?; let mut cursor = tx.traverse_dual::()?; let mut events = Vec::new(); for block in start..=end { + if Instant::now() > deadline { + return Err(MdbxColdError::Timeout(read_timeout)); + } for item in cursor.iter_k2(&block)? { + // Per-event deadline check so a single block with many + // events cannot run past the deadline. + if Instant::now() > deadline { + return Err(MdbxColdError::Timeout(read_timeout)); + } events.push(item?.1); } } @@ -449,11 +554,13 @@ impl MdbxColdBackend { } fn collect_zenith_headers_in_range( - &self, + env: &DatabaseEnv, start: BlockNumber, end: BlockNumber, + deadline: Instant, + read_timeout: Duration, ) -> Result, MdbxColdError> { - let tx = self.env.tx()?; + let tx = env.tx()?; let mut cursor = tx.new_cursor::()?; let mut headers = Vec::new(); @@ -470,6 +577,9 @@ impl MdbxColdBackend { } while let Some((key, value)) = cursor.read_next()? 
{ + if Instant::now() > deadline { + return Err(MdbxColdError::Timeout(read_timeout)); + } let block_num = BlockNumber::decode_key(&key)?; if block_num > end { break; @@ -583,11 +693,13 @@ impl MdbxColdBackend { } fn get_logs_inner( - &self, + env: &DatabaseEnv, filter: &Filter, max_logs: usize, + deadline: Instant, + read_timeout: Duration, ) -> Result, MdbxColdError> { - let tx = self.env.tx()?; + let tx = env.tx()?; let mut results = Vec::new(); let from = filter.get_from_block().unwrap_or(0); @@ -606,6 +718,9 @@ impl MdbxColdBackend { let mut receipt_cursor = tx.traverse_dual::()?; for block_num in from..=to { + if Instant::now() > deadline { + return Err(MdbxColdError::Timeout(read_timeout)); + } let Some(sealed) = header_cursor.exact(&block_num)? else { continue; }; @@ -613,6 +728,11 @@ impl MdbxColdBackend { let block_timestamp = sealed.timestamp; for item in receipt_cursor.iter_k2(&block_num)? { + // Per-receipt deadline check so a single block with + // thousands of receipts cannot run past the deadline. + if Instant::now() > deadline { + return Err(MdbxColdError::Timeout(read_timeout)); + } let (tx_idx, ir) = item?; let tx_hash = ir.tx_hash; let first_log_index = ir.first_log_index; @@ -620,6 +740,13 @@ impl MdbxColdBackend { if !filter.matches(&log) { continue; } + // Per-log deadline check: a single receipt with + // thousands of matching logs would otherwise run + // unchecked past the deadline. Mirrors the + // streaming path in `produce_log_stream_blocking`. + if Instant::now() > deadline { + return Err(MdbxColdError::Timeout(read_timeout)); + } if results.len() >= max_logs { return Err(MdbxColdError::TooManyLogs(max_logs)); } @@ -643,76 +770,128 @@ impl MdbxColdBackend { impl ColdStorageRead for MdbxColdBackend { async fn get_header(&self, spec: HeaderSpecifier) -> ColdResult> { - Ok(self.get_header_inner(spec)?) 
+ let env = self.env.clone(); + tokio::task::spawn_blocking(move || Self::get_header_inner(&env, spec)) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } async fn get_headers( &self, specs: Vec, ) -> ColdResult>> { - Ok(self.get_headers_inner(specs)?) + let env = self.env.clone(); + let read_timeout = self.read_timeout; + tokio::task::spawn_blocking(move || { + let deadline = Instant::now() + read_timeout; + Self::get_headers_inner(&env, specs, deadline, read_timeout) + }) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } async fn get_transaction( &self, spec: TransactionSpecifier, ) -> ColdResult>> { - Ok(self.get_transaction_inner(spec)?) + let env = self.env.clone(); + tokio::task::spawn_blocking(move || Self::get_transaction_inner(&env, spec)) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } async fn get_transactions_in_block(&self, block: BlockNumber) -> ColdResult> { - Ok(self.collect_transactions_in_block(block)?) + let env = self.env.clone(); + tokio::task::spawn_blocking(move || Self::collect_transactions_in_block(&env, block)) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } async fn get_transaction_count(&self, block: BlockNumber) -> ColdResult { - Ok(self.count_transactions_in_block(block)?) + let env = self.env.clone(); + tokio::task::spawn_blocking(move || Self::count_transactions_in_block(&env, block)) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } async fn get_receipt(&self, spec: ReceiptSpecifier) -> ColdResult> { - Ok(self.get_receipt_inner(spec)?) + let env = self.env.clone(); + tokio::task::spawn_blocking(move || Self::get_receipt_inner(&env, spec)) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? 
+ .map_err(ColdStorageError::from) } async fn get_receipts_in_block(&self, block: BlockNumber) -> ColdResult> { - Ok(self.collect_receipts_in_block(block)?) + let env = self.env.clone(); + tokio::task::spawn_blocking(move || Self::collect_receipts_in_block(&env, block)) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } async fn get_signet_events( &self, spec: SignetEventsSpecifier, ) -> ColdResult> { - let events = match spec { - SignetEventsSpecifier::Block(block) => self.collect_signet_events_in_block(block)?, - SignetEventsSpecifier::BlockRange { start, end } => { - self.collect_signet_events_in_range(start, end)? + let env = self.env.clone(); + let read_timeout = self.read_timeout; + tokio::task::spawn_blocking(move || { + let deadline = Instant::now() + read_timeout; + match spec { + SignetEventsSpecifier::Block(block) => { + Self::collect_signet_events_in_block(&env, block) + } + SignetEventsSpecifier::BlockRange { start, end } => { + Self::collect_signet_events_in_range(&env, start, end, deadline, read_timeout) + } } - }; - Ok(events) + }) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } async fn get_zenith_header( &self, spec: ZenithHeaderSpecifier, ) -> ColdResult> { + let env = self.env.clone(); let block = match spec { ZenithHeaderSpecifier::Number(n) => n, ZenithHeaderSpecifier::Range { start, .. } => start, }; - Ok(self.get_zenith_header_by_number(block)?) + tokio::task::spawn_blocking(move || Self::get_zenith_header_by_number(&env, block)) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } async fn get_zenith_headers( &self, spec: ZenithHeaderSpecifier, ) -> ColdResult> { - let headers = match spec { - ZenithHeaderSpecifier::Number(n) => { - self.get_zenith_header_by_number(n)?.into_iter().collect() - } - ZenithHeaderSpecifier::Range { start, end } => { - self.collect_zenith_headers_in_range(start, end)? 
+ let env = self.env.clone(); + let read_timeout = self.read_timeout; + tokio::task::spawn_blocking(move || { + let deadline = Instant::now() + read_timeout; + match spec { + ZenithHeaderSpecifier::Number(n) => { + Ok(Self::get_zenith_header_by_number(&env, n)?.into_iter().collect()) + } + ZenithHeaderSpecifier::Range { start, end } => { + Self::collect_zenith_headers_in_range(&env, start, end, deadline, read_timeout) + } } - }; - Ok(headers) + }) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } async fn get_logs( @@ -720,13 +899,22 @@ impl ColdStorageRead for MdbxColdBackend { filter: &Filter, max_logs: usize, ) -> ColdResult> { - Ok(self.get_logs_inner(filter, max_logs)?) + let env = self.env.clone(); + let read_timeout = self.read_timeout; + let filter = filter.clone(); + tokio::task::spawn_blocking(move || { + let deadline = Instant::now() + read_timeout; + Self::get_logs_inner(&env, &filter, max_logs, deadline, read_timeout) + }) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } async fn produce_log_stream(&self, filter: &Filter, params: signet_cold::StreamParams) { let env = self.env.clone(); // ENG-2036: clone required to move into spawn_blocking. Eliminating - // this would require changing the ColdStorage trait to pass owned + // this would require changing the ColdStorageBackend trait to pass owned // Filter, which is a cross-crate API change. let filter = filter.clone(); let std_deadline = params.deadline.into_std(); @@ -745,35 +933,63 @@ impl ColdStorageRead for MdbxColdBackend { } async fn get_latest_block(&self) -> ColdResult> { - let tx = self.env.tx().map_err(MdbxColdError::from)?; - let mut cursor = tx.new_cursor::().map_err(MdbxColdError::from)?; - let latest = cursor - .last() - .map_err(MdbxColdError::from)? 
- .map(|(key, _)| BlockNumber::decode_key(&key)) - .transpose() - .map_err(MdbxColdError::from)?; - Ok(latest) + let env = self.env.clone(); + tokio::task::spawn_blocking(move || -> Result, MdbxColdError> { + let tx = env.tx()?; + let mut cursor = tx.new_cursor::()?; + cursor + .last()? + .map(|(key, _)| BlockNumber::decode_key(&key)) + .transpose() + .map_err(MdbxColdError::from) + }) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } } impl ColdStorageWrite for MdbxColdBackend { - async fn append_block(&mut self, data: BlockData) -> ColdResult<()> { - Ok(self.append_block_inner(data)?) + async fn append_block(&self, data: BlockData) -> ColdResult<()> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.append_block_inner(data)) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } - async fn append_blocks(&mut self, data: Vec) -> ColdResult<()> { - Ok(self.append_blocks_inner(data)?) + async fn append_blocks(&self, data: Vec) -> ColdResult<()> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.append_blocks_inner(data)) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } - async fn truncate_above(&mut self, block: BlockNumber) -> ColdResult<()> { - Ok(self.truncate_above_inner(block)?) + async fn truncate_above(&self, block: BlockNumber) -> ColdResult<()> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.truncate_above_inner(block)) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } } -impl ColdStorage for MdbxColdBackend { - async fn drain_above(&mut self, block: BlockNumber) -> ColdResult>> { - Ok(self.drain_above_inner(block)?) 
+impl ColdStorageBackend for MdbxColdBackend { + fn read_timeout(&self) -> Option { + Some(self.read_timeout) + } + + fn write_timeout(&self) -> Option { + Some(self.write_timeout) + } + + async fn drain_above(&self, block: BlockNumber) -> ColdResult>> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.drain_above_inner(block)) + .await + .map_err(|_| ColdStorageError::TaskTerminated)? + .map_err(ColdStorageError::from) } } @@ -783,10 +999,27 @@ mod tests { use signet_cold::conformance::conformance; use tempfile::tempdir; - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn mdbx_backend_conformance() { let dir = tempdir().unwrap(); let backend = MdbxColdBackend::open_rw(dir.path()).unwrap(); conformance(backend).await.unwrap(); } + + /// Regression: writes must not rely on `tokio::task::block_in_place`, + /// which panics on a single-threaded runtime. + #[tokio::test(flavor = "current_thread")] + async fn writes_work_on_current_thread_runtime() { + use signet_cold::{ColdStorageRead, ColdStorageWrite, conformance::make_test_block}; + + let dir = tempdir().unwrap(); + let backend = MdbxColdBackend::open_rw(dir.path()).unwrap(); + + backend.append_block(make_test_block(0)).await.unwrap(); + backend.append_blocks(vec![make_test_block(1), make_test_block(2)]).await.unwrap(); + backend.truncate_above(1).await.unwrap(); + + let latest = backend.get_latest_block().await.unwrap(); + assert_eq!(latest, Some(1)); + } } diff --git a/crates/cold-mdbx/src/connector.rs b/crates/cold-mdbx/src/connector.rs index 6b47e04..9f857cc 100644 --- a/crates/cold-mdbx/src/connector.rs +++ b/crates/cold-mdbx/src/connector.rs @@ -2,11 +2,14 @@ //! //! Unified connector that can open both hot and cold MDBX databases. 
-use crate::{MdbxColdBackend, MdbxColdError}; +use crate::{ + MdbxColdBackend, MdbxColdError, + backend::{DEFAULT_READ_TIMEOUT, DEFAULT_WRITE_TIMEOUT}, +}; use signet_cold::ColdConnect; use signet_hot::HotConnect; use signet_hot_mdbx::{DatabaseArguments, DatabaseEnv, MdbxError}; -use std::path::PathBuf; +use std::{path::PathBuf, time::Duration}; /// Errors that can occur when initializing MDBX connectors. #[derive(Debug, thiserror::Error)] @@ -46,12 +49,19 @@ pub enum MdbxConnectorError { pub struct MdbxConnector { path: PathBuf, db_args: DatabaseArguments, + read_timeout: Duration, + write_timeout: Duration, } impl MdbxConnector { /// Create a new MDBX connector with default database arguments. pub fn new(path: impl Into) -> Self { - Self { path: path.into(), db_args: DatabaseArguments::new() } + Self { + path: path.into(), + db_args: DatabaseArguments::new(), + read_timeout: DEFAULT_READ_TIMEOUT, + write_timeout: DEFAULT_WRITE_TIMEOUT, + } } /// Set custom database arguments. @@ -64,6 +74,32 @@ impl MdbxConnector { self } + /// Set the read deadline applied to the cold backend produced by + /// [`ColdConnect::connect`]. See [`MdbxColdBackend::with_read_timeout`]. + /// + /// # Panics + /// + /// Panics if `read_timeout` is zero. + #[must_use] + pub fn with_read_timeout(mut self, read_timeout: Duration) -> Self { + assert!(!read_timeout.is_zero(), "read_timeout must be non-zero"); + self.read_timeout = read_timeout; + self + } + + /// Set the advisory write deadline applied to the cold backend produced + /// by [`ColdConnect::connect`]. See [`MdbxColdBackend::with_write_timeout`]. + /// + /// # Panics + /// + /// Panics if `write_timeout` is zero. + #[must_use] + pub fn with_write_timeout(mut self, write_timeout: Duration) -> Self { + assert!(!write_timeout.is_zero(), "write_timeout must be non-zero"); + self.write_timeout = write_timeout; + self + } + /// Get a reference to the path. 
pub fn path(&self) -> &std::path::Path { &self.path @@ -111,6 +147,11 @@ impl ColdConnect for MdbxConnector { // MDBX open is sync, but wrapped in async for trait consistency // Opens read-write and creates tables let path = self.path.clone(); - async move { MdbxColdBackend::open_rw(&path) } + let read_timeout = self.read_timeout; + let write_timeout = self.write_timeout; + async move { + MdbxColdBackend::open_rw(&path) + .map(|b| b.with_read_timeout(read_timeout).with_write_timeout(write_timeout)) + } } } diff --git a/crates/cold-mdbx/src/error.rs b/crates/cold-mdbx/src/error.rs index 9a160d0..8c9f4af 100644 --- a/crates/cold-mdbx/src/error.rs +++ b/crates/cold-mdbx/src/error.rs @@ -20,12 +20,17 @@ pub enum MdbxColdError { /// Too many logs matched the filter. #[error("too many logs: limit is {0}")] TooManyLogs(usize), + + /// An MDBX read exceeded its configured deadline. + #[error("mdbx read timed out after {0:?}")] + Timeout(std::time::Duration), } impl From for signet_cold::ColdStorageError { fn from(error: MdbxColdError) -> Self { match error { MdbxColdError::TooManyLogs(limit) => Self::TooManyLogs { limit }, + MdbxColdError::Timeout(duration) => Self::DeadlineExceeded(duration), other => Self::Backend(Box::new(other)), } } diff --git a/crates/cold-mdbx/tests/timeout.rs b/crates/cold-mdbx/tests/timeout.rs new file mode 100644 index 0000000..8e1b374 --- /dev/null +++ b/crates/cold-mdbx/tests/timeout.rs @@ -0,0 +1,33 @@ +//! Read timeouts must trip when an iterator exceeds the configured budget. 
+ +use signet_cold::{ColdStorageError, ColdStorageRead, SignetEventsSpecifier}; +use signet_cold_mdbx::MdbxColdBackend; +use std::time::Duration; +use tempfile::TempDir; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn iterator_read_timeout_trips_on_huge_range() { + let dir = TempDir::new().unwrap(); + let backend = + MdbxColdBackend::open_rw(dir.path()).unwrap().with_read_timeout(Duration::from_nanos(1)); + + let res = backend + .get_signet_events(SignetEventsSpecifier::BlockRange { start: 0, end: 1_000_000 }) + .await; + + let err = res.expect_err("should time out"); + assert!( + matches!(err, ColdStorageError::DeadlineExceeded(_)), + "expected DeadlineExceeded, got: {err:?}" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn point_lookup_does_not_check_deadline() { + let dir = TempDir::new().unwrap(); + let backend = + MdbxColdBackend::open_rw(dir.path()).unwrap().with_read_timeout(Duration::from_nanos(1)); + + let res = backend.get_latest_block().await.unwrap(); + assert!(res.is_none()); +} diff --git a/crates/cold-sql/src/backend.rs b/crates/cold-sql/src/backend.rs index 7182914..ae22992 100644 --- a/crates/cold-sql/src/backend.rs +++ b/crates/cold-sql/src/backend.rs @@ -36,7 +36,7 @@ use alloy::{ }, }; use signet_cold::{ - BlockData, ColdReceipt, ColdResult, ColdStorage, ColdStorageError, ColdStorageRead, + BlockData, ColdReceipt, ColdResult, ColdStorageBackend, ColdStorageError, ColdStorageRead, ColdStorageWrite, Confirmed, Filter, HeaderSpecifier, ReceiptSpecifier, RpcLog, SignetEventsSpecifier, TransactionSpecifier, ZenithHeaderSpecifier, }; @@ -50,7 +50,12 @@ use signet_zenith::{ Zenith, }; use sqlx::{AnyPool, Row}; -use std::collections::BTreeMap; +use std::{collections::BTreeMap, time::Duration}; + +/// Default statement timeout for read transactions (matches MDBX). 
+pub(crate) const DEFAULT_READ_TIMEOUT: Duration = Duration::from_millis(500); +/// Default statement timeout for write transactions (matches MDBX). +pub(crate) const DEFAULT_WRITE_TIMEOUT: Duration = Duration::from_secs(2); /// SQL-based cold storage backend. /// @@ -75,6 +80,8 @@ use std::collections::BTreeMap; pub struct SqlColdBackend { pool: AnyPool, is_postgres: bool, + read_timeout: Duration, + write_timeout: Duration, } /// Returns `true` if the URL refers to an in-memory SQLite database. @@ -130,7 +137,97 @@ impl SqlColdBackend { sqlx::raw_sql(include_str!("../migrations/002_add_topic_indexes.sql")) .execute(&pool) .await?; - Ok(Self { pool, is_postgres }) + Ok(Self { + pool, + is_postgres, + read_timeout: DEFAULT_READ_TIMEOUT, + write_timeout: DEFAULT_WRITE_TIMEOUT, + }) + } + + /// Override the per-transaction read timeout (default 500 ms). + /// + /// On Postgres this sets `statement_timeout` on every transaction + /// opened by a read method. On SQLite the value is stored but + /// not enforced — SQLite has no equivalent mechanism. + /// + /// # Panics + /// + /// Panics if `d` rounds to 0 ms. Postgres interprets + /// `statement_timeout = 0` as "no timeout", which would silently + /// disable the trait-level mandatory-timeout contract. + #[must_use] + pub fn with_read_timeout(mut self, d: Duration) -> Self { + assert!(d.as_millis() >= 1, "read_timeout must be >= 1ms (got {d:?})"); + self.read_timeout = d; + self + } + + /// Override the per-transaction write timeout (default 2 s). + /// + /// On Postgres this sets `statement_timeout` on every transaction + /// opened by a write method. On SQLite the value is stored but + /// not enforced — SQLite has no equivalent mechanism. + /// + /// # Panics + /// + /// Panics if `d` rounds to 0 ms. See [`with_read_timeout`](Self::with_read_timeout). 
+ #[must_use] + pub fn with_write_timeout(mut self, d: Duration) -> Self { + assert!(d.as_millis() >= 1, "write_timeout must be >= 1ms (got {d:?})"); + self.write_timeout = d; + self + } + + /// Open a transaction for a read operation. + /// + /// On Postgres, issues `SET LOCAL statement_timeout = ` scoped + /// to the transaction. On SQLite the timeout is advisory (there is + /// no equivalent mechanism). + async fn begin_read(&self) -> Result, SqlColdError> { + let tx = self.pool.begin().await?; + self.apply_statement_timeout(tx, self.read_timeout).await + } + + /// Open a transaction for a write operation. + /// + /// Same semantics as [`begin_read`](Self::begin_read) but uses + /// `write_timeout` for the `SET LOCAL`. + async fn begin_write(&self) -> Result, SqlColdError> { + let tx = self.pool.begin().await?; + self.apply_statement_timeout(tx, self.write_timeout).await + } + + /// Run `SELECT pg_sleep($1)` inside a `begin_read` transaction. + /// + /// Intended only for verifying the `statement_timeout` behaviour in + /// integration tests. Returns whatever error the server produces if + /// the sleep trips the configured read timeout. + #[cfg(any(test, feature = "test-utils"))] + pub async fn debug_pg_sleep(&self, d: Duration) -> Result<(), SqlColdError> { + let secs = d.as_secs_f64(); + let mut tx = self.begin_read().await?; + sqlx::query(&format!("SELECT pg_sleep({secs})")).execute(&mut *tx).await?; + tx.commit().await?; + Ok(()) + } + + /// Apply `SET LOCAL statement_timeout` on Postgres; no-op on SQLite. + /// + /// `SET` does not accept bind parameters in Postgres the same way + /// `SELECT` does, so the value is inline-formatted. This is SAFE + /// because `ms` is derived from an internal `Duration` field, never + /// from user input. 
+ async fn apply_statement_timeout<'a>( + &self, + mut tx: sqlx::Transaction<'a, sqlx::Any>, + d: Duration, + ) -> Result, SqlColdError> { + if self.is_postgres { + let ms = i64::try_from(d.as_millis()).unwrap_or(i64::MAX); + sqlx::query(&format!("SET LOCAL statement_timeout = {ms}")).execute(&mut *tx).await?; + } + Ok(tx) } /// Connect to a database URL with explicit pool options. @@ -170,10 +267,12 @@ impl SqlColdBackend { match spec { HeaderSpecifier::Number(n) => Ok(Some(n)), HeaderSpecifier::Hash(hash) => { + let mut tx = self.begin_read().await?; let row = sqlx::query("SELECT block_number FROM headers WHERE block_hash = $1") .bind(hash) - .fetch_optional(&self.pool) + .fetch_optional(&mut *tx) .await?; + tx.commit().await?; Ok(row.map(|r| from_i64(r.get::(COL_BLOCK_NUMBER)))) } } @@ -188,10 +287,12 @@ impl SqlColdBackend { block_num: BlockNumber, ) -> Result, SqlColdError> { let bn = to_i64(block_num); + let mut tx = self.begin_read().await?; let row = sqlx::query("SELECT * FROM headers WHERE block_number = $1") .bind(bn) - .fetch_optional(&self.pool) + .fetch_optional(&mut *tx) .await?; + tx.commit().await?; row.map(|r| header_from_row(&r).map(|h| h.seal_slow())).transpose() } @@ -201,7 +302,7 @@ impl SqlColdBackend { // ======================================================================== async fn insert_block(&self, data: BlockData) -> Result<(), SqlColdError> { - let mut tx = self.pool.begin().await?; + let mut tx = self.begin_write().await?; write_block_to_tx(&mut tx, data).await?; tx.commit().await?; Ok(()) @@ -240,11 +341,21 @@ impl SqlColdBackend { // Open a REPEATABLE READ transaction so all per-block queries see a // consistent snapshot. This makes reorg detection unnecessary — if a // reorg lands mid-stream the transaction still reads the old data. + // Isolation level must be the first statement in the transaction, + // so we bypass `begin_read`'s `SET LOCAL statement_timeout` here + // and issue both settings ourselves in the correct order. 
let mut tx = try_stream!(sender, self.pool.begin().await); try_stream!( sender, sqlx::query("SET TRANSACTION ISOLATION LEVEL REPEATABLE READ").execute(&mut *tx).await ); + let read_ms = i64::try_from(self.read_timeout.as_millis()).unwrap_or(i64::MAX); + try_stream!( + sender, + sqlx::query(&format!("SET LOCAL statement_timeout = {read_ms}")) + .execute(&mut *tx) + .await + ); // Build the parameterised query once. $1 is the block number // (bound per iteration); remaining parameters are the address @@ -1016,7 +1127,7 @@ fn build_log_filter_clause(filter: &Filter, start_idx: u32) -> (String, Vec<&[u8 } // ============================================================================ -// ColdStorage implementation +// ColdStorageBackend implementation // ============================================================================ impl ColdStorageRead for SqlColdBackend { @@ -1043,6 +1154,7 @@ impl ColdStorageRead for SqlColdBackend { &self, spec: TransactionSpecifier, ) -> ColdResult>> { + let mut tx = self.begin_read().await.map_err(ColdStorageError::from)?; let row = match spec { TransactionSpecifier::Hash(hash) => sqlx::query( "SELECT t.*, h.block_hash @@ -1051,7 +1163,7 @@ impl ColdStorageRead for SqlColdBackend { WHERE t.tx_hash = $1", ) .bind(hash) - .fetch_optional(&self.pool) + .fetch_optional(&mut *tx) .await .map_err(SqlColdError::from)?, TransactionSpecifier::BlockAndIndex { block, index } => sqlx::query( @@ -1062,7 +1174,7 @@ impl ColdStorageRead for SqlColdBackend { ) .bind(to_i64(block)) .bind(to_i64(index)) - .fetch_optional(&self.pool) + .fetch_optional(&mut *tx) .await .map_err(SqlColdError::from)?, TransactionSpecifier::BlockHashAndIndex { block_hash, index } => sqlx::query( @@ -1073,10 +1185,11 @@ impl ColdStorageRead for SqlColdBackend { ) .bind(block_hash) .bind(to_i64(index)) - .fetch_optional(&self.pool) + .fetch_optional(&mut *tx) .await .map_err(SqlColdError::from)?, }; + tx.commit().await.map_err(SqlColdError::from)?; let Some(r) = row else { 
return Ok(None); @@ -1092,28 +1205,33 @@ impl ColdStorageRead for SqlColdBackend { async fn get_transactions_in_block(&self, block: BlockNumber) -> ColdResult> { let bn = to_i64(block); + let mut tx = self.begin_read().await.map_err(ColdStorageError::from)?; let rows = sqlx::query("SELECT * FROM transactions WHERE block_number = $1 ORDER BY tx_index") .bind(bn) - .fetch_all(&self.pool) + .fetch_all(&mut *tx) .await .map_err(SqlColdError::from)?; + tx.commit().await.map_err(SqlColdError::from)?; rows.iter().map(|r| recovered_tx_from_row(r).map_err(ColdStorageError::from)).collect() } async fn get_transaction_count(&self, block: BlockNumber) -> ColdResult { let bn = to_i64(block); + let mut tx = self.begin_read().await.map_err(ColdStorageError::from)?; let row = sqlx::query("SELECT COUNT(*) as cnt FROM transactions WHERE block_number = $1") .bind(bn) - .fetch_one(&self.pool) + .fetch_one(&mut *tx) .await .map_err(SqlColdError::from)?; + tx.commit().await.map_err(SqlColdError::from)?; Ok(from_i64(row.get::(COL_CNT))) } async fn get_receipt(&self, spec: ReceiptSpecifier) -> ColdResult> { + let mut tx = self.begin_read().await.map_err(ColdStorageError::from)?; // Resolve to (block, index) let (block, index) = match spec { ReceiptSpecifier::TxHash(hash) => { @@ -1121,7 +1239,7 @@ impl ColdStorageRead for SqlColdBackend { "SELECT block_number, tx_index FROM transactions WHERE tx_hash = $1", ) .bind(hash) - .fetch_optional(&self.pool) + .fetch_optional(&mut *tx) .await .map_err(SqlColdError::from)?; let Some(r) = row else { return Ok(None) }; @@ -1157,7 +1275,7 @@ impl ColdStorageRead for SqlColdBackend { ) .bind(to_i64(block)) .bind(to_i64(index)) - .fetch_optional(&self.pool) + .fetch_optional(&mut *tx) .await .map_err(SqlColdError::from)?; @@ -1183,9 +1301,10 @@ impl ColdStorageRead for SqlColdBackend { ) .bind(to_i64(block)) .bind(to_i64(index)) - .fetch_all(&self.pool) + .fetch_all(&mut *tx) .await .map_err(SqlColdError::from)?; + 
tx.commit().await.map_err(SqlColdError::from)?; let logs = log_rows.iter().map(log_from_row).collect(); let built = build_receipt(tx_type, success, cumulative_gas_used, logs) @@ -1212,6 +1331,7 @@ impl ColdStorageRead for SqlColdBackend { }; let bn = to_i64(block); + let mut tx = self.begin_read().await.map_err(ColdStorageError::from)?; // Fetch receipts joined with tx_hash and from_address let receipt_rows = sqlx::query( @@ -1225,16 +1345,17 @@ impl ColdStorageRead for SqlColdBackend { ORDER BY r.tx_index", ) .bind(bn) - .fetch_all(&self.pool) + .fetch_all(&mut *tx) .await .map_err(SqlColdError::from)?; let all_log_rows = sqlx::query("SELECT * FROM logs WHERE block_number = $1 ORDER BY tx_index, log_index") .bind(bn) - .fetch_all(&self.pool) + .fetch_all(&mut *tx) .await .map_err(SqlColdError::from)?; + tx.commit().await.map_err(SqlColdError::from)?; // Group logs by tx_index let mut logs_by_tx: BTreeMap> = BTreeMap::new(); @@ -1271,6 +1392,7 @@ impl ColdStorageRead for SqlColdBackend { &self, spec: SignetEventsSpecifier, ) -> ColdResult> { + let mut tx = self.begin_read().await.map_err(ColdStorageError::from)?; let rows = match spec { SignetEventsSpecifier::Block(block) => { let bn = to_i64(block); @@ -1278,7 +1400,7 @@ impl ColdStorageRead for SqlColdBackend { "SELECT * FROM signet_events WHERE block_number = $1 ORDER BY event_index", ) .bind(bn) - .fetch_all(&self.pool) + .fetch_all(&mut *tx) .await .map_err(SqlColdError::from)? } @@ -1291,11 +1413,12 @@ impl ColdStorageRead for SqlColdBackend { ) .bind(s) .bind(e) - .fetch_all(&self.pool) + .fetch_all(&mut *tx) .await .map_err(SqlColdError::from)? } }; + tx.commit().await.map_err(SqlColdError::from)?; rows.iter().map(|r| signet_event_from_row(r).map_err(ColdStorageError::from)).collect() } @@ -1309,11 +1432,13 @@ impl ColdStorageRead for SqlColdBackend { ZenithHeaderSpecifier::Range { start, .. 
} => start, }; let bn = to_i64(block); + let mut tx = self.begin_read().await.map_err(ColdStorageError::from)?; let row = sqlx::query("SELECT * FROM zenith_headers WHERE block_number = $1") .bind(bn) - .fetch_optional(&self.pool) + .fetch_optional(&mut *tx) .await .map_err(SqlColdError::from)?; + tx.commit().await.map_err(SqlColdError::from)?; row.map(|r| zenith_header_from_row(&r)).transpose().map_err(ColdStorageError::from) } @@ -1322,12 +1447,13 @@ impl ColdStorageRead for SqlColdBackend { &self, spec: ZenithHeaderSpecifier, ) -> ColdResult> { + let mut tx = self.begin_read().await.map_err(ColdStorageError::from)?; let rows = match spec { ZenithHeaderSpecifier::Number(n) => { let bn = to_i64(n); sqlx::query("SELECT * FROM zenith_headers WHERE block_number = $1") .bind(bn) - .fetch_all(&self.pool) + .fetch_all(&mut *tx) .await .map_err(SqlColdError::from)? } @@ -1340,11 +1466,12 @@ impl ColdStorageRead for SqlColdBackend { ) .bind(s) .bind(e) - .fetch_all(&self.pool) + .fetch_all(&mut *tx) .await .map_err(SqlColdError::from)? 
} }; + tx.commit().await.map_err(SqlColdError::from)?; rows.iter().map(|r| zenith_header_from_row(r).map_err(ColdStorageError::from)).collect() } @@ -1386,7 +1513,9 @@ impl ColdStorageRead for SqlColdBackend { let limit = max_logs.saturating_add(1).min(i64::MAX as usize); query = query.bind(to_i64(limit as u64)); - let rows = query.fetch_all(&self.pool).await.map_err(SqlColdError::from)?; + let mut tx = self.begin_read().await.map_err(ColdStorageError::from)?; + let rows = query.fetch_all(&mut *tx).await.map_err(SqlColdError::from)?; + tx.commit().await.map_err(SqlColdError::from)?; if rows.len() > max_logs { return Err(ColdStorageError::TooManyLogs { limit: max_logs }); @@ -1418,21 +1547,23 @@ impl ColdStorageRead for SqlColdBackend { } async fn get_latest_block(&self) -> ColdResult> { + let mut tx = self.begin_read().await.map_err(ColdStorageError::from)?; let row = sqlx::query("SELECT MAX(block_number) as max_bn FROM headers") - .fetch_one(&self.pool) + .fetch_one(&mut *tx) .await .map_err(SqlColdError::from)?; + tx.commit().await.map_err(SqlColdError::from)?; Ok(row.get::, _>(COL_MAX_BN).map(from_i64)) } } impl ColdStorageWrite for SqlColdBackend { - async fn append_block(&mut self, data: BlockData) -> ColdResult<()> { + async fn append_block(&self, data: BlockData) -> ColdResult<()> { self.insert_block(data).await.map_err(ColdStorageError::from) } - async fn append_blocks(&mut self, data: Vec) -> ColdResult<()> { - let mut tx = self.pool.begin().await.map_err(SqlColdError::from)?; + async fn append_blocks(&self, data: Vec) -> ColdResult<()> { + let mut tx = self.begin_write().await.map_err(ColdStorageError::from)?; for block_data in data { write_block_to_tx(&mut tx, block_data).await.map_err(ColdStorageError::from)?; } @@ -1440,9 +1571,9 @@ impl ColdStorageWrite for SqlColdBackend { Ok(()) } - async fn truncate_above(&mut self, block: BlockNumber) -> ColdResult<()> { + async fn truncate_above(&self, block: BlockNumber) -> ColdResult<()> { let bn = 
to_i64(block); - let mut tx = self.pool.begin().await.map_err(SqlColdError::from)?; + let mut tx = self.begin_write().await.map_err(ColdStorageError::from)?; delete_above_in_tx(&mut tx, bn).await.map_err(ColdStorageError::from)?; @@ -1451,10 +1582,18 @@ impl ColdStorageWrite for SqlColdBackend { } } -impl ColdStorage for SqlColdBackend { - async fn drain_above(&mut self, block: BlockNumber) -> ColdResult>> { +impl ColdStorageBackend for SqlColdBackend { + fn read_timeout(&self) -> Option { + Some(self.read_timeout) + } + + fn write_timeout(&self) -> Option { + Some(self.write_timeout) + } + + async fn drain_above(&self, block: BlockNumber) -> ColdResult>> { let bn = to_i64(block); - let mut tx = self.pool.begin().await.map_err(SqlColdError::from)?; + let mut tx = self.begin_write().await.map_err(ColdStorageError::from)?; // 1. Fetch all headers above block. let header_rows = diff --git a/crates/cold-sql/src/connector.rs b/crates/cold-sql/src/connector.rs index c38eed2..a5c8237 100644 --- a/crates/cold-sql/src/connector.rs +++ b/crates/cold-sql/src/connector.rs @@ -1,6 +1,9 @@ //! SQL cold storage connector. -use crate::{SqlColdBackend, SqlColdError}; +use crate::{ + SqlColdBackend, SqlColdError, + backend::{DEFAULT_READ_TIMEOUT, DEFAULT_WRITE_TIMEOUT}, +}; use signet_cold::ColdConnect; use sqlx::pool::PoolOptions; use std::time::Duration; @@ -48,6 +51,8 @@ pub enum SqlConnectorError { pub struct SqlConnector { url: String, pool_opts: PoolOptions, + read_timeout: Duration, + write_timeout: Duration, } #[cfg(any(feature = "sqlite", feature = "postgres"))] @@ -56,7 +61,12 @@ impl SqlConnector { /// /// The database type is detected from the URL prefix. pub fn new(url: impl Into) -> Self { - Self { url: url.into(), pool_opts: PoolOptions::new() } + Self { + url: url.into(), + pool_opts: PoolOptions::new(), + read_timeout: DEFAULT_READ_TIMEOUT, + write_timeout: DEFAULT_WRITE_TIMEOUT, + } } /// Get a reference to the connection URL. 
@@ -105,6 +115,39 @@ impl SqlConnector { self } + /// Set the per-transaction read timeout (default 500 ms). + /// + /// On Postgres this is applied via `SET LOCAL statement_timeout` + /// at the start of every read transaction. On SQLite the value is + /// stored but not enforced. + /// + /// # Panics + /// + /// Panics if `d` rounds to 0 ms (Postgres treats `0` as "no + /// timeout", which would silently disable the trait contract). + #[must_use] + pub fn with_read_timeout(mut self, d: Duration) -> Self { + assert!(d.as_millis() >= 1, "read_timeout must be >= 1ms (got {d:?})"); + self.read_timeout = d; + self + } + + /// Set the per-transaction write timeout (default 2 s). + /// + /// On Postgres this is applied via `SET LOCAL statement_timeout` + /// at the start of every write transaction. On SQLite the value is + /// stored but not enforced. + /// + /// # Panics + /// + /// Panics if `d` rounds to 0 ms. See [`with_read_timeout`](Self::with_read_timeout). + #[must_use] + pub fn with_write_timeout(mut self, d: Duration) -> Self { + assert!(d.as_millis() >= 1, "write_timeout must be >= 1ms (got {d:?})"); + self.write_timeout = d; + self + } + /// Create a connector from environment variables. /// /// Reads the SQL URL from the specified environment variable. @@ -131,6 +174,11 @@ impl ColdConnect for SqlConnector { fn connect(&self) -> impl std::future::Future> + Send { let url = self.url.clone(); let pool_opts = self.pool_opts.clone(); - async move { SqlColdBackend::connect_with(&url, pool_opts).await } + let read_timeout = self.read_timeout; + let write_timeout = self.write_timeout; + async move { + let backend = SqlColdBackend::connect_with(&url, pool_opts).await?; + Ok(backend.with_read_timeout(read_timeout).with_write_timeout(write_timeout)) + } } } diff --git a/crates/cold-sql/src/error.rs b/crates/cold-sql/src/error.rs index 00e1633..6132995 100644 --- a/crates/cold-sql/src/error.rs +++ b/crates/cold-sql/src/error.rs @@ -1,19 +1,53 @@ //! 
Error types for cold SQL storage. +/// Postgres SQLSTATE for `query_canceled` — emitted by the server when +/// `statement_timeout` trips a query. +const PG_SQLSTATE_QUERY_CANCELED: &str = "57014"; + /// Errors that can occur in cold SQL storage operations. #[derive(Debug, thiserror::Error)] pub enum SqlColdError { + /// The query was cancelled by Postgres `statement_timeout` + /// (SQLSTATE 57014). + /// + /// The configured deadline (read or write timeout) is not threaded + /// through to this conversion — the variant is detected at the + /// `From` boundary, which has no method context. The + /// handle surfaces this as + /// [`signet_cold::ColdStorageError::DeadlineExceeded`], which is + /// what callers and metrics should match on. + #[error("postgres statement_timeout exceeded")] + Timeout, + /// A sqlx database error occurred. #[error("sqlx error: {0}")] - Sqlx(#[from] sqlx::Error), + Sqlx(sqlx::Error), /// A data conversion error occurred. #[error("conversion error: {0}")] Convert(String), } +impl From for SqlColdError { + fn from(e: sqlx::Error) -> Self { + if let sqlx::Error::Database(ref db) = e + && db.code().as_deref() == Some(PG_SQLSTATE_QUERY_CANCELED) + { + return Self::Timeout; + } + Self::Sqlx(e) + } +} + impl From for signet_cold::ColdStorageError { fn from(error: SqlColdError) -> Self { - Self::Backend(Box::new(error)) + match error { + // Duration unknown at this conversion boundary; surface as + // ZERO so callers can match on `DeadlineExceeded(_)` and + // metrics see the deadline error class. Threading the + // configured deadline is a separate refactor. + SqlColdError::Timeout => Self::DeadlineExceeded(std::time::Duration::ZERO), + other => Self::Backend(Box::new(other)), + } } } diff --git a/crates/cold-sql/src/lib.rs b/crates/cold-sql/src/lib.rs index f28e2ad..63d9844 100644 --- a/crates/cold-sql/src/lib.rs +++ b/crates/cold-sql/src/lib.rs @@ -1,6 +1,6 @@ //! SQL backend for cold storage. //! -//! 
This crate provides SQL-based implementations of the [`ColdStorage`] trait +//! This crate provides SQL-based implementations of the [`ColdStorageBackend`] trait //! for storing historical blockchain data in relational databases. All data is //! stored in fully decomposed SQL columns for rich queryability. //! @@ -18,7 +18,7 @@ //! - **`test-utils`**: Enables both backends and propagates //! `signet-cold/test-utils` for conformance testing. //! -//! [`ColdStorage`]: signet_cold::ColdStorage +//! [`ColdStorageBackend`]: signet_cold::ColdStorageBackend #![warn( missing_copy_implementations, diff --git a/crates/cold-sql/tests/statement_timeout.rs b/crates/cold-sql/tests/statement_timeout.rs new file mode 100644 index 0000000..0645591 --- /dev/null +++ b/crates/cold-sql/tests/statement_timeout.rs @@ -0,0 +1,55 @@ +//! Postgres `statement_timeout` is honoured by `begin_read` / `begin_write`. +//! +//! Gated on the `postgres` feature. Skipped at runtime if +//! `DATABASE_URL` is not set (mirrors the existing `pg_conformance` +//! test convention). + +#![cfg(all(feature = "postgres", feature = "test-utils"))] + +use signet_cold_sql::{SqlColdBackend, SqlColdError}; +use std::time::Duration; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn pg_statement_timeout_trips() { + let Ok(url) = std::env::var("DATABASE_URL") else { + eprintln!("skipping pg_statement_timeout_trips: DATABASE_URL not set"); + return; + }; + + let backend = + SqlColdBackend::connect(&url).await.unwrap().with_read_timeout(Duration::from_millis(50)); + + // pg_sleep(1s) must trip the 50 ms statement_timeout and surface + // as the dedicated `Timeout` variant (SQLSTATE 57014), not as a + // generic `Sqlx` wrap. A future refactor that drops 57014 + // detection should fail this test. 
+ let err = backend + .debug_pg_sleep(Duration::from_secs(1)) + .await + .expect_err("pg_sleep should trip statement_timeout"); + + assert!(matches!(err, SqlColdError::Timeout), "expected SqlColdError::Timeout, got: {err}"); +} + +/// At the handle boundary, the same condition must surface as +/// [`signet_cold::ColdStorageError::DeadlineExceeded`] rather than +/// `Backend(...)`, so MDBX and SQL deadline expiry classify +/// symmetrically and `cold.op_errors_total{error="deadline"}` covers +/// both backends. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn pg_statement_timeout_maps_to_deadline_exceeded() { + let Ok(url) = std::env::var("DATABASE_URL") else { + eprintln!("skipping pg_statement_timeout_maps_to_deadline_exceeded: DATABASE_URL not set"); + return; + }; + + let backend = + SqlColdBackend::connect(&url).await.unwrap().with_read_timeout(Duration::from_millis(50)); + + let sql_err = backend.debug_pg_sleep(Duration::from_secs(1)).await.unwrap_err(); + let cold_err: signet_cold::ColdStorageError = sql_err.into(); + assert!( + matches!(cold_err, signet_cold::ColdStorageError::DeadlineExceeded(_)), + "expected DeadlineExceeded, got: {cold_err}" + ); +} diff --git a/crates/cold/Cargo.toml b/crates/cold/Cargo.toml index d9c5779..35b6aa6 100644 --- a/crates/cold/Cargo.toml +++ b/crates/cold/Cargo.toml @@ -18,6 +18,8 @@ rustdoc-args = ["--cfg", "docsrs"] [dependencies] alloy.workspace = true lru.workspace = true +metrics.workspace = true +parking_lot.workspace = true signet-storage-types.workspace = true thiserror.workspace = true tokio.workspace = true diff --git a/crates/cold/README.md b/crates/cold/README.md index 578ac66..73d2b44 100644 --- a/crates/cold/README.md +++ b/crates/cold/README.md @@ -11,7 +11,7 @@ handling. 
The cold storage engine uses a task-based architecture with separate channels for reads and writes: -- `ColdStorage` trait defines the backend interface +- `ColdStorageBackend` trait defines the backend interface - `ColdStorageTask` processes requests from channels - `ColdStorageHandle` provides full read/write access - `ColdStorageReadHandle` provides read-only access @@ -45,8 +45,9 @@ always authoritative. - **Normal operation**: Writes are dispatched asynchronously. Cold may be a few blocks behind hot. -- **Backpressure**: If cold cannot keep up, dispatch returns - `ColdStorageError::Backpressure`. +- **Deadline exceeded**: Reads that overrun their configured timeout return + `ColdStorageError::DeadlineExceeded`; streams return + `ColdStorageError::StreamDeadlineExceeded`. - **Task failure**: If the task stops, dispatch returns `ColdStorageError::TaskTerminated`. diff --git a/crates/cold/src/task/cache.rs b/crates/cold/src/cache.rs similarity index 100% rename from crates/cold/src/task/cache.rs rename to crates/cold/src/cache.rs diff --git a/crates/cold/src/conformance.rs b/crates/cold/src/conformance.rs index d284c2a..72f13d2 100644 --- a/crates/cold/src/conformance.rs +++ b/crates/cold/src/conformance.rs @@ -1,12 +1,12 @@ -//! Conformance tests for ColdStorage backends. +//! Conformance tests for ColdStorageBackend backends. //! //! These tests verify that any backend implementation behaves correctly -//! according to the ColdStorage trait contract. To use these tests with +//! according to the ColdStorageBackend trait contract. To use these tests with //! a custom backend, call the test functions with your backend instance. 
use crate::{ - BlockData, ColdResult, ColdStorage, ColdStorageError, ColdStorageHandle, ColdStorageTask, - Filter, HeaderSpecifier, ReceiptSpecifier, RpcLog, TransactionSpecifier, + BlockData, ColdResult, ColdStorage, ColdStorageBackend, ColdStorageError, Filter, + HeaderSpecifier, ReceiptSpecifier, RpcLog, TransactionSpecifier, }; use alloy::{ consensus::{ @@ -24,9 +24,9 @@ use tokio_util::sync::CancellationToken; /// Run all conformance tests against a backend. /// /// This is the main entry point for testing a custom backend implementation. -pub async fn conformance(backend: B) -> ColdResult<()> { +pub async fn conformance(backend: B) -> ColdResult<()> { let cancel = CancellationToken::new(); - let handle = ColdStorageTask::spawn(backend, cancel.clone()); + let handle = ColdStorage::new(backend, cancel.clone()); test_empty_storage(&handle).await?; test_append_and_read_header(&handle).await?; test_header_hash_lookup(&handle).await?; @@ -97,7 +97,7 @@ fn make_test_block_with_txs(block_number: BlockNumber, tx_count: usize) -> Block } /// Test that empty storage returns None/empty for all lookups. -pub async fn test_empty_storage(handle: &ColdStorageHandle) -> ColdResult<()> { +pub async fn test_empty_storage(handle: &ColdStorage) -> ColdResult<()> { assert!(handle.get_header(HeaderSpecifier::Number(0)).await?.is_none()); assert!(handle.get_header(HeaderSpecifier::Hash(B256::ZERO)).await?.is_none()); assert!(handle.get_latest_block().await?.is_none()); @@ -108,7 +108,9 @@ pub async fn test_empty_storage(handle: &ColdStorageHandle) -> ColdResult<()> { } /// Test basic append and read for headers. 
-pub async fn test_append_and_read_header(handle: &ColdStorageHandle) -> ColdResult<()> { +pub async fn test_append_and_read_header( + handle: &ColdStorage, +) -> ColdResult<()> { let block_data = make_test_block(100); let expected_header = block_data.header.clone(); @@ -121,7 +123,9 @@ pub async fn test_append_and_read_header(handle: &ColdStorageHandle) -> ColdResu } /// Test header lookup by hash. -pub async fn test_header_hash_lookup(handle: &ColdStorageHandle) -> ColdResult<()> { +pub async fn test_header_hash_lookup( + handle: &ColdStorage, +) -> ColdResult<()> { let block_data = make_test_block(101); let header_hash = block_data.header.hash(); @@ -138,7 +142,9 @@ pub async fn test_header_hash_lookup(handle: &ColdStorageHandle) -> ColdResult<( } /// Test transaction lookups by hash and by block+index. -pub async fn test_transaction_lookups(handle: &ColdStorageHandle) -> ColdResult<()> { +pub async fn test_transaction_lookups( + handle: &ColdStorage, +) -> ColdResult<()> { let block_data = make_test_block(200); handle.append_block(block_data).await?; @@ -151,7 +157,9 @@ pub async fn test_transaction_lookups(handle: &ColdStorageHandle) -> ColdResult< } /// Test receipt lookups. -pub async fn test_receipt_lookups(handle: &ColdStorageHandle) -> ColdResult<()> { +pub async fn test_receipt_lookups( + handle: &ColdStorage, +) -> ColdResult<()> { let block_data = make_test_block(201); handle.append_block(block_data).await?; @@ -163,7 +171,9 @@ pub async fn test_receipt_lookups(handle: &ColdStorageHandle) -> ColdResult<()> } /// Test that transaction and receipt lookups return correct metadata. 
-pub async fn test_confirmation_metadata(handle: &ColdStorageHandle) -> ColdResult<()> { +pub async fn test_confirmation_metadata( + handle: &ColdStorage, +) -> ColdResult<()> { let block = make_test_block_with_txs(600, 3); let expected_hash = block.header.hash(); let tx_hashes: Vec<_> = block.transactions.iter().map(|tx| *tx.tx_hash()).collect(); @@ -230,7 +240,7 @@ pub async fn test_confirmation_metadata(handle: &ColdStorageHandle) -> ColdResul } /// Test truncation removes data correctly. -pub async fn test_truncation(handle: &ColdStorageHandle) -> ColdResult<()> { +pub async fn test_truncation(handle: &ColdStorage) -> ColdResult<()> { // Append blocks 300, 301, 302 handle.append_block(make_test_block(300)).await?; handle.append_block(make_test_block(301)).await?; @@ -253,7 +263,7 @@ pub async fn test_truncation(handle: &ColdStorageHandle) -> ColdResult<()> { } /// Test batch append. -pub async fn test_batch_append(handle: &ColdStorageHandle) -> ColdResult<()> { +pub async fn test_batch_append(handle: &ColdStorage) -> ColdResult<()> { let blocks = vec![make_test_block(400), make_test_block(401), make_test_block(402)]; handle.append_blocks(blocks).await?; @@ -267,7 +277,9 @@ pub async fn test_batch_append(handle: &ColdStorageHandle) -> ColdResult<()> { /// Test ColdReceipt metadata: gas_used, first_log_index, tx_hash, /// block_hash, block_number, transaction_index, from. -pub async fn test_cold_receipt_metadata(handle: &ColdStorageHandle) -> ColdResult<()> { +pub async fn test_cold_receipt_metadata( + handle: &ColdStorage, +) -> ColdResult<()> { // Block with 3 receipts having 2, 3, and 1 logs respectively. 
let header = Header { number: 700, ..Default::default() }; let sealed = header.seal_slow(); @@ -376,7 +388,7 @@ fn make_test_block_with_receipts(block_number: BlockNumber, receipts: Vec ColdResult<()> { +pub async fn test_get_logs(handle: &ColdStorage) -> ColdResult<()> { let addr_a = Address::with_last_byte(0xAA); let addr_b = Address::with_last_byte(0xBB); let topic0_transfer = B256::with_last_byte(0x01); @@ -535,9 +547,8 @@ async fn collect_stream(mut stream: crate::LogStream) -> ColdResult> /// combinations from the existing test_get_logs suite. /// /// Uses block numbers 850-851 to avoid collisions with test_get_logs data -/// (800-801). Streaming is tested via [`ColdStorageHandle`] since the -/// streaming loop lives in [`ColdStorageTask`]. -pub async fn test_stream_logs(handle: &ColdStorageHandle) -> ColdResult<()> { +/// (800-801). Streaming is tested via [`ColdStorage`]. +pub async fn test_stream_logs(handle: &ColdStorage) -> ColdResult<()> { let addr_a = Address::with_last_byte(0xAA); let addr_b = Address::with_last_byte(0xBB); let topic0_transfer = B256::with_last_byte(0x01); @@ -632,7 +643,7 @@ pub async fn test_stream_logs(handle: &ColdStorageHandle) -> ColdResult<()> { /// `first_log_index`, `gas_used`, `tx_hash`, `from`, and block metadata — /// the same fields tested by [`test_cold_receipt_metadata`] for the /// single-receipt path. -pub async fn test_drain_above(handle: &ColdStorageHandle) -> ColdResult<()> { +pub async fn test_drain_above(handle: &ColdStorage) -> ColdResult<()> { // Block 900: anchor (not drained). handle.append_block(make_test_block(900)).await?; diff --git a/crates/cold/src/connect.rs b/crates/cold/src/connect.rs index 7ee2a05..eef20f6 100644 --- a/crates/cold/src/connect.rs +++ b/crates/cold/src/connect.rs @@ -1,6 +1,6 @@ //! Connection traits for cold storage backends. -use crate::ColdStorage; +use crate::ColdStorageBackend; /// Connector trait for cold storage backends. 
/// @@ -8,7 +8,7 @@ use crate::ColdStorage; /// different backends to implement their own initialization logic. pub trait ColdConnect { /// The cold storage type produced by this connector. - type Cold: ColdStorage; + type Cold: ColdStorageBackend; /// The error type returned by connection attempts. type Error: std::error::Error + Send + Sync + 'static; diff --git a/crates/cold/src/error.rs b/crates/cold/src/error.rs index c92fc7c..126c1bf 100644 --- a/crates/cold/src/error.rs +++ b/crates/cold/src/error.rs @@ -1,5 +1,7 @@ //! Error types for cold storage operations. +use std::time::Duration; + /// Result type alias for cold storage operations. pub type ColdResult = Result; @@ -14,22 +16,6 @@ pub enum ColdStorageError { #[error("Not found: {0}")] NotFound(String), - /// The storage task was cancelled. - #[error("Task cancelled")] - Cancelled, - - /// The cold storage task cannot keep up with incoming requests. - /// - /// The channel is full, indicating transient backpressure. The cold storage - /// task is still running but processing requests slower than they arrive. - /// - /// Callers may: - /// - Accept the gap (hot storage is authoritative) and repair later - /// - Retry after a delay with exponential backoff - /// - Increase channel capacity at construction time - #[error("cold storage backpressure: channel full")] - Backpressure, - /// The query matched more logs than the caller-specified `max_logs` limit. /// /// The query is rejected entirely rather than returning a partial result. @@ -41,6 +27,14 @@ pub enum ColdStorageError { limit: usize, }, + /// A non-streaming read exceeded its configured deadline. + /// + /// Backends with native timeout support (iterator reads, pooled + /// clients) surface this as a first-class variant so callers can + /// match on it without downcasting [`Self::Backend`]. + #[error("read deadline exceeded after {0:?}")] + DeadlineExceeded(Duration), + /// The streaming operation exceeded its deadline. 
/// /// The backend enforces a wall-clock limit on streaming operations @@ -59,13 +53,9 @@ pub enum ColdStorageError { /// The cold storage task has terminated. /// - /// The channel is closed because the task has stopped (panic, cancellation, - /// or shutdown). This is persistent: all subsequent dispatches will fail - /// until the task is restarted. - /// - /// Hot storage remains consistent and authoritative. Cold storage can be - /// replayed from hot storage after task recovery. - #[error("cold storage task terminated: channel closed")] + /// A concurrency semaphore was closed, indicating shutdown. Reads and + /// writes cannot be dispatched until the handle is recreated. + #[error("cold storage task terminated: semaphore closed")] TaskTerminated, } @@ -77,4 +67,18 @@ impl ColdStorageError { { Self::Backend(Box::new(error)) } + + /// Short, stable label identifying the error variant. Used as a metric + /// label. + pub(crate) const fn kind(&self) -> &'static str { + match self { + Self::Backend(_) => "backend", + Self::NotFound(_) => "not_found", + Self::TooManyLogs { .. } => "too_many_logs", + Self::DeadlineExceeded(_) => "deadline", + Self::StreamDeadlineExceeded => "stream_deadline", + Self::ReorgDetected => "reorg", + Self::TaskTerminated => "task_terminated", + } + } } diff --git a/crates/cold/src/handle.rs b/crates/cold/src/handle.rs new file mode 100644 index 0000000..6547b05 --- /dev/null +++ b/crates/cold/src/handle.rs @@ -0,0 +1,694 @@ +//! Unified cold-storage handle. +//! +//! A single [`ColdStorage`] type wraps an `Arc>` and provides all +//! read, write, and streaming operations. Concurrency is enforced via +//! semaphores rather than dedicated reader/writer tasks. +//! +//! # Concurrency +//! +//! - Reads are gated by a `read_sem` (up to 64 in flight). +//! - Writes are gated by a `write_sem` (serialized: 1 in flight) and +//! additionally acquire all `read_sem` permits as a drain barrier, so no +//! read is in flight while a write commits. 
+//! - Log streams are gated by a `stream_sem` (up to 8 in flight) and do not +//! participate in the drain barrier. + +use crate::{ + BlockData, ColdReceipt, ColdResult, ColdStorageBackend, ColdStorageError, Confirmed, Filter, + HeaderSpecifier, LogStream, ReceiptSpecifier, RpcLog, SignetEventsSpecifier, StreamParams, + TransactionSpecifier, ZenithHeaderSpecifier, cache::ColdCache, metrics, +}; +use alloy::primitives::{B256, BlockNumber}; +use parking_lot::Mutex; +use signet_storage_types::{DbSignetEvent, DbZenithHeader, RecoveredTx, SealedHeader}; +use std::{ + sync::{Arc, Weak}, + time::Duration, +}; +use tokio::{ + sync::{Semaphore, mpsc}, + time::Instant, +}; +use tokio_stream::wrappers::ReceiverStream; +use tokio_util::{ + sync::{CancellationToken, DropGuard}, + task::TaskTracker, +}; +use tracing::Instrument; + +/// Default maximum deadline for streaming operations. +const DEFAULT_MAX_STREAM_DEADLINE: Duration = Duration::from_secs(60); + +/// Default fallback for the stream-setup `get_latest_block` deadline +/// when the backend does not advertise a [`read_timeout`]. Picked to +/// match the SQL/MDBX defaults so behaviour is predictable. +/// +/// [`read_timeout`]: crate::ColdStorageBackend::read_timeout +const DEFAULT_STREAM_SETUP_TIMEOUT: Duration = Duration::from_millis(500); + +/// Emit an advisory WARN if a successful write exceeded its end-to-end +/// SLO target. Only fires on `Ok`: a failed write already surfaces an +/// error to the caller, and a noisy overrun WARN on top would poison +/// alerting built on this signal. 
+fn warn_on_write_overrun( + op: &'static str, + elapsed: Duration, + threshold: Option, + is_ok: bool, +) { + let Some(threshold) = threshold else { return }; + if is_ok && elapsed > threshold { + tracing::warn!( + op, + elapsed_ms = elapsed.as_millis() as u64, + threshold_ms = threshold.as_millis() as u64, + "cold write exceeded end-to-end write timeout (queue + drain + commit)", + ); + } +} + +/// Log a `JoinError` from a tracked spawn before mapping to +/// [`ColdStorageError::TaskTerminated`]. A panic inside the spawned body +/// is otherwise indistinguishable from graceful shutdown for the +/// caller, which is a poor on-call signal. +fn log_join_error(op: &'static str, e: &tokio::task::JoinError) { + if e.is_panic() { + tracing::error!(op, error = %e, "cold storage spawned task panicked"); + } else if e.is_cancelled() { + tracing::debug!(op, "cold storage spawned task cancelled"); + } +} + +/// Maximum concurrent read operations. +const MAX_CONCURRENT_READERS: usize = 64; + +/// Maximum concurrent write operations. +const MAX_CONCURRENT_WRITES: usize = 1; + +/// Maximum concurrent streaming operations. +const MAX_CONCURRENT_STREAMS: usize = 8; + +/// Channel buffer size for streaming operations. +const STREAM_CHANNEL_BUFFER: usize = 256; + +/// Shared inner state for [`ColdStorage`]. +pub(crate) struct Inner { + pub(crate) backend: B, + pub(crate) cache: Mutex, + pub(crate) max_stream_deadline: Duration, + pub(crate) read_sem: Arc, + pub(crate) write_sem: Arc, + pub(crate) stream_sem: Arc, + pub(crate) tracker: TaskTracker, + /// Fires `shutdown` (a child of the user's cancel token) when `Inner` + /// drops, waking the coordinator task so it exits without waiting on + /// user-side cancel. + _shutdown_guard: DropGuard, +} + +/// Unified handle for interacting with a cold storage backend. +/// +/// `ColdStorage` is cheap to [`Clone`] — it is just an `Arc` around the +/// shared inner state. 
All operations dispatch through semaphore-gated +/// [`TaskTracker`]-spawned tasks. +pub struct ColdStorage { + inner: Arc>, +} + +impl Clone for ColdStorage { + fn clone(&self) -> Self { + Self { inner: Arc::clone(&self.inner) } + } +} + +impl std::fmt::Debug for ColdStorage { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ColdStorage").finish_non_exhaustive() + } +} + +impl ColdStorage { + /// Create a new cold storage handle wrapping `backend`. + /// + /// A hidden coordinator task watches `cancel` and, on fire, closes the + /// read/write/stream semaphores and the task tracker. After cancel, + /// all handle methods fail fast with [`ColdStorageError::TaskTerminated`] + /// on permit acquisition; in-flight spawned tasks drain to completion + /// bounded by backend timeouts. + pub fn new(backend: B, cancel: CancellationToken) -> Self { + // `shutdown` fires on user-side cancel (via the parent) OR when + // `Inner` drops (via the DropGuard). The coordinator holds only a + // `Weak` so it never pins the backend. + let shutdown = cancel.child_token(); + let shutdown_guard = shutdown.clone().drop_guard(); + let inner = Arc::new(Inner { + backend, + cache: Mutex::new(ColdCache::new()), + max_stream_deadline: DEFAULT_MAX_STREAM_DEADLINE, + read_sem: Arc::new(Semaphore::new(MAX_CONCURRENT_READERS)), + write_sem: Arc::new(Semaphore::new(MAX_CONCURRENT_WRITES)), + stream_sem: Arc::new(Semaphore::new(MAX_CONCURRENT_STREAMS)), + tracker: TaskTracker::new(), + _shutdown_guard: shutdown_guard, + }); + let weak: Weak> = Arc::downgrade(&inner); + // Shutdown coordinator: must NOT be tracked by `tracker`, otherwise + // `tracker.wait()` would deadlock waiting on this task. Holds only a + // `Weak` ref so `Inner` (and the backend) can drop when all handles + // and in-flight tasks are gone. 
+ tokio::spawn(async move { + shutdown.cancelled().await; + let Some(inner) = weak.upgrade() else { return }; + inner.read_sem.close(); + inner.write_sem.close(); + inner.stream_sem.close(); + inner.tracker.close(); + }); + Self { inner } + } + + /// Close the task tracker and wait for all in-flight tasks to finish. + /// + /// Idempotent with the shutdown coordinator: safe to call whether or not + /// the cancel token has fired. + pub async fn wait_shutdown(&self) { + self.inner.tracker.close(); + self.inner.tracker.wait().await; + } + + /// Spawn a read task under the `read_sem` permit. + async fn spawn_read(&self, op: &'static str, f: F) -> ColdResult + where + T: Send + 'static, + F: FnOnce(Arc>) -> Fut + Send + 'static, + Fut: std::future::Future> + Send, + { + let wait = Instant::now(); + let permit = self + .inner + .read_sem + .clone() + .acquire_owned() + .await + .map_err(|_| ColdStorageError::TaskTerminated)?; + metrics::record_permit_wait("read", wait.elapsed()); + let inner = Arc::clone(&self.inner); + self.inner + .tracker + .spawn( + async move { + let _p = permit; + let _guard = metrics::InFlightGuard::new("read"); + let start = Instant::now(); + let result = f(inner).await; + metrics::record_op_duration(op, start.elapsed()); + if let Err(ref e) = result { + metrics::record_op_error(op, e.kind()); + } + result + } + .in_current_span(), + ) + .await + .map_err(|e| { + log_join_error(op, &e); + ColdStorageError::TaskTerminated + })? + } + + /// Spawn a write task under the `write_sem` permit, holding a full drain + /// on `read_sem` for the duration of the write. + /// + /// Acquisition order is: `write_sem` first, then all `MAX_CONCURRENT_READERS` + /// read permits. This ensures concurrent writers queue on `write_sem` so + /// only one writer at a time waits on the drain, preventing starvation of + /// the read pool. 
+ /// + /// Drop order inside the spawned task releases the drain before the write + /// permit, so readers regain access immediately once the write completes. + async fn spawn_write(&self, op: &'static str, f: F) -> ColdResult + where + T: Send + 'static, + F: FnOnce(Arc>) -> Fut + Send + 'static, + Fut: std::future::Future> + Send, + { + // End-to-end SLO start: capture before permit acquisition so the + // measurement covers `write_sem` queueing and the read drain in + // addition to the backend commit. This is the failure shape the + // PR targets — a slow drain followed by a fast commit must surface + // as an SLO violation, not as a sub-threshold backend timing. + let e2e_start = Instant::now(); + let threshold = self.inner.backend.write_timeout(); + + let write_permit = self + .inner + .write_sem + .clone() + .acquire_owned() + .await + .map_err(|_| ColdStorageError::TaskTerminated)?; + metrics::record_permit_wait("write", e2e_start.elapsed()); + + let drain_wait = Instant::now(); + let drain = self + .inner + .read_sem + .clone() + .acquire_many_owned(MAX_CONCURRENT_READERS as u32) + .await + .map_err(|_| ColdStorageError::TaskTerminated)?; + metrics::record_permit_wait("drain", drain_wait.elapsed()); + + let inner = Arc::clone(&self.inner); + self.inner + .tracker + .spawn( + async move { + let _w = write_permit; + let _d = drain; + let _guard = metrics::InFlightGuard::new("write"); + let start = Instant::now(); + let result = f(inner).await; + metrics::record_op_duration(op, start.elapsed()); + if let Err(ref e) = result { + metrics::record_op_error(op, e.kind()); + } + warn_on_write_overrun(op, e2e_start.elapsed(), threshold, result.is_ok()); + result + } + .in_current_span(), + ) + .await + .map_err(|e| { + log_join_error(op, &e); + ColdStorageError::TaskTerminated + })? 
+ } + + // ========================================================================== + // Headers + // ========================================================================== + + /// Get a header by specifier. + #[tracing::instrument(skip(self, spec), fields(op = "get_header"))] + pub async fn get_header(&self, spec: HeaderSpecifier) -> ColdResult> { + let op_start = Instant::now(); + if let HeaderSpecifier::Number(n) = &spec + && let Some(hit) = self.inner.cache.lock().get_header(n) + { + metrics::record_op_duration("get_header", op_start.elapsed()); + return Ok(Some(hit)); + } + self.spawn_read("get_header", move |inner| async move { + let result = inner.backend.get_header(spec).await; + if let Ok(Some(ref h)) = result { + inner.cache.lock().put_header(h.number, h.clone()); + } + result + }) + .await + } + + /// Get a header by block number. + pub async fn get_header_by_number( + &self, + block: BlockNumber, + ) -> ColdResult> { + self.get_header(HeaderSpecifier::Number(block)).await + } + + /// Get a header by block hash. + pub async fn get_header_by_hash(&self, hash: B256) -> ColdResult> { + self.get_header(HeaderSpecifier::Hash(hash)).await + } + + /// Get multiple headers by specifiers. + #[tracing::instrument(skip(self, specs), fields(op = "get_headers"))] + pub async fn get_headers( + &self, + specs: Vec, + ) -> ColdResult>> { + self.spawn_read("get_headers", move |inner| async move { + inner.backend.get_headers(specs).await + }) + .await + } + + // ========================================================================== + // Transactions + // ========================================================================== + + /// Get a transaction by specifier, with block confirmation metadata. 
+ #[tracing::instrument(skip(self, spec), fields(op = "get_transaction"))] + pub async fn get_transaction( + &self, + spec: TransactionSpecifier, + ) -> ColdResult>> { + let op_start = Instant::now(); + if let TransactionSpecifier::BlockAndIndex { block, index } = &spec + && let Some(hit) = self.inner.cache.lock().get_tx(&(*block, *index)) + { + metrics::record_op_duration("get_transaction", op_start.elapsed()); + return Ok(Some(hit)); + } + self.spawn_read("get_transaction", move |inner| async move { + let result = inner.backend.get_transaction(spec).await; + if let Ok(Some(ref c)) = result { + let meta = c.meta(); + inner + .cache + .lock() + .put_tx((meta.block_number(), meta.transaction_index()), c.clone()); + } + result + }) + .await + } + + /// Get a transaction by hash. + pub async fn get_tx_by_hash(&self, hash: B256) -> ColdResult>> { + self.get_transaction(TransactionSpecifier::Hash(hash)).await + } + + /// Get a transaction by block number and index. + pub async fn get_tx_by_block_and_index( + &self, + block: BlockNumber, + index: u64, + ) -> ColdResult>> { + self.get_transaction(TransactionSpecifier::BlockAndIndex { block, index }).await + } + + /// Get a transaction by block hash and index. + pub async fn get_tx_by_block_hash_and_index( + &self, + block_hash: B256, + index: u64, + ) -> ColdResult>> { + self.get_transaction(TransactionSpecifier::BlockHashAndIndex { block_hash, index }).await + } + + /// Get all transactions in a block. + #[tracing::instrument(skip(self), fields(op = "get_transactions_in_block"))] + pub async fn get_transactions_in_block( + &self, + block: BlockNumber, + ) -> ColdResult> { + self.spawn_read("get_transactions_in_block", move |inner| async move { + inner.backend.get_transactions_in_block(block).await + }) + .await + } + + /// Get the transaction count for a block. 
+ #[tracing::instrument(skip(self), fields(op = "get_transaction_count"))] + pub async fn get_transaction_count(&self, block: BlockNumber) -> ColdResult { + self.spawn_read("get_transaction_count", move |inner| async move { + inner.backend.get_transaction_count(block).await + }) + .await + } + + // ========================================================================== + // Receipts + // ========================================================================== + + /// Get a receipt by specifier. + #[tracing::instrument(skip(self, spec), fields(op = "get_receipt"))] + pub async fn get_receipt(&self, spec: ReceiptSpecifier) -> ColdResult> { + let op_start = Instant::now(); + if let ReceiptSpecifier::BlockAndIndex { block, index } = &spec + && let Some(hit) = self.inner.cache.lock().get_receipt(&(*block, *index)) + { + metrics::record_op_duration("get_receipt", op_start.elapsed()); + return Ok(Some(hit)); + } + self.spawn_read("get_receipt", move |inner| async move { + let result = inner.backend.get_receipt(spec).await; + if let Ok(Some(ref c)) = result { + inner.cache.lock().put_receipt((c.block_number, c.transaction_index), c.clone()); + } + result + }) + .await + } + + /// Get a receipt by transaction hash. + pub async fn get_receipt_by_tx_hash(&self, hash: B256) -> ColdResult> { + self.get_receipt(ReceiptSpecifier::TxHash(hash)).await + } + + /// Get a receipt by block number and index. + pub async fn get_receipt_by_block_and_index( + &self, + block: BlockNumber, + index: u64, + ) -> ColdResult> { + self.get_receipt(ReceiptSpecifier::BlockAndIndex { block, index }).await + } + + /// Get all receipts in a block. 
+ #[tracing::instrument(skip(self), fields(op = "get_receipts_in_block"))] + pub async fn get_receipts_in_block(&self, block: BlockNumber) -> ColdResult> { + self.spawn_read("get_receipts_in_block", move |inner| async move { + inner.backend.get_receipts_in_block(block).await + }) + .await + } + + // ========================================================================== + // SignetEvents + // ========================================================================== + + /// Get signet events by specifier. + #[tracing::instrument(skip(self, spec), fields(op = "get_signet_events"))] + pub async fn get_signet_events( + &self, + spec: SignetEventsSpecifier, + ) -> ColdResult> { + self.spawn_read("get_signet_events", move |inner| async move { + inner.backend.get_signet_events(spec).await + }) + .await + } + + /// Get signet events in a block. + pub async fn get_signet_events_in_block( + &self, + block: BlockNumber, + ) -> ColdResult> { + self.get_signet_events(SignetEventsSpecifier::Block(block)).await + } + + /// Get signet events in a range of blocks. + pub async fn get_signet_events_in_range( + &self, + start: BlockNumber, + end: BlockNumber, + ) -> ColdResult> { + self.get_signet_events(SignetEventsSpecifier::BlockRange { start, end }).await + } + + // ========================================================================== + // ZenithHeaders + // ========================================================================== + + /// Get a zenith header by block number. + pub async fn get_zenith_header( + &self, + block: BlockNumber, + ) -> ColdResult> { + self.get_zenith_header_by_spec(ZenithHeaderSpecifier::Number(block)).await + } + + /// Get a zenith header by specifier. 
+ #[tracing::instrument(skip(self, spec), fields(op = "get_zenith_header_by_spec"))] + async fn get_zenith_header_by_spec( + &self, + spec: ZenithHeaderSpecifier, + ) -> ColdResult> { + self.spawn_read("get_zenith_header_by_spec", move |inner| async move { + inner.backend.get_zenith_header(spec).await + }) + .await + } + + /// Get zenith headers by specifier. + #[tracing::instrument(skip(self, spec), fields(op = "get_zenith_headers"))] + pub async fn get_zenith_headers( + &self, + spec: ZenithHeaderSpecifier, + ) -> ColdResult> { + self.spawn_read("get_zenith_headers", move |inner| async move { + inner.backend.get_zenith_headers(spec).await + }) + .await + } + + /// Get zenith headers in a range of blocks. + pub async fn get_zenith_headers_in_range( + &self, + start: BlockNumber, + end: BlockNumber, + ) -> ColdResult> { + self.get_zenith_headers(ZenithHeaderSpecifier::Range { start, end }).await + } + + // ========================================================================== + // Logs + // ========================================================================== + + /// Filter logs by block range, address, and topics. + /// + /// Follows `eth_getLogs` semantics. Returns matching logs ordered by + /// `(block_number, tx_index, log_index)`. + #[tracing::instrument(skip(self, filter), fields(op = "get_logs"))] + pub async fn get_logs(&self, filter: Filter, max_logs: usize) -> ColdResult> { + self.spawn_read("get_logs", move |inner| async move { + inner.backend.get_logs(&filter, max_logs).await + }) + .await + } + + /// Stream logs matching a filter. + /// + /// Returns a [`LogStream`] that yields matching logs in order. + /// Consume with `StreamExt::next()` until `None`. If the last item is + /// `Err(...)`, an error occurred (deadline, too many logs, reorg). + /// + /// The `deadline` is clamped to the handle's configured maximum. 
+ #[tracing::instrument(skip(self, filter), fields(op = "stream_logs"))] + pub async fn stream_logs( + &self, + filter: Filter, + max_logs: usize, + deadline: Duration, + ) -> ColdResult { + let from = filter.get_from_block().unwrap_or(0); + // Resolve `to` BEFORE acquiring `stream_sem`. Holding the permit + // across setup I/O (especially when falling back to + // `get_latest_block`) lets a stuck backend pin all 8 permits and + // prevent any new stream from starting. Setup reads intentionally + // bypass `read_sem` and the drain barrier: a stream asking for + // "latest" should observe latest at setup time even alongside an + // in-flight write. + // + // Wrap the setup read in a wall-clock timeout so a stuck backend + // (cold MDBX page, saturated PG pool) cannot stall N concurrent + // setup callers indefinitely. The future drops on timeout but the + // backend work continues — same trade-off the rest of the design + // accepts. + let to = match filter.get_to_block() { + Some(to) => to, + None => { + let setup_to = + self.inner.backend.read_timeout().unwrap_or(DEFAULT_STREAM_SETUP_TIMEOUT); + let latest = tokio::time::timeout(setup_to, self.inner.backend.get_latest_block()) + .await + .map_err(|_| ColdStorageError::DeadlineExceeded(setup_to))??; + match latest { + Some(latest) => latest, + None => { + let (_tx, rx) = mpsc::channel(1); + return Ok(ReceiverStream::new(rx)); + } + } + } + }; + + let wait = Instant::now(); + let permit = self + .inner + .stream_sem + .clone() + .acquire_owned() + .await + .map_err(|_| ColdStorageError::TaskTerminated)?; + metrics::record_permit_wait("stream", wait.elapsed()); + + let effective = deadline.min(self.inner.max_stream_deadline); + let deadline_instant = Instant::now() + effective; + let (sender, rx) = mpsc::channel(STREAM_CHANNEL_BUFFER); + let inner = Arc::clone(&self.inner); + let started = Instant::now(); + self.inner.tracker.spawn( + async move { + let _p = permit; + let _guard = 
metrics::InFlightGuard::new("stream"); + let params = + StreamParams { from, to, max_logs, sender, deadline: deadline_instant }; + inner.backend.produce_log_stream(&filter, params).await; + metrics::record_stream_lifetime(started.elapsed()); + } + .in_current_span(), + ); + Ok(ReceiverStream::new(rx)) + } + + // ========================================================================== + // Metadata + // ========================================================================== + + /// Get the latest block number in storage. + #[tracing::instrument(skip(self), fields(op = "get_latest_block"))] + pub async fn get_latest_block(&self) -> ColdResult> { + self.spawn_read("get_latest_block", move |inner| async move { + inner.backend.get_latest_block().await + }) + .await + } + + // ========================================================================== + // Writes + // ========================================================================== + + /// Append a single block to cold storage. + #[tracing::instrument(skip(self, data), fields(op = "append_block"))] + pub async fn append_block(&self, data: BlockData) -> ColdResult<()> { + self.spawn_write("append_block", move |inner| async move { + inner.backend.append_block(data).await + }) + .await + } + + /// Append multiple blocks to cold storage. + #[tracing::instrument(skip(self, data), fields(op = "append_blocks"))] + pub async fn append_blocks(&self, data: Vec) -> ColdResult<()> { + self.spawn_write("append_blocks", move |inner| async move { + inner.backend.append_blocks(data).await + }) + .await + } + + /// Truncate all data above the given block number. + /// + /// This removes block N+1 and higher from all tables and invalidates any + /// cached lookups above `block`. 
+ #[tracing::instrument(skip(self), fields(op = "truncate_above"))] + pub async fn truncate_above(&self, block: BlockNumber) -> ColdResult<()> { + self.spawn_write("truncate_above", move |inner| async move { + let result = inner.backend.truncate_above(block).await; + if result.is_ok() { + inner.cache.lock().invalidate_above(block); + } + result + }) + .await + } + + /// Read and remove all blocks above the given block number. + /// + /// Returns receipts for each block above `block` in ascending order, + /// then truncates. Index 0 = block+1, index 1 = block+2, etc. + #[tracing::instrument(skip(self), fields(op = "drain_above"))] + pub async fn drain_above(&self, block: BlockNumber) -> ColdResult>> { + self.spawn_write("drain_above", move |inner| async move { + let result = inner.backend.drain_above(block).await; + if result.is_ok() { + inner.cache.lock().invalidate_above(block); + } + result + }) + .await + } +} diff --git a/crates/cold/src/lib.rs b/crates/cold/src/lib.rs index dd61d99..7a57891 100644 --- a/crates/cold/src/lib.rs +++ b/crates/cold/src/lib.rs @@ -11,25 +11,19 @@ //! //! # Architecture //! -//! The cold storage engine uses a task-based architecture: +//! The cold storage engine exposes a single handle type: //! -//! - [`ColdStorage`] trait defines the backend interface -//! - [`ColdStorageTask`] processes requests from channels -//! - [`ColdStorageHandle`] provides full read/write access -//! - [`ColdStorageReadHandle`] provides read-only access +//! - [`ColdStorageBackend`] trait defines the backend interface +//! - [`ColdStorage`] wraps a shared inner struct in an `Arc` and provides +//! read, write, and streaming operations. It is cheap to clone and share. //! -//! # Channel Separation +//! # Concurrency //! -//! Reads and writes use **separate channels**: +//! The handle uses semaphores rather than channels to bound in-flight work: //! -//! - **Read channel**: Shared between [`ColdStorageHandle`] and -//! [`ColdStorageReadHandle`]. 
Reads are processed concurrently (up to 64 in -//! flight). -//! - **Write channel**: Exclusive to [`ColdStorageHandle`]. Writes are -//! processed sequentially to maintain ordering. -//! -//! This design allows read-heavy workloads to proceed without being blocked by -//! write operations, while ensuring write ordering is preserved. +//! - **Reads**: gated by an internal semaphore (up to 64 in flight). +//! - **Writes**: gated by an internal semaphore (serialized: 1 in flight). +//! - **Streams**: gated by an internal semaphore (up to 8 in flight). //! //! # Consistency Model //! @@ -38,12 +32,10 @@ //! //! ## When Cold May Lag //! -//! - **Normal operation**: Writes are dispatched asynchronously. Cold may be a -//! few blocks behind hot during normal block processing. -//! - **Backpressure**: If cold storage cannot keep up, the write channel fills. -//! Dispatch methods return [`ColdStorageError::Backpressure`]. -//! - **Task termination**: If the cold storage task stops, writes cannot be -//! dispatched. Dispatch methods return [`ColdStorageError::TaskTerminated`]. +//! - **Normal operation**: Callers that do not await cold writes may see cold +//! storage lag hot by a few blocks during ingest. +//! - **Task termination**: If the handle's semaphores are closed, reads and +//! writes return [`ColdStorageError::TaskTerminated`]. //! //! ## When Cold May Have Stale Data //! @@ -65,16 +57,16 @@ //! //! ```ignore //! use tokio_util::sync::CancellationToken; -//! use signet_cold::{ColdStorageTask, mem::MemColdBackend}; +//! use signet_cold::{ColdStorage, mem::MemColdBackend}; //! //! let cancel = CancellationToken::new(); -//! let handle = ColdStorageTask::spawn(MemColdBackend::new(), cancel); +//! let handle = ColdStorage::new(MemColdBackend::new(), cancel); //! //! // Use the handle to interact with cold storage //! let header = handle.get_header_by_number(100).await?; //! -//! // Get a read-only handle for query-only components -//! 
let reader = handle.reader(); +//! // Clone cheaply to share across tasks +//! let reader = handle.clone(); //! let tx = reader.get_tx_by_hash(hash).await?; //! ``` //! @@ -98,7 +90,7 @@ //! /// //! /// * `handle` - The cold storage handle to write through //! /// * `buffer_capacity` - Number of blocks to buffer before flushing -//! pub fn new(handle: &ColdStorageHandle, buffer_capacity: usize) -> Self; +//! pub fn new(handle: &ColdStorage, buffer_capacity: usize) -> Self; //! //! /// Push a block to the write buffer. //! /// @@ -126,7 +118,7 @@ //! # Feature Flags //! //! - **`in-memory`**: Enables the `mem` module, providing an in-memory -//! [`ColdStorage`] backend for testing. +//! [`ColdStorageBackend`] backend for testing. //! - **`test-utils`**: Enables the `conformance` module with backend //! conformance tests. Implies `in-memory`. @@ -142,10 +134,12 @@ #![deny(unused_must_use, rust_2018_idioms)] #![cfg_attr(docsrs, feature(doc_cfg))] +mod cache; mod error; +mod metrics; pub use error::{ColdResult, ColdStorageError}; -mod request; -pub use request::{AppendBlockRequest, ColdReadRequest, ColdWriteRequest, Responder}; +mod handle; +pub use handle::ColdStorage; mod specifier; pub use alloy::rpc::types::{Filter, Log as RpcLog}; pub use signet_storage_types::{Confirmed, Recovered}; @@ -159,15 +153,11 @@ pub use cold_receipt::ColdReceipt; mod stream; pub use stream::{StreamParams, produce_log_stream_default}; mod traits; -pub use traits::{BlockData, ColdStorage, ColdStorageRead, ColdStorageWrite, LogStream}; +pub use traits::{BlockData, ColdStorageBackend, ColdStorageRead, ColdStorageWrite, LogStream}; pub mod connect; pub use connect::ColdConnect; -/// Task module containing the storage task runner and handles. -pub mod task; -pub use task::{ColdStorageHandle, ColdStorageReadHandle, ColdStorageTask}; - /// Conformance tests for cold storage backends. 
#[cfg(any(test, feature = "test-utils"))] pub mod conformance; diff --git a/crates/cold/src/mem.rs b/crates/cold/src/mem.rs index e810c77..37d0a99 100644 --- a/crates/cold/src/mem.rs +++ b/crates/cold/src/mem.rs @@ -4,7 +4,7 @@ //! It is primarily intended for testing and development. use crate::{ - BlockData, ColdReceipt, ColdResult, ColdStorage, ColdStorageError, ColdStorageRead, + BlockData, ColdReceipt, ColdResult, ColdStorageBackend, ColdStorageError, ColdStorageRead, ColdStorageWrite, Confirmed, Filter, HeaderSpecifier, ReceiptSpecifier, RpcLog, SignetEventsSpecifier, TransactionSpecifier, ZenithHeaderSpecifier, }; @@ -47,6 +47,18 @@ struct MemColdBackendInner { /// /// This backend is thread-safe and suitable for concurrent access. /// All operations are protected by an async read-write lock. +/// +/// # Timeout exemption +/// +/// [`MemColdBackend`] does NOT honor the "Timeouts (mandatory)" clause of +/// [`ColdStorageBackend`]'s trait contract. All operations are pure +/// in-memory `BTreeMap`/`HashMap` work with no blocking I/O, so a +/// wall-clock deadline has no meaningful effect. This is a deliberate +/// exemption: the backend is intended for tests and development only, and +/// MUST NOT be used as the basis for SLO measurement or production +/// deployment. 
+///
+/// [`ColdStorageBackend`]: crate::ColdStorageBackend
 #[derive(Clone)]
 pub struct MemColdBackend {
     inner: Arc<RwLock<MemColdBackendInner>>,
 }
@@ -278,7 +290,7 @@ impl ColdStorageRead for MemColdBackend {
 }
 
 impl ColdStorageWrite for MemColdBackend {
-    async fn append_block(&mut self, data: BlockData) -> ColdResult<()> {
+    async fn append_block(&self, data: BlockData) -> ColdResult<()> {
         let mut inner = self.inner.write().await;
 
         let block = data.block_number();
@@ -326,22 +338,22 @@
         Ok(())
     }
 
-    async fn append_blocks(&mut self, data: Vec<BlockData>) -> ColdResult<()> {
+    async fn append_blocks(&self, data: Vec<BlockData>) -> ColdResult<()> {
         for block_data in data {
             self.append_block(block_data).await?;
         }
         Ok(())
     }
 
-    async fn truncate_above(&mut self, block: BlockNumber) -> ColdResult<()> {
+    async fn truncate_above(&self, block: BlockNumber) -> ColdResult<()> {
         let mut inner = self.inner.write().await;
         inner.truncate_above(block);
         Ok(())
     }
 }
 
-impl ColdStorage for MemColdBackend {
-    async fn drain_above(&mut self, block: BlockNumber) -> ColdResult<Vec<Vec<ColdReceipt>>> {
+impl ColdStorageBackend for MemColdBackend {
+    async fn drain_above(&self, block: BlockNumber) -> ColdResult<Vec<Vec<ColdReceipt>>> {
         let mut inner = self.inner.write().await;
 
         // Collect receipts for blocks above `block` in ascending order
@@ -382,4 +394,12 @@ mod test {
         let backend = MemColdBackend::new();
         conformance(backend).await.unwrap();
     }
+
+    #[tokio::test]
+    async fn write_trait_takes_self_shared_ref() {
+        // Compile-fence: proves ColdStorageWrite can be called on &self.
+        let backend = MemColdBackend::new();
+        let r: &MemColdBackend = &backend;
+        let _ = r.truncate_above(0).await;
+    }
 }
diff --git a/crates/cold/src/metrics.rs b/crates/cold/src/metrics.rs
new file mode 100644
index 0000000..dccfb10
--- /dev/null
+++ b/crates/cold/src/metrics.rs
@@ -0,0 +1,137 @@
+//! Metrics for cold storage operations.
+//!
+//! Metric names, help strings, and helper recording functions are exposed as
+//! `pub(crate)` items. 
Helpers force the shared [`LazyLock`] describe block on +//! first use, so descriptions are registered exactly once before any value is +//! recorded. + +use metrics::{counter, gauge, histogram}; +use std::{sync::LazyLock, time::Duration}; + +/// Gauge: number of read operations currently in flight against the backend. +pub(crate) const READS_IN_FLIGHT: &str = "cold.reads_in_flight"; +pub(crate) const READS_IN_FLIGHT_HELP: &str = + "Number of cold storage read operations currently in flight against the backend"; + +/// Gauge: number of write operations currently in flight against the backend. +pub(crate) const WRITES_IN_FLIGHT: &str = "cold.writes_in_flight"; +pub(crate) const WRITES_IN_FLIGHT_HELP: &str = + "Number of cold storage write operations currently in flight against the backend"; + +/// Gauge: number of log streams currently active. +pub(crate) const STREAMS_ACTIVE: &str = "cold.streams_active"; +pub(crate) const STREAMS_ACTIVE_HELP: &str = "Number of cold storage log streams currently active"; + +/// Histogram (microseconds): per-operation duration, labeled by `op`. +pub(crate) const OP_DURATION_US: &str = "cold.op_duration_us"; +pub(crate) const OP_DURATION_US_HELP: &str = + "Duration of cold storage operations in microseconds, labeled by op"; + +/// Histogram (microseconds): time spent waiting for a concurrency permit, +/// labeled by `sem`. +pub(crate) const PERMIT_WAIT_US: &str = "cold.permit_wait_us"; +pub(crate) const PERMIT_WAIT_US_HELP: &str = + "Time spent waiting for a cold storage concurrency permit in microseconds, labeled by sem"; + +/// Counter: cold storage operation errors, labeled by `op` and `error`. +pub(crate) const OP_ERRORS_TOTAL: &str = "cold.op_errors_total"; +pub(crate) const OP_ERRORS_TOTAL_HELP: &str = + "Number of cold storage operation errors, labeled by op and error kind"; + +/// Histogram (milliseconds): lifetime of a log stream from spawn to end. 
+pub(crate) const STREAM_LIFETIME_MS: &str = "cold.stream_lifetime_ms"; +pub(crate) const STREAM_LIFETIME_MS_HELP: &str = + "Lifetime of cold storage log streams in milliseconds"; + +static DESCRIBE: LazyLock<()> = LazyLock::new(|| { + metrics::describe_gauge!(READS_IN_FLIGHT, metrics::Unit::Count, READS_IN_FLIGHT_HELP); + metrics::describe_gauge!(WRITES_IN_FLIGHT, metrics::Unit::Count, WRITES_IN_FLIGHT_HELP); + metrics::describe_gauge!(STREAMS_ACTIVE, metrics::Unit::Count, STREAMS_ACTIVE_HELP); + metrics::describe_histogram!(OP_DURATION_US, metrics::Unit::Microseconds, OP_DURATION_US_HELP); + metrics::describe_histogram!(PERMIT_WAIT_US, metrics::Unit::Microseconds, PERMIT_WAIT_US_HELP); + metrics::describe_counter!(OP_ERRORS_TOTAL, metrics::Unit::Count, OP_ERRORS_TOTAL_HELP); + metrics::describe_histogram!( + STREAM_LIFETIME_MS, + metrics::Unit::Milliseconds, + STREAM_LIFETIME_MS_HELP + ); +}); + +/// Force the one-time describe block to run. +fn ensure_described() { + LazyLock::force(&DESCRIBE); +} + +/// Resolve the in-flight gauge for the given pool name. +fn in_flight_gauge(which: &'static str) -> metrics::Gauge { + match which { + "read" => gauge!(READS_IN_FLIGHT), + "write" => gauge!(WRITES_IN_FLIGHT), + "stream" => gauge!(STREAMS_ACTIVE), + _ => unreachable!("unknown in-flight pool: {which}"), + } +} + +/// Increment the in-flight gauge for a pool. `which` is one of +/// `"read"`, `"write"`, `"stream"`. +pub(crate) fn inc_in_flight(which: &'static str) { + ensure_described(); + in_flight_gauge(which).increment(1); +} + +/// Decrement the in-flight gauge for a pool. `which` is one of +/// `"read"`, `"write"`, `"stream"`. +pub(crate) fn dec_in_flight(which: &'static str) { + ensure_described(); + in_flight_gauge(which).decrement(1); +} + +/// Record an operation duration, in microseconds, labeled by `op`. 
+pub(crate) fn record_op_duration(op: &'static str, d: Duration) { + ensure_described(); + histogram!(OP_DURATION_US, "op" => op).record(d.as_micros() as f64); +} + +/// Record a permit-wait duration, in microseconds, labeled by `sem`. +/// `sem` is one of `"read"`, `"write"`, `"drain"`, `"stream"`. +pub(crate) fn record_permit_wait(sem: &'static str, d: Duration) { + ensure_described(); + histogram!(PERMIT_WAIT_US, "sem" => sem).record(d.as_micros() as f64); +} + +/// Record an operation error, labeled by `op` and `error` kind. +pub(crate) fn record_op_error(op: &'static str, error: &'static str) { + ensure_described(); + counter!(OP_ERRORS_TOTAL, "op" => op, "error" => error).increment(1); +} + +/// Record a stream lifetime, in milliseconds. +pub(crate) fn record_stream_lifetime(d: Duration) { + ensure_described(); + histogram!(STREAM_LIFETIME_MS).record(d.as_millis() as f64); +} + +/// RAII guard that increments the in-flight gauge for `pool` on construction +/// and decrements it on drop. +/// +/// Using a guard ensures the decrement always runs, even if the spawned task +/// body panics between start and end. Without this, a panic leaks the gauge +/// and corrupts any Prometheus alerting built on `cold.*_in_flight`. +pub(crate) struct InFlightGuard { + pool: &'static str, +} + +impl InFlightGuard { + /// Increment the gauge for `pool` and return a guard that decrements on + /// drop. `pool` is one of `"read"`, `"write"`, `"stream"`. + pub(crate) fn new(pool: &'static str) -> Self { + inc_in_flight(pool); + Self { pool } + } +} + +impl Drop for InFlightGuard { + fn drop(&mut self) { + dec_in_flight(self.pool); + } +} diff --git a/crates/cold/src/request.rs b/crates/cold/src/request.rs deleted file mode 100644 index 32ee971..0000000 --- a/crates/cold/src/request.rs +++ /dev/null @@ -1,170 +0,0 @@ -//! Request and response types for the cold storage task. -//! -//! These types define the messages sent over channels to the cold storage task. -//! 
Reads and writes use separate channels with their own request types. - -use crate::{ - BlockData, ColdReceipt, ColdStorageError, Confirmed, Filter, HeaderSpecifier, LogStream, - ReceiptSpecifier, RpcLog, SignetEventsSpecifier, TransactionSpecifier, ZenithHeaderSpecifier, -}; -use alloy::primitives::BlockNumber; -use signet_storage_types::{DbSignetEvent, DbZenithHeader, RecoveredTx, SealedHeader}; -use std::time::Duration; -use tokio::sync::oneshot; - -/// Response sender type alias that propagates Result types. -pub type Responder = oneshot::Sender>; - -/// Block append request data (wrapper struct). -#[derive(Debug)] -pub struct AppendBlockRequest { - /// The block data to append. - pub data: BlockData, - /// The response channel. - pub resp: Responder<()>, -} - -/// Read requests for cold storage. -/// -/// These requests are processed concurrently (up to 64 in flight). -#[derive(Debug)] -pub enum ColdReadRequest { - // --- Headers --- - /// Get a single header by specifier. - GetHeader { - /// The header specifier. - spec: HeaderSpecifier, - /// The response channel. - resp: Responder>, - }, - /// Get multiple headers by specifiers. - GetHeaders { - /// The header specifiers. - specs: Vec, - /// The response channel. - resp: Responder>>, - }, - - // --- Transactions --- - /// Get a single transaction by specifier. - GetTransaction { - /// The transaction specifier. - spec: TransactionSpecifier, - /// The response channel. - resp: Responder>>, - }, - /// Get all transactions in a block. - GetTransactionsInBlock { - /// The block number. - block: BlockNumber, - /// The response channel. - resp: Responder>, - }, - /// Get the transaction count for a block. - GetTransactionCount { - /// The block number. - block: BlockNumber, - /// The response channel. - resp: Responder, - }, - - // --- Receipts --- - /// Get a single receipt by specifier. - GetReceipt { - /// The receipt specifier. - spec: ReceiptSpecifier, - /// The response channel. 
- resp: Responder>, - }, - /// Get all receipts in a block. - GetReceiptsInBlock { - /// The block number. - block: BlockNumber, - /// The response channel. - resp: Responder>, - }, - - // --- SignetEvents --- - /// Get signet events by specifier. - GetSignetEvents { - /// The signet events specifier. - spec: SignetEventsSpecifier, - /// The response channel. - resp: Responder>, - }, - - // --- ZenithHeaders --- - /// Get a single zenith header by specifier. - GetZenithHeader { - /// The zenith header specifier. - spec: ZenithHeaderSpecifier, - /// The response channel. - resp: Responder>, - }, - /// Get multiple zenith headers by specifier. - GetZenithHeaders { - /// The zenith header specifier. - spec: ZenithHeaderSpecifier, - /// The response channel. - resp: Responder>, - }, - - // --- Logs --- - /// Filter logs by block range, address, and topics. - GetLogs { - /// The log filter. - filter: Box, - /// Maximum number of logs to return. - max_logs: usize, - /// The response channel. - resp: Responder>, - }, - /// Stream logs matching a filter. - StreamLogs { - /// The log filter. - filter: Box, - /// Maximum number of logs to stream. - max_logs: usize, - /// Requested stream deadline (clamped to the task's max). - deadline: Duration, - /// Response channel returning the log stream. - resp: Responder, - }, - - // --- Metadata --- - /// Get the latest block number. - GetLatestBlock { - /// The response channel. - resp: Responder>, - }, -} - -/// Write requests for cold storage. -/// -/// These requests are processed sequentially to maintain ordering. -#[derive(Debug)] -pub enum ColdWriteRequest { - /// Append a single block. - AppendBlock(Box), - /// Append multiple blocks. - AppendBlocks { - /// The block data to append. - data: Vec, - /// The response channel. - resp: Responder<()>, - }, - /// Truncate all data above the given block. - TruncateAbove { - /// The block number to truncate above. - block: BlockNumber, - /// The response channel. 
- resp: Responder<()>, - }, - /// Read receipts and truncate all data above the given block. - DrainAbove { - /// The block number to drain above. - block: BlockNumber, - /// The response channel. - resp: Responder>>, - }, -} diff --git a/crates/cold/src/task/handle.rs b/crates/cold/src/task/handle.rs deleted file mode 100644 index 57c2d38..0000000 --- a/crates/cold/src/task/handle.rs +++ /dev/null @@ -1,645 +0,0 @@ -//! Ergonomic handles for interacting with cold storage. -//! -//! This module provides two handle types: -//! -//! - [`ColdStorageHandle`]: Full access (reads and writes) -//! - [`ColdStorageReadHandle`]: Read-only access -//! -//! Both handles can be cloned and shared across tasks. They use separate -//! channels for reads and writes, allowing concurrent read processing while -//! maintaining sequential write ordering. - -use crate::{ - AppendBlockRequest, BlockData, ColdReadRequest, ColdReceipt, ColdResult, ColdStorageError, - ColdWriteRequest, Confirmed, Filter, HeaderSpecifier, LogStream, ReceiptSpecifier, RpcLog, - SignetEventsSpecifier, TransactionSpecifier, ZenithHeaderSpecifier, -}; -use alloy::primitives::{B256, BlockNumber}; -use signet_storage_types::{DbSignetEvent, DbZenithHeader, RecoveredTx, SealedHeader}; -use std::time::Duration; -use tokio::sync::{mpsc, oneshot}; - -/// Map a [`mpsc::error::TrySendError`] to the appropriate -/// [`ColdStorageError`] variant. -fn map_dispatch_error(e: mpsc::error::TrySendError) -> ColdStorageError { - match e { - mpsc::error::TrySendError::Full(_) => ColdStorageError::Backpressure, - mpsc::error::TrySendError::Closed(_) => ColdStorageError::TaskTerminated, - } -} - -/// Read-only handle for interacting with the cold storage task. -/// -/// This handle provides read access only and cannot perform write operations. -/// It shares the read channel with [`ColdStorageHandle`], allowing multiple -/// readers to coexist without affecting write throughput. 
-/// -/// # Usage -/// -/// Obtain a read handle via [`ColdStorageHandle::reader`]: -/// -/// ```ignore -/// let handle = ColdStorageTask::spawn(backend, cancel); -/// let reader = handle.reader(); -/// -/// // Use reader for queries -/// let header = reader.get_header_by_number(100).await?; -/// ``` -/// -/// # Thread Safety -/// -/// This handle is `Clone + Send + Sync` and can be shared across tasks. -/// Multiple readers can query concurrently without blocking writes. -#[derive(Clone, Debug)] -pub struct ColdStorageReadHandle { - sender: mpsc::Sender, -} - -impl ColdStorageReadHandle { - /// Create a new read-only handle with the given sender. - pub(crate) const fn new(sender: mpsc::Sender) -> Self { - Self { sender } - } - - /// Send a read request and wait for the response. - async fn send( - &self, - req: ColdReadRequest, - rx: oneshot::Receiver>, - ) -> ColdResult { - self.sender.send(req).await.map_err(|_| ColdStorageError::Cancelled)?; - rx.await.map_err(|_| ColdStorageError::Cancelled)? - } - - // ========================================================================== - // Headers - // ========================================================================== - - /// Get a header by specifier. - pub async fn get_header(&self, spec: HeaderSpecifier) -> ColdResult> { - let (resp, rx) = oneshot::channel(); - self.send(ColdReadRequest::GetHeader { spec, resp }, rx).await - } - - /// Get a header by block number. - pub async fn get_header_by_number( - &self, - block: BlockNumber, - ) -> ColdResult> { - self.get_header(HeaderSpecifier::Number(block)).await - } - - /// Get a header by block hash. - pub async fn get_header_by_hash(&self, hash: B256) -> ColdResult> { - self.get_header(HeaderSpecifier::Hash(hash)).await - } - - /// Get multiple headers by specifiers. 
- pub async fn get_headers( - &self, - specs: Vec, - ) -> ColdResult>> { - let (resp, rx) = oneshot::channel(); - self.send(ColdReadRequest::GetHeaders { specs, resp }, rx).await - } - - // ========================================================================== - // Transactions - // ========================================================================== - - /// Get a transaction by specifier, with block confirmation metadata. - pub async fn get_transaction( - &self, - spec: TransactionSpecifier, - ) -> ColdResult>> { - let (resp, rx) = oneshot::channel(); - self.send(ColdReadRequest::GetTransaction { spec, resp }, rx).await - } - - /// Get a transaction by hash. - pub async fn get_tx_by_hash(&self, hash: B256) -> ColdResult>> { - self.get_transaction(TransactionSpecifier::Hash(hash)).await - } - - /// Get a transaction by block number and index. - pub async fn get_tx_by_block_and_index( - &self, - block: BlockNumber, - index: u64, - ) -> ColdResult>> { - self.get_transaction(TransactionSpecifier::BlockAndIndex { block, index }).await - } - - /// Get a transaction by block hash and index. - pub async fn get_tx_by_block_hash_and_index( - &self, - block_hash: B256, - index: u64, - ) -> ColdResult>> { - self.get_transaction(TransactionSpecifier::BlockHashAndIndex { block_hash, index }).await - } - - /// Get all transactions in a block. - pub async fn get_transactions_in_block( - &self, - block: BlockNumber, - ) -> ColdResult> { - let (resp, rx) = oneshot::channel(); - self.send(ColdReadRequest::GetTransactionsInBlock { block, resp }, rx).await - } - - /// Get the transaction count for a block. 
- pub async fn get_transaction_count(&self, block: BlockNumber) -> ColdResult { - let (resp, rx) = oneshot::channel(); - self.send(ColdReadRequest::GetTransactionCount { block, resp }, rx).await - } - - // ========================================================================== - // Receipts - // ========================================================================== - - /// Get a receipt by specifier. - pub async fn get_receipt(&self, spec: ReceiptSpecifier) -> ColdResult> { - let (resp, rx) = oneshot::channel(); - self.send(ColdReadRequest::GetReceipt { spec, resp }, rx).await - } - - /// Get a receipt by transaction hash. - pub async fn get_receipt_by_tx_hash(&self, hash: B256) -> ColdResult> { - self.get_receipt(ReceiptSpecifier::TxHash(hash)).await - } - - /// Get a receipt by block number and index. - pub async fn get_receipt_by_block_and_index( - &self, - block: BlockNumber, - index: u64, - ) -> ColdResult> { - self.get_receipt(ReceiptSpecifier::BlockAndIndex { block, index }).await - } - - /// Get all receipts in a block. - pub async fn get_receipts_in_block(&self, block: BlockNumber) -> ColdResult> { - let (resp, rx) = oneshot::channel(); - self.send(ColdReadRequest::GetReceiptsInBlock { block, resp }, rx).await - } - - // ========================================================================== - // SignetEvents - // ========================================================================== - - /// Get signet events by specifier. - pub async fn get_signet_events( - &self, - spec: SignetEventsSpecifier, - ) -> ColdResult> { - let (resp, rx) = oneshot::channel(); - self.send(ColdReadRequest::GetSignetEvents { spec, resp }, rx).await - } - - /// Get signet events in a block. - pub async fn get_signet_events_in_block( - &self, - block: BlockNumber, - ) -> ColdResult> { - self.get_signet_events(SignetEventsSpecifier::Block(block)).await - } - - /// Get signet events in a range of blocks. 
- pub async fn get_signet_events_in_range( - &self, - start: BlockNumber, - end: BlockNumber, - ) -> ColdResult> { - self.get_signet_events(SignetEventsSpecifier::BlockRange { start, end }).await - } - - // ========================================================================== - // ZenithHeaders - // ========================================================================== - - /// Get a zenith header by block number. - pub async fn get_zenith_header( - &self, - block: BlockNumber, - ) -> ColdResult> { - let (resp, rx) = oneshot::channel(); - self.send( - ColdReadRequest::GetZenithHeader { spec: ZenithHeaderSpecifier::Number(block), resp }, - rx, - ) - .await - } - - /// Get zenith headers by specifier. - pub async fn get_zenith_headers( - &self, - spec: ZenithHeaderSpecifier, - ) -> ColdResult> { - let (resp, rx) = oneshot::channel(); - self.send(ColdReadRequest::GetZenithHeaders { spec, resp }, rx).await - } - - /// Get zenith headers in a range of blocks. - pub async fn get_zenith_headers_in_range( - &self, - start: BlockNumber, - end: BlockNumber, - ) -> ColdResult> { - self.get_zenith_headers(ZenithHeaderSpecifier::Range { start, end }).await - } - - // ========================================================================== - // Logs - // ========================================================================== - - /// Filter logs by block range, address, and topics. - /// - /// Follows `eth_getLogs` semantics. Returns matching logs ordered by - /// `(block_number, tx_index, log_index)`. - /// - /// # Errors - /// - /// Returns [`ColdStorageError::TooManyLogs`] if the query would produce - /// more than `max_logs` results. - pub async fn get_logs(&self, filter: Filter, max_logs: usize) -> ColdResult> { - let (resp, rx) = oneshot::channel(); - self.send(ColdReadRequest::GetLogs { filter: Box::new(filter), max_logs, resp }, rx).await - } - - /// Stream logs matching a filter. - /// - /// Returns a [`LogStream`] that yields matching logs in order. 
- /// Consume with `StreamExt::next()` until `None`. If the last item - /// is `Err(...)`, an error occurred (deadline, too many logs, etc.). - /// - /// The `deadline` is clamped to the task's configured maximum. - /// - /// # Partial Delivery - /// - /// One or more `Ok(log)` items may be delivered before a terminal - /// `Err(...)`. Consumers must be prepared for partial results — for - /// example, a reorg or deadline expiry can interrupt a stream that - /// has already yielded some logs. - /// - /// # Resource Management - /// - /// The stream holds a backend concurrency permit. Dropping the - /// stream releases the permit. Drop early if results are no - /// longer needed. - pub async fn stream_logs( - &self, - filter: Filter, - max_logs: usize, - deadline: Duration, - ) -> ColdResult { - let (resp, rx) = oneshot::channel(); - self.send( - ColdReadRequest::StreamLogs { filter: Box::new(filter), max_logs, deadline, resp }, - rx, - ) - .await - } - - // ========================================================================== - // Metadata - // ========================================================================== - - /// Get the latest block number in storage. - pub async fn get_latest_block(&self) -> ColdResult> { - let (resp, rx) = oneshot::channel(); - self.send(ColdReadRequest::GetLatestBlock { resp }, rx).await - } -} - -/// Handle for interacting with the cold storage task. -/// -/// This handle provides full access to both read and write operations. -/// It can be cloned and shared across tasks. -/// -/// # Channel Separation -/// -/// Internally, this handle uses separate channels for reads and writes: -/// -/// - **Read channel**: Shared with [`ColdStorageReadHandle`]. Reads are -/// processed concurrently (up to 64 in flight). -/// - **Write channel**: Exclusive to this handle. Writes are processed -/// sequentially to maintain ordering. 
-/// -/// This design allows read-heavy workloads to proceed without being blocked -/// by write operations, while ensuring write ordering is preserved. -/// -/// # Read Access -/// -/// All read methods from [`ColdStorageReadHandle`] are also available -/// directly on this handle. -/// -/// # Usage -/// -/// ```ignore -/// let handle = ColdStorageTask::spawn(backend, cancel); -/// -/// // Full access: reads and writes -/// handle.append_block(data).await?; -/// let header = handle.get_header_by_number(100).await?; -/// -/// // Get a read-only handle for query-only use cases -/// let reader = handle.reader(); -/// ``` -/// -/// # Thread Safety -/// -/// This handle is `Clone + Send + Sync` and can be shared across tasks. -#[derive(Clone, Debug)] -pub struct ColdStorageHandle { - reader: ColdStorageReadHandle, - write_sender: mpsc::Sender, -} - -impl ColdStorageHandle { - /// Create a new handle with the given senders. - pub(crate) const fn new( - read_sender: mpsc::Sender, - write_sender: mpsc::Sender, - ) -> Self { - Self { reader: ColdStorageReadHandle::new(read_sender), write_sender } - } - - /// Get a read-only handle that shares the read channel. - /// - /// The returned handle can only perform read operations and cannot - /// modify storage. Multiple read handles can coexist and query - /// concurrently without affecting write throughput. - pub fn reader(&self) -> ColdStorageReadHandle { - self.reader.clone() - } - - /// Send a write request and wait for the response. - async fn send_write( - &self, - req: ColdWriteRequest, - rx: oneshot::Receiver>, - ) -> ColdResult { - self.write_sender.send(req).await.map_err(|_| ColdStorageError::Cancelled)?; - rx.await.map_err(|_| ColdStorageError::Cancelled)? - } - - // ========================================================================== - // Write Operations - // ========================================================================== - - /// Append a single block to cold storage. 
- pub async fn append_block(&self, data: BlockData) -> ColdResult<()> { - let (resp, rx) = oneshot::channel(); - self.send_write( - ColdWriteRequest::AppendBlock(Box::new(AppendBlockRequest { data, resp })), - rx, - ) - .await - } - - /// Append multiple blocks to cold storage. - pub async fn append_blocks(&self, data: Vec) -> ColdResult<()> { - let (resp, rx) = oneshot::channel(); - self.send_write(ColdWriteRequest::AppendBlocks { data, resp }, rx).await - } - - /// Truncate all data above the given block number. - /// - /// This removes block N+1 and higher from all tables. - pub async fn truncate_above(&self, block: BlockNumber) -> ColdResult<()> { - let (resp, rx) = oneshot::channel(); - self.send_write(ColdWriteRequest::TruncateAbove { block, resp }, rx).await - } - - // ========================================================================== - // Synchronous Fire-and-Forget Dispatch - // ========================================================================== - - /// Dispatch append blocks without waiting for response (non-blocking). - /// - /// Unlike [`append_blocks`](Self::append_blocks), this method returns - /// immediately without waiting for the write to complete. The write - /// result is discarded. - /// - /// # Errors - /// - /// - [`ColdStorageError::Backpressure`]: Channel is full. The task is alive - /// but cannot keep up. Transient; may retry or accept the gap. - /// - [`ColdStorageError::TaskTerminated`]: Channel is closed. The task has - /// stopped and must be restarted. - /// - /// In both cases, hot storage already contains the data and remains - /// authoritative. - pub fn dispatch_append_blocks(&self, data: Vec) -> ColdResult<()> { - let (resp, _rx) = oneshot::channel(); - self.write_sender - .try_send(ColdWriteRequest::AppendBlocks { data, resp }) - .map_err(map_dispatch_error) - } - - /// Read and remove all blocks above the given block number. 
- /// - /// Returns receipts for each block above `block` in ascending order, - /// then truncates. Index 0 = block+1, index 1 = block+2, etc. - pub async fn drain_above(&self, block: BlockNumber) -> ColdResult>> { - let (resp, rx) = oneshot::channel(); - self.send_write(ColdWriteRequest::DrainAbove { block, resp }, rx).await - } - - /// Dispatch truncate without waiting for response (non-blocking). - /// - /// Unlike [`truncate_above`](Self::truncate_above), this method returns - /// immediately without waiting for the truncate to complete. The result - /// is discarded. - /// - /// # Errors - /// - /// Same as [`dispatch_append_blocks`](Self::dispatch_append_blocks). If - /// cold storage falls behind during a reorg, it may temporarily contain - /// stale data until the truncate is processed or replayed. - pub fn dispatch_truncate_above(&self, block: BlockNumber) -> ColdResult<()> { - let (resp, _rx) = oneshot::channel(); - self.write_sender - .try_send(ColdWriteRequest::TruncateAbove { block, resp }) - .map_err(map_dispatch_error) - } - - // ========================================================================== - // Read Operations (delegated to ColdStorageReadHandle) - // ========================================================================== - - /// Get a header by specifier. - pub async fn get_header(&self, spec: HeaderSpecifier) -> ColdResult> { - self.reader.get_header(spec).await - } - - /// Get a header by block number. - pub async fn get_header_by_number( - &self, - block: BlockNumber, - ) -> ColdResult> { - self.reader.get_header_by_number(block).await - } - - /// Get a header by block hash. - pub async fn get_header_by_hash(&self, hash: B256) -> ColdResult> { - self.reader.get_header_by_hash(hash).await - } - - /// Get multiple headers by specifiers. - pub async fn get_headers( - &self, - specs: Vec, - ) -> ColdResult>> { - self.reader.get_headers(specs).await - } - - /// Get a transaction by specifier, with block confirmation metadata. 
- pub async fn get_transaction( - &self, - spec: TransactionSpecifier, - ) -> ColdResult>> { - self.reader.get_transaction(spec).await - } - - /// Get a transaction by hash. - pub async fn get_tx_by_hash(&self, hash: B256) -> ColdResult>> { - self.reader.get_tx_by_hash(hash).await - } - - /// Get a transaction by block number and index. - pub async fn get_tx_by_block_and_index( - &self, - block: BlockNumber, - index: u64, - ) -> ColdResult>> { - self.reader.get_tx_by_block_and_index(block, index).await - } - - /// Get a transaction by block hash and index. - pub async fn get_tx_by_block_hash_and_index( - &self, - block_hash: B256, - index: u64, - ) -> ColdResult>> { - self.reader.get_tx_by_block_hash_and_index(block_hash, index).await - } - - /// Get all transactions in a block. - pub async fn get_transactions_in_block( - &self, - block: BlockNumber, - ) -> ColdResult> { - self.reader.get_transactions_in_block(block).await - } - - /// Get the transaction count for a block. - pub async fn get_transaction_count(&self, block: BlockNumber) -> ColdResult { - self.reader.get_transaction_count(block).await - } - - /// Get a receipt by specifier. - pub async fn get_receipt(&self, spec: ReceiptSpecifier) -> ColdResult> { - self.reader.get_receipt(spec).await - } - - /// Get a receipt by transaction hash. - pub async fn get_receipt_by_tx_hash(&self, hash: B256) -> ColdResult> { - self.reader.get_receipt_by_tx_hash(hash).await - } - - /// Get a receipt by block number and index. - pub async fn get_receipt_by_block_and_index( - &self, - block: BlockNumber, - index: u64, - ) -> ColdResult> { - self.reader.get_receipt_by_block_and_index(block, index).await - } - - /// Get all receipts in a block. - pub async fn get_receipts_in_block(&self, block: BlockNumber) -> ColdResult> { - self.reader.get_receipts_in_block(block).await - } - - /// Get signet events by specifier. 
- pub async fn get_signet_events( - &self, - spec: SignetEventsSpecifier, - ) -> ColdResult> { - self.reader.get_signet_events(spec).await - } - - /// Get signet events in a block. - pub async fn get_signet_events_in_block( - &self, - block: BlockNumber, - ) -> ColdResult> { - self.reader.get_signet_events_in_block(block).await - } - - /// Get signet events in a range of blocks. - pub async fn get_signet_events_in_range( - &self, - start: BlockNumber, - end: BlockNumber, - ) -> ColdResult> { - self.reader.get_signet_events_in_range(start, end).await - } - - /// Get a zenith header by block number. - pub async fn get_zenith_header( - &self, - block: BlockNumber, - ) -> ColdResult> { - self.reader.get_zenith_header(block).await - } - - /// Get zenith headers by specifier. - pub async fn get_zenith_headers( - &self, - spec: ZenithHeaderSpecifier, - ) -> ColdResult> { - self.reader.get_zenith_headers(spec).await - } - - /// Get zenith headers in a range of blocks. - pub async fn get_zenith_headers_in_range( - &self, - start: BlockNumber, - end: BlockNumber, - ) -> ColdResult> { - self.reader.get_zenith_headers_in_range(start, end).await - } - - /// Filter logs by block range, address, and topics. - /// - /// Follows `eth_getLogs` semantics. Returns matching logs ordered by - /// `(block_number, tx_index, log_index)`. - /// - /// # Errors - /// - /// Returns [`ColdStorageError::TooManyLogs`] if the query would produce - /// more than `max_logs` results. - pub async fn get_logs(&self, filter: Filter, max_logs: usize) -> ColdResult> { - self.reader.get_logs(filter, max_logs).await - } - - /// Stream logs matching a filter. - /// - /// See [`ColdStorageReadHandle::stream_logs`] for full documentation. - pub async fn stream_logs( - &self, - filter: Filter, - max_logs: usize, - deadline: Duration, - ) -> ColdResult { - self.reader.stream_logs(filter, max_logs, deadline).await - } - - /// Get the latest block number in storage. 
- pub async fn get_latest_block(&self) -> ColdResult> { - self.reader.get_latest_block().await - } -} diff --git a/crates/cold/src/task/mod.rs b/crates/cold/src/task/mod.rs deleted file mode 100644 index c2e6467..0000000 --- a/crates/cold/src/task/mod.rs +++ /dev/null @@ -1,13 +0,0 @@ -//! Cold storage task and handles. -//! -//! This module provides the task-based architecture for cold storage: -//! -//! - [`ColdStorageTask`] processes requests from channels -//! - [`ColdStorageHandle`] provides full read/write access -//! - [`ColdStorageReadHandle`] provides read-only access - -mod cache; -mod handle; -pub use handle::{ColdStorageHandle, ColdStorageReadHandle}; -mod runner; -pub use runner::ColdStorageTask; diff --git a/crates/cold/src/task/runner.rs b/crates/cold/src/task/runner.rs deleted file mode 100644 index c247d90..0000000 --- a/crates/cold/src/task/runner.rs +++ /dev/null @@ -1,410 +0,0 @@ -//! Cold storage task runner. -//! -//! The [`ColdStorageTask`] processes requests from channels and dispatches -//! them to the storage backend. Reads and writes use separate channels: -//! -//! - **Reads**: Processed concurrently (up to 64 in flight) via spawned tasks. -//! In-flight reads are drained before each write. -//! - **Writes**: Processed sequentially (inline await) to maintain ordering. -//! - **Streams**: Log-streaming producers run independently, tracked for -//! graceful shutdown but not drained before writes. Backends must provide -//! their own read isolation (e.g. snapshot semantics). -//! -//! Transaction, receipt, and header lookups are served from an LRU cache, -//! avoiding repeated backend reads for frequently queried items. -//! -//! # Log Streaming -//! -//! The task owns the streaming configuration (max deadline, concurrency -//! limit) and delegates the streaming loop to the backend via -//! [`ColdStorageRead::produce_log_stream`]. Callers supply a per-request -//! deadline that is clamped to the task's configured maximum. 
- -use super::cache::ColdCache; -use crate::{ - ColdReadRequest, ColdReceipt, ColdResult, ColdStorage, ColdStorageError, ColdStorageHandle, - ColdStorageRead, ColdWriteRequest, Confirmed, HeaderSpecifier, LogStream, ReceiptSpecifier, - TransactionSpecifier, -}; -use signet_storage_types::{RecoveredTx, SealedHeader}; -use std::{sync::Arc, time::Duration}; -use tokio::sync::{Mutex, Semaphore, mpsc}; -use tokio_stream::wrappers::ReceiverStream; -use tokio_util::{sync::CancellationToken, task::TaskTracker}; -use tracing::{debug, instrument}; - -/// Default maximum deadline for streaming operations. -const DEFAULT_MAX_STREAM_DEADLINE: Duration = Duration::from_secs(60); - -/// Channel size for cold storage read requests. -const READ_CHANNEL_SIZE: usize = 256; - -/// Channel size for cold storage write requests. -const WRITE_CHANNEL_SIZE: usize = 256; - -/// Maximum concurrent read request handlers. -const MAX_CONCURRENT_READERS: usize = 64; - -/// Maximum concurrent streaming operations. -const MAX_CONCURRENT_STREAMS: usize = 8; - -/// Channel buffer size for streaming operations. -const STREAM_CHANNEL_BUFFER: usize = 256; - -/// Shared state for the cold storage task, holding the read backend and cache. -/// -/// This is wrapped in an `Arc` so that spawned read handlers can access -/// the backend and cache without moving ownership. -struct ColdStorageTaskInner { - read_backend: B, - cache: Mutex, - max_stream_deadline: Duration, - stream_semaphore: Arc, - /// Tracks long-lived stream producer tasks separately from short reads. - /// Not drained before writes — backends provide their own read isolation. - /// Drained on graceful shutdown. - stream_tracker: TaskTracker, -} - -impl ColdStorageTaskInner { - /// Fetch a header from the backend and cache the result. 
- async fn fetch_and_cache_header( - &self, - spec: HeaderSpecifier, - ) -> ColdResult> { - let r = self.read_backend.get_header(spec).await; - if let Ok(Some(ref h)) = r { - self.cache.lock().await.put_header(h.number, h.clone()); - } - r - } - - /// Fetch a transaction from the backend and cache the result. - async fn fetch_and_cache_tx( - &self, - spec: TransactionSpecifier, - ) -> ColdResult>> { - let r = self.read_backend.get_transaction(spec).await; - if let Ok(Some(ref c)) = r { - let meta = c.meta(); - self.cache - .lock() - .await - .put_tx((meta.block_number(), meta.transaction_index()), c.clone()); - } - r - } - - /// Fetch a receipt from the backend and cache the result. - async fn fetch_and_cache_receipt( - &self, - spec: ReceiptSpecifier, - ) -> ColdResult> { - let r = self.read_backend.get_receipt(spec).await; - if let Ok(Some(ref c)) = r { - self.cache.lock().await.put_receipt((c.block_number, c.transaction_index), c.clone()); - } - r - } - - /// Handle a read request, checking the cache first where applicable. 
- async fn handle_read(self: &Arc, req: ColdReadRequest) { - match req { - ColdReadRequest::GetHeader { spec, resp } => { - let result = if let HeaderSpecifier::Number(n) = &spec { - if let Some(hit) = self.cache.lock().await.get_header(n) { - Ok(Some(hit)) - } else { - self.fetch_and_cache_header(spec).await - } - } else { - self.fetch_and_cache_header(spec).await - }; - let _ = resp.send(result); - } - ColdReadRequest::GetHeaders { specs, resp } => { - let _ = resp.send(self.read_backend.get_headers(specs).await); - } - ColdReadRequest::GetTransaction { spec, resp } => { - let result = if let TransactionSpecifier::BlockAndIndex { block, index } = &spec { - if let Some(hit) = self.cache.lock().await.get_tx(&(*block, *index)) { - Ok(Some(hit)) - } else { - self.fetch_and_cache_tx(spec).await - } - } else { - self.fetch_and_cache_tx(spec).await - }; - let _ = resp.send(result); - } - ColdReadRequest::GetTransactionsInBlock { block, resp } => { - let _ = resp.send(self.read_backend.get_transactions_in_block(block).await); - } - ColdReadRequest::GetTransactionCount { block, resp } => { - let _ = resp.send(self.read_backend.get_transaction_count(block).await); - } - ColdReadRequest::GetReceipt { spec, resp } => { - let result = if let ReceiptSpecifier::BlockAndIndex { block, index } = &spec { - if let Some(hit) = self.cache.lock().await.get_receipt(&(*block, *index)) { - Ok(Some(hit)) - } else { - self.fetch_and_cache_receipt(spec).await - } - } else { - self.fetch_and_cache_receipt(spec).await - }; - let _ = resp.send(result); - } - ColdReadRequest::GetReceiptsInBlock { block, resp } => { - let _ = resp.send(self.read_backend.get_receipts_in_block(block).await); - } - ColdReadRequest::GetSignetEvents { spec, resp } => { - let _ = resp.send(self.read_backend.get_signet_events(spec).await); - } - ColdReadRequest::GetZenithHeader { spec, resp } => { - let _ = resp.send(self.read_backend.get_zenith_header(spec).await); - } - ColdReadRequest::GetZenithHeaders { spec, resp 
} => { - let _ = resp.send(self.read_backend.get_zenith_headers(spec).await); - } - ColdReadRequest::GetLogs { filter, max_logs, resp } => { - let _ = resp.send(self.read_backend.get_logs(&filter, max_logs).await); - } - ColdReadRequest::StreamLogs { filter, max_logs, deadline, resp } => { - let _ = resp.send(self.handle_stream_logs(*filter, max_logs, deadline).await); - } - ColdReadRequest::GetLatestBlock { resp } => { - let _ = resp.send(self.read_backend.get_latest_block().await); - } - } - } - - /// Stream logs matching a filter. - /// - /// Acquires a concurrency permit, resolves the block range, then - /// spawns a producer task that delegates to - /// [`ColdStorageRead::produce_log_stream`]. - async fn handle_stream_logs( - self: &Arc, - filter: crate::Filter, - max_logs: usize, - deadline: Duration, - ) -> ColdResult { - let permit = self - .stream_semaphore - .clone() - .acquire_owned() - .await - .map_err(|_| ColdStorageError::Cancelled)?; - - let from = filter.get_from_block().unwrap_or(0); - let to = match filter.get_to_block() { - Some(to) => to, - None => { - let Some(latest) = self.read_backend.get_latest_block().await? else { - let (_tx, rx) = mpsc::channel(1); - return Ok(ReceiverStream::new(rx)); - }; - latest - } - }; - - let effective = deadline.min(self.max_stream_deadline); - let deadline_instant = tokio::time::Instant::now() + effective; - let (sender, rx) = mpsc::channel(STREAM_CHANNEL_BUFFER); - let stream_backend = self.read_backend.clone(); - - self.stream_tracker.spawn(async move { - let _permit = permit; - let params = - crate::StreamParams { from, to, max_logs, sender, deadline: deadline_instant }; - stream_backend.produce_log_stream(&filter, params).await; - }); - - Ok(ReceiverStream::new(rx)) - } -} - -/// The cold storage task that processes requests. -/// -/// This task receives requests over separate read and write channels and -/// dispatches them to the storage backend. 
It supports graceful shutdown -/// via a cancellation token. -/// -/// # Processing Model -/// -/// - **Reads**: Spawned as concurrent tasks (up to 64 in flight). -/// Multiple reads can execute in parallel. In-flight reads are drained -/// before each write to ensure exclusive backend access. -/// - **Writes**: Processed inline (sequential). Each write completes before -/// the next is started, ensuring ordering. -/// - **Streams**: Log-streaming producer tasks run independently of the -/// read/write lifecycle. They are tracked separately for graceful -/// shutdown but are NOT drained before writes. Backends MUST provide -/// their own read isolation for streaming (snapshot semantics). -/// -/// This design prioritizes write ordering for correctness while allowing -/// read throughput to scale with concurrency. -/// -/// # Log Streaming -/// -/// The task owns the streaming configuration (max deadline, concurrency -/// limit) and delegates the streaming loop to the backend via -/// [`ColdStorageRead::produce_log_stream`]. Callers supply a per-request -/// deadline that is clamped to the task's configured maximum. -/// -/// # Caching -/// -/// Transaction, receipt, and header lookups are served from an LRU cache -/// when possible. Cache entries are invalidated on -/// [`truncate_above`](crate::ColdStorageWrite::truncate_above) to handle reorgs. -pub struct ColdStorageTask { - inner: Arc>, - write_backend: B, - read_receiver: mpsc::Receiver, - write_receiver: mpsc::Receiver, - cancel_token: CancellationToken, - /// Task tracker for concurrent read handlers only. - task_tracker: TaskTracker, -} - -impl std::fmt::Debug for ColdStorageTask { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ColdStorageTask").finish_non_exhaustive() - } -} - -impl ColdStorageTask { - /// Create a new cold storage task and return its handle. 
- pub fn new(backend: B, cancel_token: CancellationToken) -> (Self, ColdStorageHandle) { - let (read_sender, read_receiver) = mpsc::channel(READ_CHANNEL_SIZE); - let (write_sender, write_receiver) = mpsc::channel(WRITE_CHANNEL_SIZE); - let read_backend = backend.clone(); - let task = Self { - inner: Arc::new(ColdStorageTaskInner { - read_backend, - cache: Mutex::new(ColdCache::new()), - max_stream_deadline: DEFAULT_MAX_STREAM_DEADLINE, - stream_semaphore: Arc::new(Semaphore::new(MAX_CONCURRENT_STREAMS)), - stream_tracker: TaskTracker::new(), - }), - write_backend: backend, - read_receiver, - write_receiver, - cancel_token, - task_tracker: TaskTracker::new(), - }; - let handle = ColdStorageHandle::new(read_sender, write_sender); - (task, handle) - } - - /// Spawn the task and return the handle. - /// - /// The task will run until the cancellation token is triggered or the - /// channels are closed. - pub fn spawn(backend: B, cancel_token: CancellationToken) -> ColdStorageHandle { - let (task, handle) = Self::new(backend, cancel_token); - tokio::spawn(task.run()); - handle - } - - /// Handle a write request using the exclusively owned write backend. - /// - /// Called inline in the run loop (never spawned). The write backend - /// is not shared — no lock acquisition needed. The drain-before-write - /// step in `run()` ensures no read tasks are populating the cache - /// concurrently, preventing stale cache entries after truncation. 
- async fn handle_write(&mut self, req: ColdWriteRequest) { - match req { - ColdWriteRequest::AppendBlock(boxed) => { - let result = self.write_backend.append_block(boxed.data).await; - let _ = boxed.resp.send(result); - } - ColdWriteRequest::AppendBlocks { data, resp } => { - let result = self.write_backend.append_blocks(data).await; - let _ = resp.send(result); - } - ColdWriteRequest::TruncateAbove { block, resp } => { - let result = self.write_backend.truncate_above(block).await; - if result.is_ok() { - self.inner.cache.lock().await.invalidate_above(block); - } - let _ = resp.send(result); - } - ColdWriteRequest::DrainAbove { block, resp } => { - let result = self.write_backend.drain_above(block).await; - if result.is_ok() { - self.inner.cache.lock().await.invalidate_above(block); - } - let _ = resp.send(result); - } - } - } - - /// Run the task, processing requests until shutdown. - #[instrument(skip(self), name = "cold_storage_task")] - pub async fn run(mut self) { - debug!("Cold storage task started"); - - loop { - tokio::select! { - biased; - - _ = self.cancel_token.cancelled() => { - debug!("Cold storage task received cancellation signal"); - break; - } - - maybe_write = self.write_receiver.recv() => { - let Some(req) = maybe_write else { - debug!("Cold storage write channel closed"); - break; - }; - // Drain in-flight read tasks before executing the write. - // Stream producers are NOT drained here — they rely on - // backend-level read isolation (snapshot semantics). - self.task_tracker.close(); - self.task_tracker.wait().await; - self.task_tracker.reopen(); - - self.handle_write(req).await; - } - - maybe_read = self.read_receiver.recv() => { - let Some(req) = maybe_read else { - debug!("Cold storage read channel closed"); - break; - }; - - // Apply backpressure: wait if we've hit the concurrent reader limit - while self.task_tracker.len() >= MAX_CONCURRENT_READERS { - tokio::select! 
{ - _ = self.cancel_token.cancelled() => { - debug!("Cancellation while waiting for read task slot"); - break; - } - _ = self.task_tracker.wait() => {} - } - } - - let inner = Arc::clone(&self.inner); - self.task_tracker.spawn(async move { - inner.handle_read(req).await; - }); - } - } - } - - // Graceful shutdown: drain reads first (short-lived), then streams - // (bounded by deadline). Reads must drain first because StreamLogs - // requests spawn stream tasks — draining streams before reads could - // miss newly spawned producers. - debug!("Waiting for in-progress read handlers to complete"); - self.task_tracker.close(); - self.task_tracker.wait().await; - debug!("Waiting for in-progress stream producers to complete"); - self.inner.stream_tracker.close(); - self.inner.stream_tracker.wait().await; - debug!("Cold storage task shut down gracefully"); - } -} diff --git a/crates/cold/src/traits.rs b/crates/cold/src/traits.rs index d351d4c..c0d8994 100644 --- a/crates/cold/src/traits.rs +++ b/crates/cold/src/traits.rs @@ -3,8 +3,8 @@ //! The cold storage interface is split into three traits: //! //! - [`ColdStorageRead`] — read-only access (`&self`, `Clone`) -//! - [`ColdStorageWrite`] — write access (`&mut self`) -//! - [`ColdStorage`] — supertrait combining both, with `drain_above` +//! - [`ColdStorageWrite`] — write access (`&self`) +//! - [`ColdStorageBackend`] — supertrait combining both, with `drain_above` use crate::{ ColdReceipt, ColdResult, Confirmed, Filter, HeaderSpecifier, ReceiptSpecifier, RpcLog, @@ -14,7 +14,7 @@ use alloy::primitives::BlockNumber; use signet_storage_types::{ DbSignetEvent, DbZenithHeader, ExecutedBlock, Receipt, RecoveredTx, SealedHeader, }; -use std::future::Future; +use std::{future::Future, time::Duration}; use tokio_stream::wrappers::ReceiverStream; /// A stream of log results backed by a bounded channel. @@ -225,9 +225,11 @@ pub trait ColdStorageRead: Clone + Send + Sync + 'static { /// Write-only cold storage backend trait. 
/// -/// All methods take `&mut self` and return `Send` futures. The write -/// backend is exclusively owned by the task runner — no synchronization -/// is needed. +/// All methods take `&self` and return `Send` futures. The handle wraps +/// the backend in an `Arc` and shares it across spawned tasks, so write +/// implementations are responsible for any internal synchronization +/// they need (e.g. the MDBX backend serializes writes via a single +/// `spawn_blocking` worker behind the handle's `write_sem`). /// /// # Implementation Guide /// @@ -244,30 +246,82 @@ pub trait ColdStorageRead: Clone + Send + Sync + 'static { /// transaction by hash) require the implementation to maintain appropriate /// indexes. These indexes must be updated during `append_block` and cleaned /// during `truncate_above`. -pub trait ColdStorageWrite: Send + 'static { +pub trait ColdStorageWrite: Send + Sync + 'static { /// Append a single block to cold storage. - fn append_block(&mut self, data: BlockData) -> impl Future> + Send; + fn append_block(&self, data: BlockData) -> impl Future> + Send; /// Append multiple blocks to cold storage. - fn append_blocks( - &mut self, - data: Vec, - ) -> impl Future> + Send; + fn append_blocks(&self, data: Vec) -> impl Future> + Send; /// Truncate all data above the given block number (exclusive). /// /// This removes block N+1 and higher from all tables. Used for reorg handling. - fn truncate_above(&mut self, block: BlockNumber) - -> impl Future> + Send; + fn truncate_above(&self, block: BlockNumber) -> impl Future> + Send; } /// Combined read and write cold storage backend trait. /// /// Combines [`ColdStorageRead`] and [`ColdStorageWrite`] and provides -/// [`drain_above`](ColdStorage::drain_above), which reads receipts then +/// [`drain_above`](ColdStorageBackend::drain_above), which reads receipts then /// truncates. The default implementation is correct but not atomic; /// backends should override with an atomic version when possible. 
-pub trait ColdStorage: ColdStorageRead + ColdStorageWrite { +/// +/// # Timeouts (mandatory) +/// +/// Every implementation MUST enforce a wall-clock timeout on both read +/// and write operations. The handle does not apply a dispatcher-side +/// deadline; the backend is the only place where a stuck call can be +/// bounded. +/// +/// - Implementations using a pooled async client (e.g. sqlx) MUST apply +/// a server-side statement-level timeout at the start of every +/// transaction. +/// - Implementations using synchronous, uninterruptible calls (e.g. +/// MDBX) MUST perform in-body deadline checks between iteration +/// steps. Single-step point lookups may rely on the backend's native +/// latency bounds and skip the check. +/// - Timeouts MUST be configurable per operation class (read, write) +/// via builder methods on the connector. Defaults SHOULD be 500ms +/// for reads and 2s for writes unless the backend's latency profile +/// requires different values. +/// - Timeout expiry MUST return an error at any callable-abort point +/// (pre-commit iteration, async cancellation, pooled-client +/// server-side cancellation) and MUST NOT return stale or partial +/// results from such an abort. +/// - For synchronous, uninterruptible commits (e.g. MDBX writes), the +/// implementation MAY complete an in-flight commit past the +/// deadline; in that case it MUST log the overrun and the +/// `write_timeout` acts as an SLO + alerting signal rather than a +/// hard abort. The commit, if it completes, is authoritative. +/// - Where abort IS possible, the implementation MUST ensure the +/// underlying transaction rolls back on timeout so backend state +/// remains consistent. +/// +/// Backends that cannot honor this contract (e.g. a toy in-memory stub +/// used for tests) may document their exemption explicitly in their +/// own type docs; the trait docs remain the authoritative contract. 
+pub trait ColdStorageBackend: ColdStorageRead + ColdStorageWrite { + /// Configured read deadline, if any. + /// + /// The handle reads this to scope end-to-end SLO measurements and + /// to bound out-of-band setup reads (e.g. resolving "to = latest" + /// for a log stream). `None` signals an exempt backend (test + /// stubs); the trait contract still requires real backends to + /// enforce the deadline internally — this accessor only exposes + /// the value to the handle. + fn read_timeout(&self) -> Option { + None + } + + /// Configured write deadline, if any. + /// + /// The handle reads this to measure end-to-end write latency + /// (queue + drain + commit) against the SLO target. See + /// [`read_timeout`](Self::read_timeout) for `None` semantics. + fn write_timeout(&self) -> Option { + None + } + /// Read and remove all blocks above the given block number. /// /// Returns receipts for each block above `block` in ascending order, @@ -279,7 +333,7 @@ pub trait ColdStorage: ColdStorageRead + ColdStorageWrite { /// not atomic. Backends should override with an atomic version /// when possible. fn drain_above( - &mut self, + &self, block: BlockNumber, ) -> impl Future>>> + Send { async move { diff --git a/crates/cold/tests/common/gated.rs b/crates/cold/tests/common/gated.rs new file mode 100644 index 0000000..9ee957e --- /dev/null +++ b/crates/cold/tests/common/gated.rs @@ -0,0 +1,189 @@ +//! Test-only backend that gates read operations on a semaphore. +//! +//! Use for tests that need to saturate the read pool deterministically. 
+ +use alloy::primitives::BlockNumber; +use signet_cold::{ + BlockData, ColdReceipt, ColdResult, ColdStorageBackend, ColdStorageRead, ColdStorageWrite, + Confirmed, Filter, HeaderSpecifier, ReceiptSpecifier, RpcLog, SignetEventsSpecifier, + StreamParams, TransactionSpecifier, ZenithHeaderSpecifier, mem::MemColdBackend, +}; +use signet_storage_types::{DbSignetEvent, DbZenithHeader, RecoveredTx, SealedHeader}; +use std::{sync::Arc, time::Duration}; +use tokio::sync::Semaphore; + +/// Backend that parks all reads on a semaphore gate. +/// +/// Writes and stream production are ungated so tests can distinguish +/// read-pool saturation from write/drain blocking. +#[derive(Clone)] +pub struct GatedBackend { + inner: MemColdBackend, + gate: Arc, + stream_gate: Arc, + read_timeout: Option, +} + +impl GatedBackend { + /// Build a backend whose read gate is closed (zero permits). + pub fn closed() -> Self { + Self { + inner: MemColdBackend::new(), + gate: Arc::new(Semaphore::new(0)), + // Streams are ungated by default: effectively unbounded permits. + stream_gate: Arc::new(Semaphore::new(usize::MAX >> 4)), + read_timeout: None, + } + } + + /// Advertise a `read_timeout` to the handle. Used to drive + /// `stream_logs` setup-timeout tests against a parked + /// `get_latest_block`. + #[allow(dead_code)] + pub const fn with_read_timeout(mut self, d: Duration) -> Self { + self.read_timeout = Some(d); + self + } + + /// Build a backend whose read gate is effectively open. + #[allow(dead_code)] + pub fn open() -> Self { + let me = Self::closed(); + me.gate.add_permits(usize::MAX >> 4); + me + } + + /// Replace the stream gate with a closed (zero-permit) gate so + /// `produce_log_stream` parks until permits are released. + #[allow(dead_code)] + pub fn with_gated_streams(mut self) -> Self { + self.stream_gate = Arc::new(Semaphore::new(0)); + self + } + + /// Release `n` additional read permits on the gate. 
+ #[allow(dead_code)] + pub fn release(&self, n: usize) { + self.gate.add_permits(n); + } + + /// Release `n` additional stream permits on the stream gate. + #[allow(dead_code)] + pub fn release_streams(&self, n: usize) { + self.stream_gate.add_permits(n); + } +} + +impl std::fmt::Debug for GatedBackend { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("GatedBackend").finish_non_exhaustive() + } +} + +impl ColdStorageRead for GatedBackend { + async fn get_header(&self, spec: HeaderSpecifier) -> ColdResult> { + let _p = self.gate.clone().acquire_owned().await.ok(); + self.inner.get_header(spec).await + } + + async fn get_headers( + &self, + specs: Vec, + ) -> ColdResult>> { + let _p = self.gate.clone().acquire_owned().await.ok(); + self.inner.get_headers(specs).await + } + + async fn get_transaction( + &self, + spec: TransactionSpecifier, + ) -> ColdResult>> { + let _p = self.gate.clone().acquire_owned().await.ok(); + self.inner.get_transaction(spec).await + } + + async fn get_transactions_in_block(&self, block: BlockNumber) -> ColdResult> { + let _p = self.gate.clone().acquire_owned().await.ok(); + self.inner.get_transactions_in_block(block).await + } + + async fn get_transaction_count(&self, block: BlockNumber) -> ColdResult { + let _p = self.gate.clone().acquire_owned().await.ok(); + self.inner.get_transaction_count(block).await + } + + async fn get_receipt(&self, spec: ReceiptSpecifier) -> ColdResult> { + let _p = self.gate.clone().acquire_owned().await.ok(); + self.inner.get_receipt(spec).await + } + + async fn get_receipts_in_block(&self, block: BlockNumber) -> ColdResult> { + let _p = self.gate.clone().acquire_owned().await.ok(); + self.inner.get_receipts_in_block(block).await + } + + async fn get_signet_events( + &self, + spec: SignetEventsSpecifier, + ) -> ColdResult> { + let _p = self.gate.clone().acquire_owned().await.ok(); + self.inner.get_signet_events(spec).await + } + + async fn get_zenith_header( + &self, + 
spec: ZenithHeaderSpecifier, + ) -> ColdResult> { + let _p = self.gate.clone().acquire_owned().await.ok(); + self.inner.get_zenith_header(spec).await + } + + async fn get_zenith_headers( + &self, + spec: ZenithHeaderSpecifier, + ) -> ColdResult> { + let _p = self.gate.clone().acquire_owned().await.ok(); + self.inner.get_zenith_headers(spec).await + } + + async fn get_latest_block(&self) -> ColdResult> { + let _p = self.gate.clone().acquire_owned().await.ok(); + self.inner.get_latest_block().await + } + + async fn get_logs(&self, filter: &Filter, max_logs: usize) -> ColdResult> { + let _p = self.gate.clone().acquire_owned().await.ok(); + self.inner.get_logs(filter, max_logs).await + } + + async fn produce_log_stream(&self, filter: &Filter, params: StreamParams) { + let _p = self.stream_gate.clone().acquire_owned().await.ok(); + self.inner.produce_log_stream(filter, params).await; + } +} + +impl ColdStorageWrite for GatedBackend { + async fn append_block(&self, data: BlockData) -> ColdResult<()> { + // Ungated — writers need to distinguish drain-blocking from + // read-gating in tests. 
+ self.inner.append_block(data).await + } + + async fn append_blocks(&self, data: Vec) -> ColdResult<()> { + self.inner.append_blocks(data).await + } + + async fn truncate_above(&self, block: BlockNumber) -> ColdResult<()> { + self.inner.truncate_above(block).await + } +} + +impl ColdStorageBackend for GatedBackend { + fn read_timeout(&self) -> Option { + self.read_timeout + } + + async fn drain_above(&self, block: BlockNumber) -> ColdResult>> { + self.inner.drain_above(block).await + } +} diff --git a/crates/cold/tests/common/mod.rs b/crates/cold/tests/common/mod.rs new file mode 100644 index 0000000..206d748 --- /dev/null +++ b/crates/cold/tests/common/mod.rs @@ -0,0 +1 @@ +pub mod gated; diff --git a/crates/cold/tests/concurrency.rs b/crates/cold/tests/concurrency.rs new file mode 100644 index 0000000..e0e8912 --- /dev/null +++ b/crates/cold/tests/concurrency.rs @@ -0,0 +1,278 @@ +//! Concurrency scenarios for the unified `ColdStorage` handle. +//! +//! Per-task tests cover narrow cases (`drain_barrier.rs`, +//! `stream_isolation.rs`, `shutdown.rs`, `handle_shape.rs`); this file +//! collects the broader load scenarios from the architecture spec. + +mod common; + +use alloy::rpc::types::Filter; +use common::gated::GatedBackend; +use signet_cold::{ + ColdStorage, ColdStorageError, HeaderSpecifier, conformance::make_test_block, + mem::MemColdBackend, +}; +use std::{sync::Arc, time::Duration}; +use tokio::sync::Notify; +use tokio_util::sync::CancellationToken; + +/// 1. 256 concurrent reads against an ungated backend must all complete +/// despite only 64 `read_sem` permits. 
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn reads_above_concurrency_cap_do_not_deadlock() { + let cs = ColdStorage::new(MemColdBackend::new(), CancellationToken::new()); + + let mut handles = Vec::with_capacity(256); + for _ in 0..256 { + let cs2 = cs.clone(); + handles.push(tokio::spawn(async move { cs2.get_latest_block().await })); + } + + for h in handles { + let r = tokio::time::timeout(Duration::from_secs(15), h) + .await + .expect("read deadlocked") + .expect("task panicked"); + r.expect("read failed"); + } +} + +/// 2. A write interleaved with saturating reads still completes. +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn write_after_saturating_reads_makes_progress() { + let cs = ColdStorage::new(MemColdBackend::new(), CancellationToken::new()); + + let mut readers = Vec::with_capacity(128); + for _ in 0..128 { + let cs2 = cs.clone(); + readers.push(tokio::spawn(async move { cs2.get_latest_block().await })); + } + + let cs_w = cs.clone(); + let writer = tokio::spawn(async move { cs_w.truncate_above(0).await }); + + let mut more_readers = Vec::with_capacity(128); + for _ in 0..128 { + let cs2 = cs.clone(); + more_readers.push(tokio::spawn(async move { cs2.get_latest_block().await })); + } + + tokio::time::timeout(Duration::from_secs(15), writer) + .await + .expect("writer deadlocked") + .expect("writer panicked") + .expect("writer failed"); + + for h in readers.into_iter().chain(more_readers) { + tokio::time::timeout(Duration::from_secs(15), h) + .await + .expect("reader deadlocked") + .expect("reader panicked") + .expect("reader failed"); + } +} + +/// 3. Fairness: a writer acquired after saturating readers must complete +/// before readers queued *after* the writer. 
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn fairness_write_serves_before_later_readers() { + let backend = GatedBackend::closed(); + let cs = ColdStorage::new(backend.clone(), CancellationToken::new()); + + // Saturate all 64 read permits behind the backend gate. + for _ in 0..64 { + let cs2 = cs.clone(); + tokio::spawn(async move { + let _ = cs2.get_latest_block().await; + }); + } + tokio::time::sleep(Duration::from_millis(100)).await; + + // Queue a writer. It holds `write_sem`, then blocks on the drain + // barrier waiting for the 64 in-flight readers. + let writer_done = Arc::new(Notify::new()); + let wd = writer_done.clone(); + let cs_w = cs.clone(); + tokio::spawn(async move { + let _ = cs_w.truncate_above(0).await; + wd.notify_one(); + }); + tokio::time::sleep(Duration::from_millis(100)).await; + + // Queue 64 "later" readers. They park on `read_sem::acquire_owned` + // because the writer's drain has claimed every permit. + let later_done = Arc::new(Notify::new()); + let mut later_count = 0usize; + for _ in 0..64 { + let cs2 = cs.clone(); + let ld = later_done.clone(); + tokio::spawn(async move { + let _ = cs2.get_latest_block().await; + ld.notify_one(); + }); + later_count += 1; + } + assert_eq!(later_count, 64); + tokio::time::sleep(Duration::from_millis(100)).await; + + // Release the backend gate so the 64 saturating readers complete, + // which lets the drain barrier acquire and the writer run. + backend.release(usize::MAX >> 4); + + // The writer MUST resolve before any later reader. + tokio::select! { + biased; + () = writer_done.notified() => {} + () = later_done.notified() => panic!("later reader resolved before writer"), + } + + // And the later readers still complete shortly after. + tokio::time::timeout(Duration::from_secs(5), later_done.notified()) + .await + .expect("later readers should complete after writer"); +} + +/// 4. Cancel during reader backpressure: queued acquisitions fail fast. 
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn cancel_during_reader_backpressure_shuts_down() { + let backend = GatedBackend::closed(); + let cancel = CancellationToken::new(); + let cs = ColdStorage::new(backend.clone(), cancel.clone()); + + // Saturate all 64 read permits. + let mut readers = Vec::with_capacity(65); + for _ in 0..64 { + let cs2 = cs.clone(); + readers.push(tokio::spawn(async move { cs2.get_latest_block().await })); + } + // One queued reader — parked on `read_sem::acquire_owned`. + let cs_q = cs.clone(); + readers.push(tokio::spawn(async move { cs_q.get_latest_block().await })); + tokio::time::sleep(Duration::from_millis(100)).await; + + // Cancel: coordinator closes the semaphores. + cancel.cancel(); + tokio::time::sleep(Duration::from_millis(50)).await; + + // New acquisitions fail fast. + let err = cs.get_latest_block().await.unwrap_err(); + assert!(matches!(err, ColdStorageError::TaskTerminated)); + + // Release the backend gate so any in-flight readers finish. + backend.release(usize::MAX >> 4); + + // All spawned readers resolve within the bound. Those that had + // acquired permits complete with `Ok(None)`; the queued one resolves + // with `Err(TaskTerminated)` because semaphore close propagates. + for h in readers { + let _ = tokio::time::timeout(Duration::from_secs(1), h) + .await + .expect("reader hung after cancel") + .expect("reader task panicked"); + } +} + +/// 5. Cancel during write drain: writer parked on `acquire_many_owned` +/// exits promptly. +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn cancel_during_write_drain_shuts_down() { + let backend = GatedBackend::closed(); + let cancel = CancellationToken::new(); + let cs = ColdStorage::new(backend.clone(), cancel.clone()); + + for _ in 0..64 { + let cs2 = cs.clone(); + tokio::spawn(async move { + let _ = cs2.get_latest_block().await; + }); + } + tokio::time::sleep(Duration::from_millis(100)).await; + + // Queue a writer. 
Parks on the drain barrier. + let cs_w = cs.clone(); + let writer = tokio::spawn(async move { cs_w.truncate_above(0).await }); + tokio::time::sleep(Duration::from_millis(100)).await; + + cancel.cancel(); + tokio::time::sleep(Duration::from_millis(50)).await; + backend.release(usize::MAX >> 4); + + let result = tokio::time::timeout(Duration::from_secs(1), writer) + .await + .expect("writer hung after cancel") + .expect("writer task panicked"); + assert!(matches!(result, Err(ColdStorageError::TaskTerminated))); +} + +/// 6. Stream caller cancellation releases its `stream_sem` permit so a +/// later stream can acquire it. +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn stream_setup_caller_cancel_releases_stream_permit() { + let backend = GatedBackend::closed().with_gated_streams(); + let cs = ColdStorage::new(backend.clone(), CancellationToken::new()); + + // Fully open read permits for this test (we only care about stream_sem). + backend.release(usize::MAX >> 4); + + // Seed a block so `stream_logs` can resolve `to` without parking. + cs.append_block(make_test_block(0)).await.unwrap(); + + // Saturate all 8 stream_sem slots. Each spawned task acquires the + // stream permit inside `stream_logs`, then the produced `LogStream` + // is dropped — but the BACKEND's `produce_log_stream` task is still + // holding the permit via `_p = permit` and parked on the stream gate. + let filter = Filter::new().from_block(0).to_block(0); + for _ in 0..8 { + let cs2 = cs.clone(); + let f = filter.clone(); + tokio::spawn(async move { + let _ = cs2.stream_logs(f, 1000, Duration::from_secs(5)).await; + }); + } + tokio::time::sleep(Duration::from_millis(100)).await; + + // 9th stream setup must park on `stream_sem`. Cancel it via timeout. 
+ let cs9 = cs.clone(); + let f9 = filter.clone(); + let attempt = tokio::time::timeout( + Duration::from_millis(200), + cs9.stream_logs(f9, 1000, Duration::from_secs(5)), + ) + .await; + assert!(attempt.is_err(), "stream_logs should park on saturated stream_sem"); + + // Release one stream so a permit becomes available. + backend.release_streams(1); + tokio::time::sleep(Duration::from_millis(50)).await; + + // A fresh stream_logs call should acquire within the bound. + let cs10 = cs.clone(); + tokio::time::timeout( + Duration::from_secs(2), + cs10.stream_logs(filter, 1000, Duration::from_secs(5)), + ) + .await + .expect("stream_logs should acquire after dropped attempt released permit") + .expect("stream_logs returned error"); +} + +/// 7. Cache invalidation on destructive writes. +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn cache_consistent_through_truncate() { + let cs = ColdStorage::new(MemColdBackend::new(), CancellationToken::new()); + + for n in 0..=20 { + cs.append_block(make_test_block(n)).await.unwrap(); + } + + // Seed the cache with block 10. + let h = cs.get_header(HeaderSpecifier::Number(10)).await.unwrap(); + assert!(h.is_some(), "block 10 should be present after append"); + + // Destructive write above block 5. + cs.truncate_above(5).await.unwrap(); + + // Block 10 must now be absent: cache invalidated AND backend dropped it. + let h = cs.get_header(HeaderSpecifier::Number(10)).await.unwrap(); + assert!(h.is_none(), "block 10 must be absent after truncate_above(5)"); +} diff --git a/crates/cold/tests/drain_barrier.rs b/crates/cold/tests/drain_barrier.rs new file mode 100644 index 0000000..5903676 --- /dev/null +++ b/crates/cold/tests/drain_barrier.rs @@ -0,0 +1,43 @@ +//! Writes must wait for in-flight reads to drain. 
+ +mod common; + +use common::gated::GatedBackend; +use signet_cold::ColdStorage; +use std::{sync::Arc, time::Duration}; +use tokio::sync::Notify; +use tokio_util::sync::CancellationToken; + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn write_waits_for_in_flight_reads_to_drain() { + let backend = GatedBackend::closed(); + let cs = ColdStorage::new(backend.clone(), CancellationToken::new()); + + // 16 gated readers — hold 16 of the 64 read permits. + for _ in 0..16 { + let cs2 = cs.clone(); + tokio::spawn(async move { + let _ = cs2.get_latest_block().await; + }); + } + tokio::time::sleep(Duration::from_millis(50)).await; + + // Writer should block on drain: only 48 of 64 permits are free. + let cs2 = cs.clone(); + let done = Arc::new(Notify::new()); + let wd = done.clone(); + tokio::spawn(async move { + let _ = cs2.truncate_above(0).await; + wd.notify_one(); + }); + + // Writer must NOT complete before readers are released. + let early = tokio::time::timeout(Duration::from_millis(150), done.notified()).await; + assert!(early.is_err(), "writer completed before read drain"); + + // Release all readers; writer should now progress. 
+ backend.release(usize::MAX >> 4); + tokio::time::timeout(Duration::from_secs(2), done.notified()) + .await + .expect("writer should complete once readers release"); +} diff --git a/crates/cold/tests/handle_shape.rs b/crates/cold/tests/handle_shape.rs new file mode 100644 index 0000000..3f8bf6b --- /dev/null +++ b/crates/cold/tests/handle_shape.rs @@ -0,0 +1,12 @@ +use signet_cold::{ColdStorage, mem::MemColdBackend}; +use tokio_util::sync::CancellationToken; + +#[tokio::test] +async fn handle_is_clone_cheap() { + let cs: ColdStorage<MemColdBackend> = + ColdStorage::new(MemColdBackend::new(), CancellationToken::new()); + let cs2 = cs.clone(); + let _ = cs.truncate_above(0).await; + let latest = cs2.get_latest_block().await.unwrap(); + assert!(latest.is_none()); +} diff --git a/crates/cold/tests/shutdown.rs b/crates/cold/tests/shutdown.rs new file mode 100644 index 0000000..491a1d9 --- /dev/null +++ b/crates/cold/tests/shutdown.rs @@ -0,0 +1,31 @@ +//! Shutdown coordinator closes semaphores on cancel; handle methods +//! fail fast afterwards. + +use signet_cold::{ColdStorage, ColdStorageError, mem::MemColdBackend}; +use std::time::Duration; +use tokio_util::sync::CancellationToken; + +#[tokio::test] +async fn shutdown_acquire_fails_fast() { + let cancel = CancellationToken::new(); + let cs = ColdStorage::new(MemColdBackend::new(), cancel.clone()); + + cancel.cancel(); + // Give the coordinator a tick to run. 
+ tokio::time::sleep(Duration::from_millis(50)).await; + + let err = cs.get_latest_block().await.unwrap_err(); + assert!(matches!(err, ColdStorageError::TaskTerminated)); +} + +#[tokio::test] +async fn shutdown_write_fails_fast() { + let cancel = CancellationToken::new(); + let cs = ColdStorage::new(MemColdBackend::new(), cancel.clone()); + + cancel.cancel(); + tokio::time::sleep(Duration::from_millis(50)).await; + + let err = cs.truncate_above(0).await.unwrap_err(); + assert!(matches!(err, ColdStorageError::TaskTerminated)); +} diff --git a/crates/cold/tests/stream_isolation.rs b/crates/cold/tests/stream_isolation.rs new file mode 100644 index 0000000..fc4817c --- /dev/null +++ b/crates/cold/tests/stream_isolation.rs @@ -0,0 +1,45 @@ +//! Streams must not consume a read permit. +//! +//! If a saturated read pool could block stream setup, the existing +//! PR #57 bug would reappear. This test asserts the fix. + +mod common; + +use alloy::rpc::types::Filter; +use common::gated::GatedBackend; +use signet_cold::ColdStorage; +use std::time::Duration; +use tokio_util::sync::CancellationToken; + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn stream_setup_not_blocked_by_saturated_reads() { + let backend = GatedBackend::closed(); + let cs = ColdStorage::new(backend.clone(), CancellationToken::new()); + + // Saturate all 64 read permits. Each spawned task acquires a read + // permit inside the handle, then parks in the backend behind the + // gate. With the gate closed, these tasks hold their permits for + // the duration of the test. + for _ in 0..64 { + let cs2 = cs.clone(); + tokio::spawn(async move { + let _ = cs2.get_latest_block().await; + }); + } + tokio::time::sleep(Duration::from_millis(50)).await; + + // Stream setup must complete even though every read permit is held. + // Use a bounded `to_block` so setup does not need to resolve + // "latest" via the backend (which our gate also blocks). 
What we + // are asserting is that setup does not acquire a `read_sem` permit; + // the stream body itself runs after setup returns. + let stream = tokio::time::timeout( + Duration::from_secs(1), + cs.stream_logs(Filter::new().from_block(0).to_block(10), 1000, Duration::from_secs(5)), + ) + .await + .expect("stream setup must not block on saturated read pool") + .expect("stream setup must succeed"); + + drop(stream); +} diff --git a/crates/cold/tests/stream_setup_timeout.rs b/crates/cold/tests/stream_setup_timeout.rs new file mode 100644 index 0000000..72afe03 --- /dev/null +++ b/crates/cold/tests/stream_setup_timeout.rs @@ -0,0 +1,50 @@ +//! `stream_logs` setup must fail fast when the backend's +//! `get_latest_block` call hangs. +//! +//! Pre-fix: `stream_logs` resolved `to = latest` via an un-bounded +//! backend call before acquiring `stream_sem`. A stuck point lookup +//! (cold MDBX page, saturated PG pool) could stall N concurrent setup +//! callers indefinitely with no permit cap. The fix wraps the setup +//! read in `tokio::time::timeout(backend.read_timeout(), …)`. This +//! test pins that behaviour against a regression. + +mod common; + +use alloy::rpc::types::Filter; +use common::gated::GatedBackend; +use signet_cold::{ColdStorage, ColdStorageError}; +use std::time::Duration; +use tokio_util::sync::CancellationToken; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn stream_setup_fails_fast_when_get_latest_block_hangs() { + // Backend whose read gate is closed: every read parks forever, + // including `get_latest_block`. Advertise a 50 ms `read_timeout` + // so the handle's setup wrap-around expires quickly. + let backend = GatedBackend::closed().with_read_timeout(Duration::from_millis(50)); + let cs = ColdStorage::new(backend, CancellationToken::new()); + + // No `to_block` on the filter forces the handle to call + // `get_latest_block` during setup — exactly the path the timeout + // protects. 
+ let filter = Filter::new().from_block(0); + + let started = std::time::Instant::now(); + let outcome = tokio::time::timeout( + Duration::from_secs(1), + cs.stream_logs(filter, 1000, Duration::from_secs(5)), + ) + .await + .expect("stream_logs setup must not hang past the wall-clock timeout"); + let elapsed = started.elapsed(); + + let err = outcome.expect_err("setup must fail with DeadlineExceeded, not succeed"); + assert!( + matches!(err, ColdStorageError::DeadlineExceeded(_)), + "expected DeadlineExceeded, got: {err}" + ); + // Generous upper bound — the configured deadline is 50 ms and + // tokio's timer wheel adds slop. We mostly care that we are + // bounded by the configured value, not by a 1 s outer harness. + assert!(elapsed < Duration::from_millis(500), "setup took too long: {elapsed:?}"); +} diff --git a/crates/storage/README.md b/crates/storage/README.md index 87b3a37..b1ffaa2 100644 --- a/crates/storage/README.md +++ b/crates/storage/README.md @@ -60,8 +60,10 @@ if let Some(first_missing) = storage.cold_lag().await? { hot storage and can be recovered via `replay_to_cold`. Cold dispatch errors indicate either: -- `Backpressure`: Channel full, task alive. Transient. -- `TaskTerminated`: Task stopped. Requires restart. +- `DeadlineExceeded` / `StreamDeadlineExceeded`: Read overran its configured + timeout. Retryable with a larger deadline or narrower filter. +- `TaskTerminated`: Handle's semaphores were closed (shutdown). Requires + recreating the handle. ## Re-exports diff --git a/crates/storage/src/builder.rs b/crates/storage/src/builder.rs index c9258f3..4e2097c 100644 --- a/crates/storage/src/builder.rs +++ b/crates/storage/src/builder.rs @@ -88,7 +88,7 @@ where /// Build the unified storage instance. /// /// Opens both hot and cold backends and spawns the cold storage task. 
- pub async fn build(self) -> StorageResult> { + pub async fn build(self) -> StorageResult> { // Connect to hot storage (sync) let hot = self .hot_connector diff --git a/crates/storage/src/either.rs b/crates/storage/src/either.rs index 3020f19..8e9b656 100644 --- a/crates/storage/src/either.rs +++ b/crates/storage/src/either.rs @@ -3,11 +3,11 @@ //! The `Either` type enables runtime backend selection while maintaining compile-time //! type safety and zero-cost abstraction. The `dispatch_async!` macro reduces //! boilerplate for the `EitherCold` implementation by generating the repetitive -//! match-and-forward pattern for all ColdStorage trait methods. +//! match-and-forward pattern for all ColdStorageBackend trait methods. use alloy::primitives::BlockNumber; use signet_cold::{ - BlockData, ColdConnect, ColdReceipt, ColdResult, ColdStorage, ColdStorageRead, + BlockData, ColdConnect, ColdReceipt, ColdResult, ColdStorageBackend, ColdStorageRead, ColdStorageWrite, Confirmed, Filter, HeaderSpecifier, ReceiptSpecifier, SignetEventsSpecifier, StreamParams, TransactionSpecifier, ZenithHeaderSpecifier, }; @@ -166,29 +166,39 @@ impl ColdStorageRead for EitherCold { #[allow(clippy::manual_async_fn)] impl ColdStorageWrite for EitherCold { - fn append_block(&mut self, data: BlockData) -> impl Future> + Send { + fn append_block(&self, data: BlockData) -> impl Future> + Send { dispatch_async!(self, append_block(data)) } - fn append_blocks( - &mut self, - data: Vec, - ) -> impl Future> + Send { + fn append_blocks(&self, data: Vec) -> impl Future> + Send { dispatch_async!(self, append_blocks(data)) } - fn truncate_above( - &mut self, - block: BlockNumber, - ) -> impl Future> + Send { + fn truncate_above(&self, block: BlockNumber) -> impl Future> + Send { dispatch_async!(self, truncate_above(block)) } } #[allow(clippy::manual_async_fn)] -impl ColdStorage for EitherCold { +impl ColdStorageBackend for EitherCold { + fn read_timeout(&self) -> Option { + match self { + 
Self::Mdbx(backend) => backend.read_timeout(), + #[cfg(any(feature = "postgres", feature = "sqlite"))] + Self::Sql(backend) => backend.read_timeout(), + } + } + + fn write_timeout(&self) -> Option { + match self { + Self::Mdbx(backend) => backend.write_timeout(), + #[cfg(any(feature = "postgres", feature = "sqlite"))] + Self::Sql(backend) => backend.write_timeout(), + } + } + fn drain_above( - &mut self, + &self, block: BlockNumber, ) -> impl Future>>> + Send { dispatch_async!(self, drain_above(block)) diff --git a/crates/storage/src/lib.rs b/crates/storage/src/lib.rs index 48dc3c6..75b6d5d 100644 --- a/crates/storage/src/lib.rs +++ b/crates/storage/src/lib.rs @@ -82,8 +82,7 @@ pub use signet_cold_sql::SqlConnector; // Re-export key types for convenience pub use signet_cold::{ - ColdStorage, ColdStorageError, ColdStorageHandle, ColdStorageRead, ColdStorageTask, - ColdStorageWrite, + ColdStorage, ColdStorageBackend, ColdStorageError, ColdStorageRead, ColdStorageWrite, }; pub use signet_cold_mdbx::MdbxColdBackend; pub use signet_hot::{ diff --git a/crates/storage/src/unified.rs b/crates/storage/src/unified.rs index d38b967..e8c1b8d 100644 --- a/crates/storage/src/unified.rs +++ b/crates/storage/src/unified.rs @@ -6,10 +6,7 @@ use crate::StorageResult; use alloy::primitives::BlockNumber; -use signet_cold::{ - BlockData, ColdReceipt, ColdStorage, ColdStorageError, ColdStorageHandle, - ColdStorageReadHandle, ColdStorageTask, -}; +use signet_cold::{BlockData, ColdReceipt, ColdStorage, ColdStorageBackend, ColdStorageError}; use signet_hot::{ HistoryRead, HistoryWrite, HotKv, model::{HotKvReadError, HotKvWrite, RevmRead}, @@ -36,40 +33,30 @@ pub struct DrainedBlock { /// /// # Write Semantics /// -/// - Hot storage writes are synchronous and use database transactions -/// - Cold storage writes are dispatched synchronously via the handle (non-blocking) -/// - On `append_blocks`, hot storage is written first, then cold storage is notified +/// - Hot storage writes are 
synchronous and use database transactions. +/// - Cold storage writes are awaited through a [`ColdStorage`] handle, which +/// serializes writes internally. /// /// # Error Handling /// -/// Both hot storage and cold storage errors are returned. Cold storage dispatch -/// fails immediately if the channel is full (non-blocking). +/// Both hot storage and cold storage errors are returned. Callers decide +/// whether cold-storage failures should halt execution or be retried via +/// [`replay_to_cold`](Self::replay_to_cold). /// /// # Backpressure and Failure Recovery /// -/// Cold storage dispatch uses non-blocking sends. Two distinct errors indicate -/// dispatch failure: -/// -/// - [`ColdStorageError::Backpressure`]: Channel full. The task is alive but -/// cannot keep up. Transient; may resolve on its own or with retry. -/// - [`ColdStorageError::TaskTerminated`]: Channel closed. The task has stopped -/// and must be restarted. Persistent until recovery. +/// Cold storage writes are serialized by the handle's write semaphore. If the +/// backend is slow, `append_blocks` will block the caller until capacity is +/// available. If the handle is shutting down, writes return +/// [`ColdStorageError::TaskTerminated`]. /// -/// **Important**: Hot storage is always authoritative. When cold dispatch fails: +/// **Important**: Hot storage is always authoritative. When cold writes fail: /// /// 1. Hot storage already contains the committed data -/// 2. Cold storage may be behind (backpressure) or unavailable (task failure) +/// 2. Cold storage may lag or be unavailable /// 3. Use [`cold_lag`](Self::cold_lag) to detect gaps between hot and cold /// 4. Use [`replay_to_cold`](Self::replay_to_cold) to recover cold storage /// -/// Callers should decide based on their requirements and the error type: -/// -/// - **Backpressure**: Log and continue, or retry with backoff. The task will -/// catch up eventually. Use `cold_lag()` to monitor. 
-/// - **TaskTerminated**: The task must be restarted. Halt or switch to degraded -/// mode until recovery, then replay missing blocks. -/// -/// [`ColdStorageError::Backpressure`]: signet_cold::ColdStorageError::Backpressure /// [`ColdStorageError::TaskTerminated`]: signet_cold::ColdStorageError::TaskTerminated /// /// # Example @@ -80,35 +67,30 @@ pub struct DrainedBlock { /// let storage = UnifiedStorage::new(hot_db, cold_handle); /// /// // Append executed blocks (takes ownership) -/// storage.append_blocks(blocks)?; +/// storage.append_blocks(blocks).await?; /// /// // Handle reorgs -/// storage.unwind_above(reorg_block)?; +/// storage.unwind_above(reorg_block).await?; /// ``` #[derive(Debug)] -pub struct UnifiedStorage { +pub struct UnifiedStorage { hot: H, - cold: ColdStorageHandle, + cold: ColdStorage, } -impl UnifiedStorage { +impl UnifiedStorage { /// Create a new unified storage instance. - pub const fn new(hot: H, cold: ColdStorageHandle) -> Self { + pub const fn new(hot: H, cold: ColdStorage) -> Self { Self { hot, cold } } /// Spawn a unified storage instance from hot and cold backends. /// - /// This spawns the [`ColdStorageTask`] internally and returns a - /// fully-assembled [`UnifiedStorage`]. The cold storage task runs - /// until the cancellation token is triggered or all handles are - /// dropped. - /// - /// Use [`new`](Self::new) instead if you need manual control over - /// the cold storage task lifecycle or need to share the - /// [`ColdStorageHandle`] before constructing unified storage. - pub fn spawn(hot: H, cold_backend: B, cancel_token: CancellationToken) -> Self { - let cold = ColdStorageTask::spawn(cold_backend, cancel_token); + /// Constructs a [`ColdStorage`] wrapping `cold_backend` and returns a + /// fully-assembled [`UnifiedStorage`]. The cold storage handle retains + /// the `cancel_token` for future shutdown coordination. 
+ pub fn spawn(hot: H, cold_backend: B, cancel_token: CancellationToken) -> Self { + let cold = ColdStorage::new(cold_backend, cancel_token); Self::new(hot, cold) } @@ -123,16 +105,16 @@ impl UnifiedStorage { } /// Get a reference to the cold storage handle. - pub const fn cold(&self) -> &ColdStorageHandle { + pub const fn cold(&self) -> &ColdStorage { &self.cold } - /// Get a read-only cold storage handle. + /// Get a clone of the cold storage handle. /// - /// The returned handle can only perform read operations and cannot modify - /// storage. Use this for components that only need to query historical data. - pub fn cold_reader(&self) -> ColdStorageReadHandle { - self.cold.reader() + /// Cloning is cheap (reference-counted). Components that only perform + /// read operations should use this instead of borrowing. + pub fn cold_reader(&self) -> ColdStorage { + self.cold.clone() } /// Create a read-only transaction for hot storage. @@ -145,33 +127,12 @@ impl UnifiedStorage { } /// Create a revm-compatible read-only database adapter. - /// - /// The returned [`RevmRead`] implements revm's `Database` and `DatabaseRef` - /// traits, allowing it to be used directly with revm for EVM execution. - /// - /// # Errors - /// - /// Returns an error if the transaction cannot be created. pub fn revm_reader(&self) -> StorageResult> { self.hot.revm_reader().map_err(Into::into) } /// Create a revm-compatible read-only database adapter that reads state /// at a specific block height. - /// - /// The returned [`RevmRead`] uses history and change set tables to - /// reconstruct state as it was at `height`. - /// - /// # Errors - /// - /// - [`NoBlocks`] if the database has no blocks. - /// - [`HeightOutOfRange`] if `height` is outside the - /// stored block range. - /// - [`Inner`] if the transaction cannot be created. 
- /// - /// [`NoBlocks`]: signet_hot::model::HotKvError::NoBlocks - /// [`HeightOutOfRange`]: signet_hot::model::HotKvError::HeightOutOfRange - /// [`Inner`]: signet_hot::model::HotKvError::Inner pub fn revm_reader_at_height(&self, height: u64) -> StorageResult> { self.hot.revm_reader_at_height(height).map_err(Into::into) } @@ -180,24 +141,18 @@ impl UnifiedStorage { /// /// This method: /// 1. Writes to hot storage synchronously (validates chain extension, updates state) - /// 2. Dispatches writes to cold storage synchronously (non-blocking) + /// 2. Awaits cold storage write via the [`ColdStorage`] handle /// /// # Errors /// /// - [`Hot`]: Hot storage write failed. No data was written. - /// - [`Cold`]: Hot storage succeeded but cold dispatch failed. - /// Check the inner [`ColdStorageError`] variant: - /// - [`Backpressure`]: Task alive but channel full. May retry or continue. - /// - [`TaskTerminated`]: Task stopped. Requires restart. - /// - /// In both cold error cases, data is safely in hot storage and can be - /// recovered later via [`replay_to_cold`](Self::replay_to_cold). + /// - [`Cold`]: Hot storage succeeded but cold write failed. Data is safely + /// in hot storage and can be recovered later via + /// [`replay_to_cold`](Self::replay_to_cold). /// /// [`Hot`]: crate::StorageError::Hot /// [`Cold`]: crate::StorageError::Cold - /// [`Backpressure`]: signet_cold::ColdStorageError::Backpressure - /// [`TaskTerminated`]: signet_cold::ColdStorageError::TaskTerminated - pub fn append_blocks(&self, blocks: Vec) -> StorageResult<()> { + pub async fn append_blocks(&self, blocks: Vec) -> StorageResult<()> { if blocks.is_empty() { return Ok(()); } @@ -205,8 +160,9 @@ impl UnifiedStorage { // 1. Write to hot storage (borrows blocks) self.write_hot(&blocks)?; - // 2. Dispatch to cold storage (consumes blocks) - self.dispatch_cold(blocks) + // 2. 
Write to cold storage (consumes blocks) + let cold_data: Vec<_> = blocks.into_iter().map(BlockData::from).collect(); + self.cold.append_blocks(cold_data).await.map_err(Into::into) } /// Write blocks to hot storage. @@ -221,14 +177,6 @@ impl UnifiedStorage { Ok(()) } - /// Dispatch blocks to cold storage synchronously (non-blocking). - /// - /// Consumes the blocks to avoid cloning. - fn dispatch_cold(&self, blocks: Vec) -> StorageResult<()> { - let cold_data: Vec<_> = blocks.into_iter().map(BlockData::from).collect(); - self.cold.dispatch_append_blocks(cold_data).map_err(Into::into) - } - /// Read and remove all blocks above the given block number. /// /// This combines reading the about-to-be-removed data with unwinding, @@ -237,9 +185,8 @@ impl UnifiedStorage { /// # Implementation /// /// 1. Reads headers from hot storage (sync) - /// 2. Reads receipts from cold storage (async, best-effort) - /// 3. Unwinds hot storage (sync) - /// 4. Drains cold storage (async, best-effort) + /// 2. Unwinds hot storage (sync) + /// 3. Drains cold storage (async, best-effort) /// /// # Cold Lag /// @@ -250,25 +197,30 @@ impl UnifiedStorage { /// # Errors /// /// - [`Hot`]: Hot storage read or unwind failed. - /// - [`Cold`]: Hot storage unwound but cold truncate dispatch failed. + /// + /// Cold storage failures are silently swallowed: any error from + /// [`ColdStorage::drain_above`] (`TaskTerminated`, `Backend`, + /// `DeadlineExceeded`, …) collapses to "no receipts above" and the + /// returned [`DrainedBlock`]s carry empty receipt vecs. Operators + /// cannot distinguish a real cold failure from "cold has not + /// caught up" through this method. See `ENG-2210` for the + /// follow-up to either propagate non-`TaskTerminated` errors or + /// remove `unified.rs::drain_above` outright. 
/// /// [`Hot`]: crate::StorageError::Hot - /// [`Cold`]: crate::StorageError::Cold + /// [`ColdStorage::drain_above`]: signet_cold::ColdStorage::drain_above pub async fn drain_above(&self, block: BlockNumber) -> StorageResult> { // 1–2. Read headers and unwind hot storage synchronously. - // Extracted to a sync helper so the `!Send` write transaction - // does not appear in the async state machine. let headers = self.unwind_hot_above(block)?; if headers.is_empty() { return Ok(Vec::new()); } - // 3. Atomically drain cold (best-effort — failure = normal cold lag) + // 3. Drain cold (errors silently swallowed — see method docs + // and ENG-2210 for the propagation follow-up). let cold_receipts = self.cold.drain_above(block).await.unwrap_or_default(); // 4. Assemble drained blocks (zip headers with receipts, default empty). - // Pad cold_receipts to match headers length so zip consumes both - // without cloning. let drained = headers .into_iter() .zip(cold_receipts.into_iter().chain(std::iter::repeat_with(Vec::new))) @@ -298,32 +250,30 @@ impl UnifiedStorage { /// /// This method: /// 1. Unwinds hot storage synchronously (restores previous state) - /// 2. Truncates cold storage synchronously (non-blocking dispatch) + /// 2. Truncates cold storage via the [`ColdStorage`] handle /// /// # Errors /// /// - [`Hot`]: Hot storage unwind failed. State is unchanged. - /// - [`Cold`]: Hot storage unwound but cold truncate dispatch - /// failed. Check the inner [`ColdStorageError`] variant: - /// - [`Backpressure`]: Task alive but channel full. - /// - [`TaskTerminated`]: Task stopped. - /// - /// Cold storage may temporarily contain stale blocks until the truncate is - /// replayed. This is safe: hot storage is authoritative. + /// - [`Cold`]: Hot storage unwound but cold truncate failed. 
/// /// [`Hot`]: crate::StorageError::Hot /// [`Cold`]: crate::StorageError::Cold - /// [`Backpressure`]: signet_cold::ColdStorageError::Backpressure - /// [`TaskTerminated`]: signet_cold::ColdStorageError::TaskTerminated - pub fn unwind_above(&self, block: BlockNumber) -> StorageResult<()> { - // 1. Unwind hot storage synchronously - let writer = self.hot.writer()?; + pub async fn unwind_above(&self, block: BlockNumber) -> StorageResult<()> { + // 1. Unwind hot storage synchronously (helper scopes the !Send writer) + self.unwind_hot_sync(block)?; + + // 2. Truncate cold storage + self.cold.truncate_above(block).await.map_err(Into::into) + } + /// Unwind hot storage to `block`. Kept sync so the `!Send` write + /// transaction does not leak into any async state machine. + fn unwind_hot_sync(&self, block: BlockNumber) -> StorageResult<()> { + let writer = self.hot.writer()?; writer.unwind_above(block).map_err(|e| e.map_db(|e| e.into_hot_kv_error()))?; writer.raw_commit().map_err(|e| e.into_hot_kv_error())?; - - // 2. Truncate cold storage synchronously (non-blocking dispatch) - self.cold.dispatch_truncate_above(block).map_err(Into::into) + Ok(()) } /// Check how far behind cold storage is compared to hot storage. @@ -365,20 +315,27 @@ impl UnifiedStorage { #[cfg(test)] mod tests { use super::*; + use signet_cold_mdbx::MdbxColdBackend; use signet_hot_mdbx::DatabaseEnv; - /// Compile-time canaries: all async methods on `UnifiedStorage` + /// Compile-time canaries: all async methods on `UnifiedStorage` /// must return `Send` futures, even though MDBX write transactions are /// `!Send`. If a `!Send` type leaks into the async state machine, these /// will fail to compile. 
fn _assert_send<T: Send>(_: T) {}
-    fn _drain_above_is_send(s: &UnifiedStorage) {
+    fn _drain_above_is_send(s: &UnifiedStorage) {
         _assert_send(s.drain_above(0));
     }
-    fn _cold_lag_is_send(s: &UnifiedStorage) {
+    fn _cold_lag_is_send(s: &UnifiedStorage) {
         _assert_send(s.cold_lag());
     }
-    fn _replay_to_cold_is_send(s: &UnifiedStorage) {
+    fn _replay_to_cold_is_send(s: &UnifiedStorage) {
         _assert_send(s.replay_to_cold(Vec::new()));
     }
+    fn _append_blocks_is_send(s: &UnifiedStorage) {
+        _assert_send(s.append_blocks(Vec::new()));
+    }
+    fn _unwind_above_is_send(s: &UnifiedStorage) {
+        _assert_send(s.unwind_above(0));
+    }
 }
diff --git a/crates/storage/tests/unified.rs b/crates/storage/tests/unified.rs
index 002157c..ed238be 100644
--- a/crates/storage/tests/unified.rs
+++ b/crates/storage/tests/unified.rs
@@ -4,7 +4,7 @@ use alloy::{
     consensus::{Header, Sealable, Signed, TxLegacy, transaction::Recovered},
     primitives::{Address, B256, Signature, TxKind, U256},
 };
-use signet_cold::{ColdStorageTask, HeaderSpecifier, mem::MemColdBackend};
+use signet_cold::{ColdStorage, HeaderSpecifier, mem::MemColdBackend};
 use signet_hot::{HistoryRead, HistoryWrite, HotKv, mem::MemKv, model::HotKvWrite};
 use signet_storage::UnifiedStorage;
 use signet_storage_types::{
@@ -79,14 +79,14 @@ fn make_chain_with_txs(count: u64, tx_count: usize) -> Vec {
 async fn append_and_read_back() {
     let hot = MemKv::new();
     let cancel = CancellationToken::new();
-    let cold_handle = ColdStorageTask::spawn(MemColdBackend::new(), cancel.clone());
+    let cold_handle = ColdStorage::new(MemColdBackend::new(), cancel.clone());
     let storage = UnifiedStorage::new(hot.clone(), cold_handle.clone());
 
     // Append a block
     let blocks = make_chain(1);
     let expected_hash = blocks[0].header.hash();
-    storage.append_blocks(blocks).unwrap();
+    storage.append_blocks(blocks).await.unwrap();
 
     // Verify hot storage has the block
     let reader = hot.reader().unwrap();
@@ -104,18 +104,18 @@ async fn append_and_read_back() {
 async fn 
append_multiple_and_unwind() { let hot = MemKv::new(); let cancel = CancellationToken::new(); - let cold_handle = ColdStorageTask::spawn(MemColdBackend::new(), cancel.clone()); + let cold_handle = ColdStorage::new(MemColdBackend::new(), cancel.clone()); let storage = UnifiedStorage::new(hot.clone(), cold_handle.clone()); // Append blocks 0, 1, 2 - storage.append_blocks(make_chain(3)).unwrap(); + storage.append_blocks(make_chain(3)).await.unwrap(); // Verify tip is at block 2 assert_eq!(hot.reader().unwrap().get_chain_tip().unwrap().unwrap().0, 2); // Unwind above block 0 - storage.unwind_above(0).unwrap(); + storage.unwind_above(0).await.unwrap(); // Hot tip should be block 0 assert_eq!(hot.reader().unwrap().get_chain_tip().unwrap().unwrap().0, 0); @@ -127,12 +127,12 @@ async fn append_multiple_and_unwind() { async fn drain_above_returns_headers_and_receipts() { let hot = MemKv::new(); let cancel = CancellationToken::new(); - let cold_handle = ColdStorageTask::spawn(MemColdBackend::new(), cancel.clone()); + let cold_handle = ColdStorage::new(MemColdBackend::new(), cancel.clone()); let storage = UnifiedStorage::new(hot.clone(), cold_handle); // Append 3 blocks (0, 1, 2) with 2 txs each let blocks = make_chain_with_txs(3, 2); - storage.append_blocks(blocks).unwrap(); + storage.append_blocks(blocks).await.unwrap(); // drain_above(0) — returns blocks 1 and 2 let drained = storage.drain_above(0).await.unwrap(); @@ -153,11 +153,11 @@ async fn drain_above_returns_headers_and_receipts() { async fn drain_above_empty_when_at_tip() { let hot = MemKv::new(); let cancel = CancellationToken::new(); - let cold_handle = ColdStorageTask::spawn(MemColdBackend::new(), cancel.clone()); + let cold_handle = ColdStorage::new(MemColdBackend::new(), cancel.clone()); let storage = UnifiedStorage::new(hot.clone(), cold_handle); // Append 2 blocks (0, 1) - storage.append_blocks(make_chain_with_txs(2, 1)).unwrap(); + storage.append_blocks(make_chain_with_txs(2, 1)).await.unwrap(); // 
drain_above(1) — nothing above tip let drained = storage.drain_above(1).await.unwrap(); @@ -174,7 +174,7 @@ async fn drain_above_empty_when_at_tip() { async fn drain_above_cold_lag() { let hot = MemKv::new(); let cancel = CancellationToken::new(); - let cold_handle = ColdStorageTask::spawn(MemColdBackend::new(), cancel.clone()); + let cold_handle = ColdStorage::new(MemColdBackend::new(), cancel.clone()); let storage = UnifiedStorage::new(hot.clone(), cold_handle); // Write 3 blocks with txs directly to hot — skips cold entirely