From 49133cdc221aeb1cd9c556dd915370765a97bd02 Mon Sep 17 00:00:00 2001
From: flashboy
Date: Wed, 28 Jan 2026 11:28:27 -0500
Subject: [PATCH 1/3] Add restart policy + exit on consecutive health failures

---
 backend/src/lib/server.rs | 32 +++++++++++++++++++++++++++++---
 docker-compose.yml        |  1 +
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/backend/src/lib/server.rs b/backend/src/lib/server.rs
index 8aa9935..a134f8b 100644
--- a/backend/src/lib/server.rs
+++ b/backend/src/lib/server.rs
@@ -29,6 +29,12 @@ use super::serializable_event::SerializableEventData;
 /// Stores the Unix timestamp (in seconds) of the last event received from the ring
 type LastEventTime = Arc<AtomicU64>;
 
+/// Tracks consecutive unhealthy health checks
+type ConsecutiveUnhealthyCount = Arc<AtomicU64>;
+
+/// Number of consecutive unhealthy checks before triggering process exit
+const UNHEALTHY_THRESHOLD: u64 = 3;
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct TopAccessesData {
     pub account: Vec<Vec<String>>,
@@ -224,7 +230,7 @@ async fn run_event_forwarder_task(
         .duration_since(UNIX_EPOCH)
         .unwrap_or_default()
         .as_secs();
-    last_event_time.store(now_secs, Ordering::Relaxed);
+    // last_event_time.store(now_secs, Ordering::Relaxed);
 
     // Track txn_hash from TxnHeaderStart events
     if let EventName::TxnHeaderStart = event_data.event_name {
@@ -362,6 +368,7 @@ async fn handle_connection(
 
 async fn health_handler(
     last_event_time: LastEventTime,
+    consecutive_unhealthy: ConsecutiveUnhealthyCount,
 ) -> Result<Response<Full<Bytes>>, hyper::Error> {
     let now_secs = SystemTime::now()
         .duration_since(UNIX_EPOCH)
@@ -371,10 +378,25 @@ async fn health_handler(
     let last_event = last_event_time.load(Ordering::Relaxed);
     let is_healthy = now_secs.saturating_sub(last_event) <= 10;
 
     let body = if is_healthy {
+        consecutive_unhealthy.store(0, Ordering::Relaxed);
         info!("Health check passed");
         r#"{"success": true}"#
     } else {
-        warn!("Health check failed - last event time: {} seconds ago", now_secs.saturating_sub(last_event));
+        let count = consecutive_unhealthy.fetch_add(1, Ordering::Relaxed) + 1;
+        warn!(
+            "Health check failed - last event time: {} seconds ago (consecutive failures: {})",
+            now_secs.saturating_sub(last_event),
+            count
+        );
+
+        if count >= UNHEALTHY_THRESHOLD {
+            error!(
+                "Health check failed {} consecutive times, exiting to trigger restart",
+                count
+            );
+            std::process::exit(1);
+        }
+
         r#"{"success": false}"#
     };
@@ -392,15 +414,19 @@ async fn run_health_server(
     let listener = tokio::net::TcpListener::bind(health_addr).await?;
     info!("Health server listening on: {}", health_addr);
 
+    let consecutive_unhealthy: ConsecutiveUnhealthyCount = Arc::new(AtomicU64::new(0));
+
     loop {
         let (stream, _) = listener.accept().await?;
         let io = hyper_util::rt::TokioIo::new(stream);
         let last_event_time = last_event_time.clone();
+        let consecutive_unhealthy = consecutive_unhealthy.clone();
 
         tokio::spawn(async move {
             let service = service_fn(move |_req: Request<hyper::body::Incoming>| {
                 let last_event_time = last_event_time.clone();
-                async move { health_handler(last_event_time).await }
+                let consecutive_unhealthy = consecutive_unhealthy.clone();
+                async move { health_handler(last_event_time, consecutive_unhealthy).await }
             });
 
             if let Err(e) = http1::Builder::new().serve_connection(io, service).await {
diff --git a/docker-compose.yml b/docker-compose.yml
index 30565fb..32a424e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -7,6 +7,7 @@ services:
       dockerfile: Dockerfile
     container_name: backend
     network_mode: host
+    restart: always
     environment:
       - RUST_LOG=info
     volumes:
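Note on PATCH 1/3: `restart: always` only takes effect once the process exits, and the exit path above lives inside the request handler, so something has to poll the health endpoint regularly for the failure counter to ever advance. This compose file does not add such a poller. Below is a minimal sketch of a compose-level healthcheck that would generate that traffic; the port (8080) is an assumption, since the actual bind address of the health server is configured elsewhere and not shown in this diff, and it assumes curl is present in the image:

    healthcheck:
      # Port 8080 is an assumption; the handler ignores the request path,
      # so any path works. localhost is correct under network_mode: host.
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 10s
      timeout: 3s
      retries: 3

Docker's healthcheck never restarts a container on its own; in this design its only role is to supply the periodic requests that let the handler count failures and call std::process::exit(1), after which restart: always brings the container back.
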
From 9a63d0732118db422ba735809b9793a7fa93152c Mon Sep 17 00:00:00 2001
From: flashboy
Date: Wed, 28 Jan 2026 11:33:59 -0500
Subject: [PATCH 2/3] store last event time

---
 backend/src/lib/server.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/src/lib/server.rs b/backend/src/lib/server.rs
index a134f8b..cdb35c9 100644
--- a/backend/src/lib/server.rs
+++ b/backend/src/lib/server.rs
@@ -230,7 +230,7 @@ async fn run_event_forwarder_task(
         .duration_since(UNIX_EPOCH)
         .unwrap_or_default()
         .as_secs();
-    // last_event_time.store(now_secs, Ordering::Relaxed);
+    last_event_time.store(now_secs, Ordering::Relaxed);
 
     // Track txn_hash from TxnHeaderStart events
     if let EventName::TxnHeaderStart = event_data.event_name {
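Note on PATCH 2/3: this restores the store that PATCH 1 left commented out. Without it, last_event_time never advances, every health check eventually reports a huge gap, and the service exit-loops under restart: always. A self-contained sketch of the shared-timestamp pattern the handler depends on, with simplified names rather than the actual module layout:

    use std::sync::Arc;
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    fn now_secs() -> u64 {
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs()
    }

    fn main() {
        // Shared between the event forwarder (writer) and the health handler (reader).
        let last_event_time = Arc::new(AtomicU64::new(now_secs()));

        // Writer side: record the arrival time of each event.
        last_event_time.store(now_secs(), Ordering::Relaxed);

        // Reader side: how stale is the event stream?
        let age = now_secs().saturating_sub(last_event_time.load(Ordering::Relaxed));
        println!("seconds since last event: {}", age);
    }

Relaxed ordering is adequate here: the timestamp is a single independent value and no other memory is synchronized through it.
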
From 37f7a4abb5850bb6f7db57cb1c86e45c6412772f Mon Sep 17 00:00:00 2001
From: flashboy
Date: Wed, 28 Jan 2026 11:39:32 -0500
Subject: [PATCH 3/3] change to 30 second interval instead of 3 failed attempts

---
 backend/src/lib/server.rs | 45 ++++++++++++++++++++-------------------------
 1 file changed, 20 insertions(+), 25 deletions(-)

diff --git a/backend/src/lib/server.rs b/backend/src/lib/server.rs
index cdb35c9..a2c470c 100644
--- a/backend/src/lib/server.rs
+++ b/backend/src/lib/server.rs
@@ -29,11 +29,11 @@ use super::serializable_event::SerializableEventData;
 /// Stores the Unix timestamp (in seconds) of the last event received from the ring
 type LastEventTime = Arc<AtomicU64>;
 
-/// Tracks consecutive unhealthy health checks
-type ConsecutiveUnhealthyCount = Arc<AtomicU64>;
+/// Seconds without events before health check reports unhealthy
+const UNHEALTHY_THRESHOLD_SECS: u64 = 10;
 
-/// Number of consecutive unhealthy checks before triggering process exit
-const UNHEALTHY_THRESHOLD: u64 = 3;
+/// Seconds without events before triggering process exit
+const EXIT_THRESHOLD_SECS: u64 = 30;
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct TopAccessesData {
@@ -368,35 +368,34 @@ async fn handle_connection(
 
 async fn health_handler(
     last_event_time: LastEventTime,
-    consecutive_unhealthy: ConsecutiveUnhealthyCount,
 ) -> Result<Response<Full<Bytes>>, hyper::Error> {
     let now_secs = SystemTime::now()
         .duration_since(UNIX_EPOCH)
         .unwrap_or_default()
         .as_secs();
     let last_event = last_event_time.load(Ordering::Relaxed);
-    let is_healthy = now_secs.saturating_sub(last_event) <= 10;
+    let time_since_last_event = now_secs.saturating_sub(last_event);
+
+    // Exit process if no events received for EXIT_THRESHOLD_SECS
+    if time_since_last_event >= EXIT_THRESHOLD_SECS {
+        error!(
+            "No events received for {} seconds (threshold: {}), exiting to trigger restart",
+            time_since_last_event,
+            EXIT_THRESHOLD_SECS
+        );
+        std::process::exit(1);
+    }
+
+    let is_healthy = time_since_last_event <= UNHEALTHY_THRESHOLD_SECS;
 
     let body = if is_healthy {
-        consecutive_unhealthy.store(0, Ordering::Relaxed);
         info!("Health check passed");
         r#"{"success": true}"#
     } else {
-        let count = consecutive_unhealthy.fetch_add(1, Ordering::Relaxed) + 1;
         warn!(
-            "Health check failed - last event time: {} seconds ago (consecutive failures: {})",
-            now_secs.saturating_sub(last_event),
-            count
+            "Health check failed - last event time: {} seconds ago",
+            time_since_last_event
         );
-
-        if count >= UNHEALTHY_THRESHOLD {
-            error!(
-                "Health check failed {} consecutive times, exiting to trigger restart",
-                count
-            );
-            std::process::exit(1);
-        }
-
         r#"{"success": false}"#
     };
 
@@ -414,19 +413,15 @@ async fn run_health_server(
     let listener = tokio::net::TcpListener::bind(health_addr).await?;
     info!("Health server listening on: {}", health_addr);
 
-    let consecutive_unhealthy: ConsecutiveUnhealthyCount = Arc::new(AtomicU64::new(0));
-
     loop {
         let (stream, _) = listener.accept().await?;
         let io = hyper_util::rt::TokioIo::new(stream);
         let last_event_time = last_event_time.clone();
-        let consecutive_unhealthy = consecutive_unhealthy.clone();
 
         tokio::spawn(async move {
             let service = service_fn(move |_req: Request<hyper::body::Incoming>| {
                 let last_event_time = last_event_time.clone();
-                let consecutive_unhealthy = consecutive_unhealthy.clone();
-                async move { health_handler(last_event_time, consecutive_unhealthy).await }
+                async move { health_handler(last_event_time).await }
             });
 
             if let Err(e) = http1::Builder::new().serve_connection(io, service).await {
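Note on PATCH 3/3: replacing the failure counter with a wall-clock threshold makes the trigger independent of how often the endpoint is polled (three failed checks equaled roughly 30 seconds only under a 10-second poll interval), but the exit still runs inside health_handler, so it continues to depend on an external poller hitting the endpoint. One caveat worth checking: if last_event_time starts at 0 rather than process start time (its initialization is not shown in these patches), the very first poll before any event arrives would already exceed EXIT_THRESHOLD_SECS and exit immediately. Below is a sketch of an in-process watchdog that would decouple the exit from external traffic entirely, assuming a tokio runtime and the same Arc<AtomicU64> timestamp; run_watchdog is a hypothetical helper, not part of these patches:

    use std::sync::Arc;
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::{Duration, SystemTime, UNIX_EPOCH};

    const EXIT_THRESHOLD_SECS: u64 = 30;

    // Hypothetical watchdog: exits the process when events stop flowing,
    // whether or not anyone polls /health. `restart: always` then restarts it.
    async fn run_watchdog(last_event_time: Arc<AtomicU64>) {
        let mut ticker = tokio::time::interval(Duration::from_secs(5));
        loop {
            ticker.tick().await;
            let now = SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs();
            let age = now.saturating_sub(last_event_time.load(Ordering::Relaxed));
            if age >= EXIT_THRESHOLD_SECS {
                eprintln!("no events for {} seconds, exiting", age);
                std::process::exit(1);
            }
        }
    }

Spawned alongside the health server with tokio::spawn(run_watchdog(last_event_time.clone())), this checks the same timestamp every 5 seconds regardless of external traffic, and the health endpoint can go back to being purely a reporting surface.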