From ac70626715d619ab49be7b10e4ad9e0b5888ea80 Mon Sep 17 00:00:00 2001 From: Aleksander <170264518+t-aleksander@users.noreply.github.com> Date: Fri, 10 Apr 2026 13:26:03 +0200 Subject: [PATCH 1/2] Retry service location auto-connect on startup --- .../enterprise/service_locations/windows.rs | 2 +- src-tauri/src/service/windows.rs | 58 ++++++++++++++----- 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/src-tauri/src/enterprise/service_locations/windows.rs b/src-tauri/src/enterprise/service_locations/windows.rs index f40c1044..17a442c2 100644 --- a/src-tauri/src/enterprise/service_locations/windows.rs +++ b/src-tauri/src/enterprise/service_locations/windows.rs @@ -725,7 +725,7 @@ impl ServiceLocationManager { if let Err(err) = self.setup_service_location_interface(&location, &instance_data.private_key) { - debug!( + warn!( "Failed to setup service location interface for '{}': {err:?}", location.name ); diff --git a/src-tauri/src/service/windows.rs b/src-tauri/src/service/windows.rs index c72a176a..d0a3647b 100644 --- a/src-tauri/src/service/windows.rs +++ b/src-tauri/src/service/windows.rs @@ -32,6 +32,8 @@ use crate::{ static SERVICE_NAME: &str = "DefguardService"; const SERVICE_TYPE: ServiceType = ServiceType::OWN_PROCESS; const LOGIN_LOGOFF_MONITORING_RESTART_DELAY_SECS: Duration = Duration::from_secs(5); +const SERVICE_LOCATION_CONNECT_RETRY_COUNT: u32 = 5; +const SERVICE_LOCATION_CONNECT_RETRY_DELAY_SECS: u64 = 30; pub fn run() -> Result<(), windows_service::Error> { // Register generated `ffi_service_main` with the system and start the service, blocking @@ -112,25 +114,51 @@ fn run_service() -> Result<(), DaemonError> { let service_location_manager = Arc::new(RwLock::new(service_location_manager)); - // Spawn service location management task - let service_location_manager_clone = service_location_manager.clone(); + // Spawn service location auto-connect task with retries. + // Each attempt skips locations that are already connected, so it is safe to call + // connect_to_service_locations repeatedly. The retry loop exists to handle the case + // where the connection may fail initially at startup because the network + // (e.g. Wi-Fi) is not yet available (mainly DNS resolution issues). + let service_location_manager_connect = service_location_manager.clone(); runtime.spawn(async move { - let manager = service_location_manager_clone; - - info!("Starting service location management task"); - - info!("Attempting to auto-connect to service locations"); - match manager.write().unwrap().connect_to_service_locations() { - Ok(()) => { - info!("Auto-connect to service locations completed successfully"); + for attempt in 1..=SERVICE_LOCATION_CONNECT_RETRY_COUNT { + info!( + "Attempting to auto-connect to service locations \ + (attempt {attempt}/{SERVICE_LOCATION_CONNECT_RETRY_COUNT})" + ); + match service_location_manager_connect + .write() + .unwrap() + .connect_to_service_locations() + { + Ok(()) => { + info!( + "Auto-connect attempt {attempt}/{SERVICE_LOCATION_CONNECT_RETRY_COUNT} \ + completed" + ); + } + Err(err) => { + warn!( + "Auto-connect attempt {attempt}/{SERVICE_LOCATION_CONNECT_RETRY_COUNT} \ + failed: {err}" + ); + } } - Err(err) => { - warn!( - "Error while trying to auto-connect to service locations: {err}. \ - Will continue monitoring for login/logoff events.", - ); + + if attempt < SERVICE_LOCATION_CONNECT_RETRY_COUNT { + tokio::time::sleep(Duration::from_secs( + SERVICE_LOCATION_CONNECT_RETRY_DELAY_SECS, + )) + .await; } } + info!("Service location auto-connect task finished"); + }); + + // Spawn login/logoff monitoring task, runs concurrently with the auto-connect task above. + let service_location_manager_clone = service_location_manager.clone(); + runtime.spawn(async move { + let manager = service_location_manager_clone; info!("Starting login/logoff event monitoring"); loop { From 48cdb9de1fb3a641457c0412cab571162c3bf814 Mon Sep 17 00:00:00 2001 From: Aleksander <170264518+t-aleksander@users.noreply.github.com> Date: Fri, 10 Apr 2026 13:38:56 +0200 Subject: [PATCH 2/2] listen for address change --- src-tauri/Cargo.toml | 3 ++ .../enterprise/service_locations/windows.rs | 54 +++++++++++++++++++ src-tauri/src/service/windows.rs | 39 ++++++++++++-- 3 files changed, 93 insertions(+), 3 deletions(-) diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 080ef5c1..456abc86 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -160,6 +160,9 @@ windows-sys = { version = "0.61", features = [ # HANDLE & file functions "Win32_System_IO", "Win32_System_Threading", + + # Network address change notifications (NotifyAddrChange) + "Win32_NetworkManagement_IpHelper", ] } [features] diff --git a/src-tauri/src/enterprise/service_locations/windows.rs b/src-tauri/src/enterprise/service_locations/windows.rs index 17a442c2..3bdd63a3 100644 --- a/src-tauri/src/enterprise/service_locations/windows.rs +++ b/src-tauri/src/enterprise/service_locations/windows.rs @@ -9,6 +9,8 @@ use std::{ time::Duration, }; +use windows_sys::Win32::NetworkManagement::IpHelper::NotifyAddrChange; + use common::{dns_borrow, find_free_tcp_port, get_interface_name}; use defguard_wireguard_rs::{ key::Key, net::IpAddrMask, peer::Peer, InterfaceConfiguration, WireguardInterfaceApi, @@ -36,10 +38,62 @@ use crate::{ }; const LOGIN_LOGOFF_EVENT_RETRY_DELAY_SECS: u64 = 5; +// How long to wait after a network change before attempting to connect. +// Gives DHCP time to complete and DNS to become available. +const NETWORK_STABILIZATION_DELAY_SECS: u64 = 3; +// How long to wait before restarting the network change watcher on error. +const NETWORK_CHANGE_MONITOR_RESTART_DELAY_SECS: u64 = 5; const DEFAULT_WIREGUARD_PORT: u16 = 51820; const DEFGUARD_DIR: &str = "Defguard"; const SERVICE_LOCATIONS_SUBDIR: &str = "service_locations"; +/// Watches for IP address changes on any network interface and attempts to connect to any +/// service locations that are not yet connected. This handles the case where the endpoint +/// hostname cannot be resolved at service startup because the network (e.g. Wi-Fi) is not +/// yet available. When the network comes up and an IP is assigned, this watcher fires and +/// retries the connection. +/// +/// Note: `NotifyAddrChange` also fires when WireGuard interfaces are created. This is +/// harmless because `connect_to_service_locations` skips already-connected locations. +pub(crate) async fn watch_for_network_change( + service_location_manager: Arc>, +) -> Result<(), ServiceLocationError> { + loop { + // NotifyAddrChange blocks until any IP address is added or removed on any interface. + // Passing NULL for both handle and overlapped selects the synchronous (blocking) mode. + let result = unsafe { NotifyAddrChange(std::ptr::null_mut(), std::ptr::null()) }; + + if result != 0 { + error!("NotifyAddrChange failed with error code: {result}"); + tokio::time::sleep(Duration::from_secs( + NETWORK_CHANGE_MONITOR_RESTART_DELAY_SECS, + )) + .await; + continue; + } + + debug!( + "Network address change detected, waiting {NETWORK_STABILIZATION_DELAY_SECS}s for \ + network to stabilize before attempting service location connections..." + ); + tokio::time::sleep(Duration::from_secs(NETWORK_STABILIZATION_DELAY_SECS)).await; + + debug!("Attempting to connect to service locations after network change"); + match service_location_manager + .write() + .unwrap() + .connect_to_service_locations() + { + Ok(()) => { + debug!("Service location connect attempt after network change completed"); + } + Err(err) => { + warn!("Failed to connect to service locations after network change: {err}"); + } + } + } +} + pub(crate) async fn watch_for_login_logoff( service_location_manager: Arc>, ) -> Result<(), ServiceLocationError> { diff --git a/src-tauri/src/service/windows.rs b/src-tauri/src/service/windows.rs index d0a3647b..ec4b4730 100644 --- a/src-tauri/src/service/windows.rs +++ b/src-tauri/src/service/windows.rs @@ -20,7 +20,8 @@ use windows_service::{ use crate::{ enterprise::service_locations::{ - windows::watch_for_login_logoff, ServiceLocationError, ServiceLocationManager, + windows::{watch_for_login_logoff, watch_for_network_change}, + ServiceLocationError, ServiceLocationManager, }, service::{ config::Config, @@ -32,6 +33,7 @@ use crate::{ static SERVICE_NAME: &str = "DefguardService"; const SERVICE_TYPE: ServiceType = ServiceType::OWN_PROCESS; const LOGIN_LOGOFF_MONITORING_RESTART_DELAY_SECS: Duration = Duration::from_secs(5); +const NETWORK_CHANGE_MONITORING_RESTART_DELAY_SECS: Duration = Duration::from_secs(5); const SERVICE_LOCATION_CONNECT_RETRY_COUNT: u32 = 5; const SERVICE_LOCATION_CONNECT_RETRY_DELAY_SECS: u64 = 30; @@ -114,11 +116,42 @@ fn run_service() -> Result<(), DaemonError> { let service_location_manager = Arc::new(RwLock::new(service_location_manager)); + // Spawn network change monitoring task first so NotifyAddrChange is registered as early + // as possible, minimising the window in which a network event could be missed before + // the watcher is listening. The retry task below is the backstop for any event that + // still slips through that window. + let service_location_manager_clone = service_location_manager.clone(); + runtime.spawn(async move { + let manager = service_location_manager_clone; + + info!("Starting network change monitoring"); + loop { + match watch_for_network_change(manager.clone()).await { + Ok(()) => { + warn!( + "Network change monitoring ended unexpectedly. Restarting in \ + {NETWORK_CHANGE_MONITORING_RESTART_DELAY_SECS:?}..." + ); + tokio::time::sleep(NETWORK_CHANGE_MONITORING_RESTART_DELAY_SECS).await; + } + Err(e) => { + error!( + "Error in network change monitoring: {e}. Restarting in \ + {NETWORK_CHANGE_MONITORING_RESTART_DELAY_SECS:?}...", + ); + tokio::time::sleep(NETWORK_CHANGE_MONITORING_RESTART_DELAY_SECS).await; + info!("Restarting network change monitoring"); + } + } + } + }); + // Spawn service location auto-connect task with retries. // Each attempt skips locations that are already connected, so it is safe to call // connect_to_service_locations repeatedly. The retry loop exists to handle the case // where the connection may fail initially at startup because the network - // (e.g. Wi-Fi) is not yet available (mainly DNS resolution issues). + // (e.g. Wi-Fi) is not yet available (mainly DNS resolution issues), and serves as + // a backstop for any network events missed by the watcher above. let service_location_manager_connect = service_location_manager.clone(); runtime.spawn(async move { for attempt in 1..=SERVICE_LOCATION_CONNECT_RETRY_COUNT { @@ -155,7 +188,7 @@ fn run_service() -> Result<(), DaemonError> { info!("Service location auto-connect task finished"); }); - // Spawn login/logoff monitoring task, runs concurrently with the auto-connect task above. + // Spawn login/logoff monitoring task, runs concurrently with the tasks above. let service_location_manager_clone = service_location_manager.clone(); runtime.spawn(async move { let manager = service_location_manager_clone;