From a38efe8875285f8a10516d8f1c9cfe3c35a9c5df Mon Sep 17 00:00:00 2001 From: Rhys Parry Date: Wed, 29 Apr 2026 04:48:39 +0000 Subject: [PATCH 1/2] Fix ConnectionErrorRetryTimeout to act as a hard deadline during connection attempts Previously the timeout was only checked at the start of each retry loop iteration, so a blocked SSL handshake could exceed it by up to one full per-attempt timeout. Now a linked CancellationToken fires at the deadline, interrupting AcquireConnectionAsync mid-attempt. Deadline cancellations are surfaced as HalibutClientException rather than OperationCanceledException. Includes a regression test using a paused port forwarder to demonstrate the before/after: the overall wait is now bounded by ConnectionErrorRetryTimeout (~3s) rather than the per-attempt stream timeout (~8s). Co-Authored-By: Claude Sonnet 4.6 --- .../ListeningConnectRetryFixture.cs | 44 +++++++++++++++++++ .../Transport/SecureListeningClient.cs | 28 +++++++++--- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/source/Halibut.Tests/ListeningConnectRetryFixture.cs b/source/Halibut.Tests/ListeningConnectRetryFixture.cs index 4a9a4e9fc..76a34b607 100644 --- a/source/Halibut.Tests/ListeningConnectRetryFixture.cs +++ b/source/Halibut.Tests/ListeningConnectRetryFixture.cs @@ -2,6 +2,7 @@ using System.Diagnostics; using System.Threading.Tasks; using FluentAssertions; +using Halibut.Diagnostics; using Halibut.Logging; using Halibut.Tests.Support; using Halibut.Tests.Support.PortForwarding; @@ -121,6 +122,49 @@ public async Task ListeningRetryListeningSleepIntervalWorks(ClientAndServiceTest } } + [Test] + [LatestClientAndLatestServiceTestCases(testNetworkConditions: false, testWebSocket: false, testPolling: false)] + public async Task ConnectionErrorRetryTimeout_IsAHardDeadlineEvenWhenIndividualAttemptsBlockLonger(ClientAndServiceTestCase clientAndServiceTestCase) + { + // Each attempt hangs for perAttemptTimeout while the SSL handshake waits for data + // that the paused port forwarder will never deliver. + // ConnectionErrorRetryTimeout is shorter and should act as the hard deadline. + var perAttemptTimeout = TimeSpan.FromSeconds(8); + var connectionErrorRetryTimeout = TimeSpan.FromSeconds(3); + + var halibutTimeoutsAndLimits = new HalibutTimeoutsAndLimitsForTestsBuilder().Build(); + halibutTimeoutsAndLimits.TcpClientTimeout = new SendReceiveTimeout( + sendTimeout: perAttemptTimeout, + receiveTimeout: perAttemptTimeout); + halibutTimeoutsAndLimits.TcpClientConnectTimeout = TimeSpan.FromSeconds(60); + halibutTimeoutsAndLimits.ConnectionErrorRetryTimeout = connectionErrorRetryTimeout; + halibutTimeoutsAndLimits.RetryCountLimit = int.MaxValue; + + await using var clientAndService = await clientAndServiceTestCase.CreateTestCaseBuilder() + .As() + .WithHalibutTimeoutsAndLimits(halibutTimeoutsAndLimits) + .WithPortForwarding(out var portForwarder) + .WithEchoService() + .WithHalibutLoggingLevel(LogLevel.Fatal) + .Build(CancellationToken); + + // Pause mode: TCP connects immediately but data is frozen, so the SSL handshake + // stalls until the stream read timeout (perAttemptTimeout) fires. + portForwarder.Value.EnterPauseNewAndExistingConnectionsMode(); + + var echoService = clientAndService.CreateAsyncClient(); + + var sw = Stopwatch.StartNew(); + await AssertException.Throws(() => echoService.SayHelloAsync("hello")); + sw.Stop(); + + // Should bail out around connectionErrorRetryTimeout (3s), not perAttemptTimeout (8s). + // Currently FAILS: the retry loop only checks ConnectionErrorRetryTimeout between + // attempts, so the first attempt runs to completion at ~8s before the loop can exit. + sw.Elapsed.Should().BeLessThan(perAttemptTimeout - TimeSpan.FromSeconds(2), + because: $"ConnectionErrorRetryTimeout ({connectionErrorRetryTimeout}) should bound the total wait, not the per-attempt timeout ({perAttemptTimeout})"); + } + [Test] [LatestClientAndLatestServiceTestCases(testNetworkConditions: false, testWebSocket: false, testPolling: false)] public async Task ListeningRetriesAttemptsCanEventuallyWork(ClientAndServiceTestCase clientAndServiceTestCase) diff --git a/source/Halibut/Transport/SecureListeningClient.cs b/source/Halibut/Transport/SecureListeningClient.cs index 36db06a37..efb37ec7e 100644 --- a/source/Halibut/Transport/SecureListeningClient.cs +++ b/source/Halibut/Transport/SecureListeningClient.cs @@ -47,10 +47,17 @@ public async Task ExecuteTransactionAsync(ExchangeActionAsync protocolHandler, C // retryAllowed is also used to indicate if the error occurred before or after the connection was made var retryAllowed = true; - // It is important to know if an exception happens while connecting or while transmitting a request, + // It is important to know if an exception happens while connecting or while transmitting a request, // as it determines what we do in Tentacle (for example, if we are cancelling). var hasConnected = false; var watch = Stopwatch.StartNew(); + + // Arm a deadline token so individual connection attempts can't outlive ConnectionErrorRetryTimeout. + // Guard against TimeSpan.MaxValue and other huge values that would overflow CancelAfter's internal timer. + using var deadlineCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + if (ServiceEndpoint.ConnectionErrorRetryTimeout < TimeSpan.FromMilliseconds(uint.MaxValue - 1)) + deadlineCts.CancelAfter(ServiceEndpoint.ConnectionErrorRetryTimeout); + for (var i = 0; i < ServiceEndpoint.RetryCountLimit && retryAllowed && watch.Elapsed < ServiceEndpoint.ConnectionErrorRetryTimeout; i++) { if (i > 0) @@ -80,7 +87,7 @@ public async Task ExecuteTransactionAsync(ExchangeActionAsync protocolHandler, C tcpConnectionFactory, ServiceEndpoint, log, - cancellationToken).ConfigureAwait(false); + deadlineCts.Token).ConfigureAwait(false); } catch (Exception ex) when (cancellationToken.IsCancellationRequested) { @@ -175,9 +182,20 @@ public async Task ExecuteTransactionAsync(ExchangeActionAsync protocolHandler, C } catch (OperationCanceledException ex) { - log.WriteException(EventType.Diagnostic, "The operation was canceled", ex); - lastError = ex; - retryAllowed = false; + if (cancellationToken.IsCancellationRequested) + { + log.WriteException(EventType.Diagnostic, "The operation was canceled", ex); + lastError = ex; + retryAllowed = false; + } + else + { + // The ConnectionErrorRetryTimeout deadline fired. Exit the retry loop; HandleError will + // surface this as HalibutClientException rather than propagating OperationCanceledException. + log.Write(EventType.Error, $"The connection to {ServiceEndpoint.Format()} was abandoned because the ConnectionErrorRetryTimeout was reached."); + lastError = new TimeoutException("The connection attempt was abandoned because the ConnectionErrorRetryTimeout was reached.", ex); + break; + } } catch (Exception ex) { From 9c471c93bfbecd7554a1322a9637de6b319eddb6 Mon Sep 17 00:00:00 2001 From: Rhys Parry Date: Wed, 29 Apr 2026 23:04:46 +0000 Subject: [PATCH 2/2] Fix net48 SSL handshake ignoring cancellation token during connection deadline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AuthenticateAsClientAsync on .NET 4.8 does not accept a CancellationToken, so the ConnectionErrorRetryTimeout deadline had no effect during the TLS handshake — the attempt ran until the socket ReceiveTimeout instead. Register a callback on the cancellation token that closes the underlying TcpClient, which unblocks AuthenticateAsClientAsync immediately. The resulting exception is re-thrown as OperationCanceledException so the existing handling in SecureListeningClient routes it correctly. Co-Authored-By: Claude Sonnet 4.6 --- .../Halibut/Transport/TcpConnectionFactory.cs | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/source/Halibut/Transport/TcpConnectionFactory.cs b/source/Halibut/Transport/TcpConnectionFactory.cs index b61e190e5..480717c34 100644 --- a/source/Halibut/Transport/TcpConnectionFactory.cs +++ b/source/Halibut/Transport/TcpConnectionFactory.cs @@ -56,13 +56,24 @@ public async Task EstablishNewConnectionAsync(ExchangeProtocolBuild log.Write(EventType.SecurityNegotiation, "Performing TLS handshake"); #if NETFRAMEWORK - // TODO: ASYNC ME UP! - // AuthenticateAsClientAsync in .NET 4.8 does not support cancellation tokens. So `cancellationToken` is not respected here. - await ssl.AuthenticateAsClientAsync( - serviceEndpoint.BaseUri.Host, - new X509Certificate2Collection(clientCertificate), - sslConfigurationProvider.SupportedProtocols, - false); + // AuthenticateAsClientAsync in .NET 4.8 does not accept a CancellationToken. + // Register a callback that closes the socket when the token fires; closing the + // underlying client unblocks the handshake so cancellation is honoured. + using (cancellationToken.Register(() => client.Close())) + { + try + { + await ssl.AuthenticateAsClientAsync( + serviceEndpoint.BaseUri.Host, + new X509Certificate2Collection(clientCertificate), + sslConfigurationProvider.SupportedProtocols, + false); + } + catch (Exception) when (cancellationToken.IsCancellationRequested) + { + throw new OperationCanceledException(cancellationToken); + } + } #else await ssl.AuthenticateAsClientEnforcingTimeout(serviceEndpoint, new X509Certificate2Collection(clientCertificate), sslConfigurationProvider, cancellationToken); #endif