diff --git a/source/Halibut.Tests/ListeningConnectRetryFixture.cs b/source/Halibut.Tests/ListeningConnectRetryFixture.cs index 4a9a4e9f..76a34b60 100644 --- a/source/Halibut.Tests/ListeningConnectRetryFixture.cs +++ b/source/Halibut.Tests/ListeningConnectRetryFixture.cs @@ -2,6 +2,7 @@ using System.Diagnostics; using System.Threading.Tasks; using FluentAssertions; +using Halibut.Diagnostics; using Halibut.Logging; using Halibut.Tests.Support; using Halibut.Tests.Support.PortForwarding; @@ -121,6 +122,49 @@ public async Task ListeningRetryListeningSleepIntervalWorks(ClientAndServiceTest } } + [Test] + [LatestClientAndLatestServiceTestCases(testNetworkConditions: false, testWebSocket: false, testPolling: false)] + public async Task ConnectionErrorRetryTimeout_IsAHardDeadlineEvenWhenIndividualAttemptsBlockLonger(ClientAndServiceTestCase clientAndServiceTestCase) + { + // Each attempt hangs for perAttemptTimeout while the SSL handshake waits for data + // that the paused port forwarder will never deliver. + // ConnectionErrorRetryTimeout is shorter and should act as the hard deadline. + var perAttemptTimeout = TimeSpan.FromSeconds(8); + var connectionErrorRetryTimeout = TimeSpan.FromSeconds(3); + + var halibutTimeoutsAndLimits = new HalibutTimeoutsAndLimitsForTestsBuilder().Build(); + halibutTimeoutsAndLimits.TcpClientTimeout = new SendReceiveTimeout( + sendTimeout: perAttemptTimeout, + receiveTimeout: perAttemptTimeout); + halibutTimeoutsAndLimits.TcpClientConnectTimeout = TimeSpan.FromSeconds(60); + halibutTimeoutsAndLimits.ConnectionErrorRetryTimeout = connectionErrorRetryTimeout; + halibutTimeoutsAndLimits.RetryCountLimit = int.MaxValue; + + await using var clientAndService = await clientAndServiceTestCase.CreateTestCaseBuilder() + .As() + .WithHalibutTimeoutsAndLimits(halibutTimeoutsAndLimits) + .WithPortForwarding(out var portForwarder) + .WithEchoService() + .WithHalibutLoggingLevel(LogLevel.Fatal) + .Build(CancellationToken); + + // Pause mode: TCP connects immediately but data is frozen, so the SSL handshake + // stalls until the stream read timeout (perAttemptTimeout) fires. + portForwarder.Value.EnterPauseNewAndExistingConnectionsMode(); + + var echoService = clientAndService.CreateAsyncClient(); + + var sw = Stopwatch.StartNew(); + await AssertException.Throws(() => echoService.SayHelloAsync("hello")); + sw.Stop(); + + // Should bail out around connectionErrorRetryTimeout (3s), not perAttemptTimeout (8s). + // Currently FAILS: the retry loop only checks ConnectionErrorRetryTimeout between + // attempts, so the first attempt runs to completion at ~8s before the loop can exit. + sw.Elapsed.Should().BeLessThan(perAttemptTimeout - TimeSpan.FromSeconds(2), + because: $"ConnectionErrorRetryTimeout ({connectionErrorRetryTimeout}) should bound the total wait, not the per-attempt timeout ({perAttemptTimeout})"); + } + [Test] [LatestClientAndLatestServiceTestCases(testNetworkConditions: false, testWebSocket: false, testPolling: false)] public async Task ListeningRetriesAttemptsCanEventuallyWork(ClientAndServiceTestCase clientAndServiceTestCase) diff --git a/source/Halibut/Transport/SecureListeningClient.cs b/source/Halibut/Transport/SecureListeningClient.cs index 36db06a3..efb37ec7 100644 --- a/source/Halibut/Transport/SecureListeningClient.cs +++ b/source/Halibut/Transport/SecureListeningClient.cs @@ -47,10 +47,17 @@ public async Task ExecuteTransactionAsync(ExchangeActionAsync protocolHandler, C // retryAllowed is also used to indicate if the error occurred before or after the connection was made var retryAllowed = true; - // It is important to know if an exception happens while connecting or while transmitting a request, + // It is important to know if an exception happens while connecting or while transmitting a request, // as it determines what we do in Tentacle (for example, if we are cancelling). var hasConnected = false; var watch = Stopwatch.StartNew(); + + // Arm a deadline token so individual connection attempts can't outlive ConnectionErrorRetryTimeout. + // Guard against TimeSpan.MaxValue and other huge values that would overflow CancelAfter's internal timer. + using var deadlineCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + if (ServiceEndpoint.ConnectionErrorRetryTimeout < TimeSpan.FromMilliseconds(uint.MaxValue - 1)) + deadlineCts.CancelAfter(ServiceEndpoint.ConnectionErrorRetryTimeout); + for (var i = 0; i < ServiceEndpoint.RetryCountLimit && retryAllowed && watch.Elapsed < ServiceEndpoint.ConnectionErrorRetryTimeout; i++) { if (i > 0) @@ -80,7 +87,7 @@ public async Task ExecuteTransactionAsync(ExchangeActionAsync protocolHandler, C tcpConnectionFactory, ServiceEndpoint, log, - cancellationToken).ConfigureAwait(false); + deadlineCts.Token).ConfigureAwait(false); } catch (Exception ex) when (cancellationToken.IsCancellationRequested) { @@ -175,9 +182,20 @@ public async Task ExecuteTransactionAsync(ExchangeActionAsync protocolHandler, C } catch (OperationCanceledException ex) { - log.WriteException(EventType.Diagnostic, "The operation was canceled", ex); - lastError = ex; - retryAllowed = false; + if (cancellationToken.IsCancellationRequested) + { + log.WriteException(EventType.Diagnostic, "The operation was canceled", ex); + lastError = ex; + retryAllowed = false; + } + else + { + // The ConnectionErrorRetryTimeout deadline fired. Exit the retry loop; HandleError will + // surface this as HalibutClientException rather than propagating OperationCanceledException. + log.Write(EventType.Error, $"The connection to {ServiceEndpoint.Format()} was abandoned because the ConnectionErrorRetryTimeout was reached."); + lastError = new TimeoutException("The connection attempt was abandoned because the ConnectionErrorRetryTimeout was reached.", ex); + break; + } } catch (Exception ex) { diff --git a/source/Halibut/Transport/TcpConnectionFactory.cs b/source/Halibut/Transport/TcpConnectionFactory.cs index b61e190e..480717c3 100644 --- a/source/Halibut/Transport/TcpConnectionFactory.cs +++ b/source/Halibut/Transport/TcpConnectionFactory.cs @@ -56,13 +56,24 @@ public async Task EstablishNewConnectionAsync(ExchangeProtocolBuild log.Write(EventType.SecurityNegotiation, "Performing TLS handshake"); #if NETFRAMEWORK - // TODO: ASYNC ME UP! - // AuthenticateAsClientAsync in .NET 4.8 does not support cancellation tokens. So `cancellationToken` is not respected here. - await ssl.AuthenticateAsClientAsync( - serviceEndpoint.BaseUri.Host, - new X509Certificate2Collection(clientCertificate), - sslConfigurationProvider.SupportedProtocols, - false); + // AuthenticateAsClientAsync in .NET 4.8 does not accept a CancellationToken. + // Register a callback that closes the socket when the token fires; closing the + // underlying client unblocks the handshake so cancellation is honoured. + using (cancellationToken.Register(() => client.Close())) + { + try + { + await ssl.AuthenticateAsClientAsync( + serviceEndpoint.BaseUri.Host, + new X509Certificate2Collection(clientCertificate), + sslConfigurationProvider.SupportedProtocols, + false); + } + catch (Exception) when (cancellationToken.IsCancellationRequested) + { + throw new OperationCanceledException(cancellationToken); + } + } #else await ssl.AuthenticateAsClientEnforcingTimeout(serviceEndpoint, new X509Certificate2Collection(clientCertificate), sslConfigurationProvider, cancellationToken); #endif