From 512d4a27b8ec3ffd7ff5e41f4511c63342d4ea48 Mon Sep 17 00:00:00 2001 From: Eric Anderson Date: Fri, 6 Mar 2026 10:48:51 -0800 Subject: [PATCH 1/2] core: Don't set firstPass=true on address update in pick_first Updating the addresses has no bearing if pick_first is doing the first pass, for the sake of when refreshNameResolution() is triggered. In the first pass pick_first is in CONNECTING and after the first pass it is in TRANSIENT_FAILURE. If pick_first already did a first pass and is in TF, then when it gets updated addresses it _stays_ in TF and the regular "We will count the number of connection failures, and when that number reaches the number of subchannels, we will request re-resolution" should apply. Setting firstPass=true on each address update can cause us to request refreshes too frequently. So we only need to adjust firstPass when changing the rawConnectivityState. And if we do that, we don't actually need firstPass; we can use numTf instead. --- .../internal/PickFirstLeafLoadBalancer.java | 14 +++++++------- .../PickFirstLeafLoadBalancerTest.java | 18 +++++++++++++----- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/io/grpc/internal/PickFirstLeafLoadBalancer.java b/core/src/main/java/io/grpc/internal/PickFirstLeafLoadBalancer.java index cd9ff9ab58c..beed32eadc3 100644 --- a/core/src/main/java/io/grpc/internal/PickFirstLeafLoadBalancer.java +++ b/core/src/main/java/io/grpc/internal/PickFirstLeafLoadBalancer.java @@ -69,7 +69,6 @@ final class PickFirstLeafLoadBalancer extends LoadBalancer { private final Map subchannels = new HashMap<>(); private final Index addressIndex = new Index(ImmutableList.of(), this.enableHappyEyeballs); private int numTf = 0; - private boolean firstPass = true; @Nullable private ScheduledHandle scheduleConnectionTask = null; private ConnectivityState rawConnectivityState = IDLE; @@ -120,9 +119,6 @@ public Status acceptResolvedAddresses(ResolvedAddresses resolvedAddresses) { } } - // Since we have a new set of addresses, we are again at first pass - firstPass = true; - List cleanServers = deDupAddresses(servers); // We can optionally be configured to shuffle the address list. This can help better distribute @@ -340,10 +336,15 @@ void processSubchannelState(SubchannelData subchannelData, ConnectivityStateInfo shutdownRemaining(subchannelData); addressIndex.seekTo(getAddress(subchannelData.subchannel)); rawConnectivityState = READY; + // Reset for clarity, but isPassComplete() prevents any counts currently present from being + // read until they point they no longer matter + numTf = 0; updateHealthCheckedState(subchannelData); break; case TRANSIENT_FAILURE: + numTf++; + // If we are looking at current channel, request a connection if possible if (addressIndex.isValid() && subchannels.get(addressIndex.getCurrentAddress()) == subchannelData) { @@ -372,9 +373,8 @@ void processSubchannelState(SubchannelData subchannelData, ConnectivityStateInfo // * have had status reported for all subchannels. // * And one of the following conditions: // * Have had enough TF reported since we completed first pass - // * Just completed the first pass - if (++numTf >= addressIndex.size() || firstPass) { - firstPass = false; + // * Just completed the first pass (in which case enough TF have been reported) + if (numTf >= addressIndex.size()) { numTf = 0; helper.refreshNameResolution(); } diff --git a/core/src/test/java/io/grpc/internal/PickFirstLeafLoadBalancerTest.java b/core/src/test/java/io/grpc/internal/PickFirstLeafLoadBalancerTest.java index cb73d17d682..d94e2b94c74 100644 --- a/core/src/test/java/io/grpc/internal/PickFirstLeafLoadBalancerTest.java +++ b/core/src/test/java/io/grpc/internal/PickFirstLeafLoadBalancerTest.java @@ -1940,6 +1940,7 @@ public void updateAddresses_identical_transient_failure() { loadBalancer.acceptResolvedAddresses( ResolvedAddresses.newBuilder().setAddresses(oldServers).setAttributes(affinity).build()); inOrder.verify(mockHelper).updateBalancingState(eq(CONNECTING), pickerCaptor.capture()); + inOrder.verify(mockHelper).createSubchannel(any()); inOrder.verify(mockSubchannel1).start(stateListenerCaptor.capture()); SubchannelStateListener stateListener = stateListenerCaptor.getValue(); assertEquals(CONNECTING, loadBalancer.getConcludedConnectivityState()); @@ -1949,6 +1950,7 @@ public void updateAddresses_identical_transient_failure() { assertEquals(CONNECTING, loadBalancer.getConcludedConnectivityState()); // Second connection attempt is unsuccessful + inOrder.verify(mockHelper).createSubchannel(any()); inOrder.verify(mockSubchannel2).start(stateListenerCaptor.capture()); assertEquals(CONNECTING, loadBalancer.getConcludedConnectivityState()); SubchannelStateListener stateListener2 = stateListenerCaptor.getValue(); @@ -1961,10 +1963,11 @@ public void updateAddresses_identical_transient_failure() { loadBalancer.acceptResolvedAddresses( ResolvedAddresses.newBuilder().setAddresses(oldServers).setAttributes(affinity).build()); - // Verify that no new subchannels were created or started - verify(mockHelper, times(2)).createSubchannel(createArgsCaptor.capture()); - verify(mockSubchannel1, times(1)).start(stateListenerCaptor.capture()); - verify(mockSubchannel2, times(1)).start(stateListenerCaptor.capture()); + // Verify that no new subchannels were created or started, nor resolver refresh + inOrder.verify(mockHelper, never()).createSubchannel(createArgsCaptor.capture()); + inOrder.verify(mockSubchannel1, never()).start(stateListenerCaptor.capture()); + inOrder.verify(mockSubchannel2, never()).start(stateListenerCaptor.capture()); + inOrder.verify(mockHelper, never()).refreshNameResolution(); assertEquals(TRANSIENT_FAILURE, loadBalancer.getConcludedConnectivityState()); // No new connections are requested, subchannels responsible for completing their own backoff @@ -1972,7 +1975,12 @@ public void updateAddresses_identical_transient_failure() { verify(mockHelper, atLeast(0)).getScheduledExecutorService(); verifyNoMoreInteractions(mockHelper); - // First connection attempt is successful + // Second connection is unsuccessful (e.g., retry after backoff), and there is no name resolver + // refresh + stateListener2.onSubchannelState(ConnectivityStateInfo.forTransientFailure(CONNECTION_ERROR)); + inOrder.verify(mockHelper, never()).refreshNameResolution(); + + // First connection is successful stateListener.onSubchannelState(ConnectivityStateInfo.forNonError(READY)); assertEquals(READY, loadBalancer.getConcludedConnectivityState()); inOrder.verify(mockSubchannel2).shutdown(); From eaa363e5dfbef71080251fc2f6c5906f6b597c08 Mon Sep 17 00:00:00 2001 From: Eric Anderson Date: Fri, 6 Mar 2026 15:39:40 -0800 Subject: [PATCH 2/2] Adjust for gRFC A61 update --- .../io/grpc/internal/PickFirstLeafLoadBalancer.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/io/grpc/internal/PickFirstLeafLoadBalancer.java b/core/src/main/java/io/grpc/internal/PickFirstLeafLoadBalancer.java index beed32eadc3..a56dadf5593 100644 --- a/core/src/main/java/io/grpc/internal/PickFirstLeafLoadBalancer.java +++ b/core/src/main/java/io/grpc/internal/PickFirstLeafLoadBalancer.java @@ -294,6 +294,7 @@ void processSubchannelState(SubchannelData subchannelData, ConnectivityStateInfo } if (newState == IDLE && subchannelData.state == READY) { + numTf = 0; helper.refreshNameResolution(); } @@ -336,9 +337,6 @@ void processSubchannelState(SubchannelData subchannelData, ConnectivityStateInfo shutdownRemaining(subchannelData); addressIndex.seekTo(getAddress(subchannelData.subchannel)); rawConnectivityState = READY; - // Reset for clarity, but isPassComplete() prevents any counts currently present from being - // read until they point they no longer matter - numTf = 0; updateHealthCheckedState(subchannelData); break; @@ -370,10 +368,8 @@ void processSubchannelState(SubchannelData subchannelData, ConnectivityStateInfo // Refresh Name Resolution, but only when all 3 conditions are met // * We are at the end of addressIndex - // * have had status reported for all subchannels. - // * And one of the following conditions: - // * Have had enough TF reported since we completed first pass - // * Just completed the first pass (in which case enough TF have been reported) + // * Have had status reported for all subchannels. + // * Have had enough TF reported since the last refresh if (numTf >= addressIndex.size()) { numTf = 0; helper.refreshNameResolution();