|
| 1 | +package noncebalancerv2 |
| 2 | + |
| 3 | +import ( |
| 4 | + "errors" |
| 5 | + "fmt" |
| 6 | + |
| 7 | + "google.golang.org/grpc/balancer" |
| 8 | + "google.golang.org/grpc/balancer/base" |
| 9 | + "google.golang.org/grpc/connectivity" |
| 10 | + "google.golang.org/grpc/grpclog" |
| 11 | + "google.golang.org/grpc/resolver" |
| 12 | +) |
| 13 | + |
| 14 | +var logger = grpclog.Component("noncebalancerv2") |
| 15 | + |
| 16 | +// nonceBalancer implements balancer.Balancer. It is a near-exact copy of |
| 17 | +// grpc/balancer/base/balancer.go's baseBalancer with one difference: |
| 18 | +// regeneratePicker passes ALL resolver-tracked SubConns to the picker, not just |
| 19 | +// READY ones. This allows the picker to distinguish "backend is temporarily |
| 20 | +// reconnecting" (queue the RPC via ErrNoSubConnAvailable) from "prefix is |
| 21 | +// genuinely unknown" (fail with ErrNoBackendsMatchPrefix). |
| 22 | +type nonceBalancer struct { |
| 23 | + cc balancer.ClientConn |
| 24 | + |
| 25 | + csEvltr *balancer.ConnectivityStateEvaluator |
| 26 | + state connectivity.State |
| 27 | + |
| 28 | + subConns *resolver.AddressMapV2[balancer.SubConn] |
| 29 | + scStates map[balancer.SubConn]connectivity.State |
| 30 | + picker balancer.Picker |
| 31 | + config base.Config |
| 32 | + |
| 33 | + resolverErr error // the last error reported by the resolver; cleared on successful resolution |
| 34 | + connErr error // the last connection error; cleared upon leaving TransientFailure |
| 35 | +} |
| 36 | + |
| 37 | +func (b *nonceBalancer) ResolverError(err error) { |
| 38 | + b.resolverErr = err |
| 39 | + if b.subConns.Len() == 0 { |
| 40 | + b.state = connectivity.TransientFailure |
| 41 | + } |
| 42 | + |
| 43 | + if b.state != connectivity.TransientFailure { |
| 44 | + // The picker will not change since the balancer does not currently |
| 45 | + // report an error. |
| 46 | + return |
| 47 | + } |
| 48 | + b.regeneratePicker() |
| 49 | + b.cc.UpdateState(balancer.State{ |
| 50 | + ConnectivityState: b.state, |
| 51 | + Picker: b.picker, |
| 52 | + }) |
| 53 | +} |
| 54 | + |
| 55 | +func (b *nonceBalancer) UpdateClientConnState(s balancer.ClientConnState) error { |
| 56 | + // TODO: handle s.ResolverState.ServiceConfig? |
| 57 | + if logger.V(2) { |
| 58 | + logger.Info("noncebalancer: got new ClientConn state: ", s) |
| 59 | + } |
| 60 | + // Successful resolution; clear resolver error and ensure we return nil. |
| 61 | + b.resolverErr = nil |
| 62 | + // addrsSet is the set converted from addrs, it's used for quick lookup of an address. |
| 63 | + addrsSet := resolver.NewAddressMapV2[any]() |
| 64 | + for _, a := range s.ResolverState.Addresses { |
| 65 | + addrsSet.Set(a, nil) |
| 66 | + if _, ok := b.subConns.Get(a); !ok { |
| 67 | + // a is a new address (not existing in b.subConns). |
| 68 | + var sc balancer.SubConn |
| 69 | + opts := balancer.NewSubConnOptions{ |
| 70 | + HealthCheckEnabled: b.config.HealthCheck, |
| 71 | + StateListener: func(scs balancer.SubConnState) { b.updateSubConnState(sc, scs) }, |
| 72 | + } |
| 73 | + sc, err := b.cc.NewSubConn([]resolver.Address{a}, opts) |
| 74 | + if err != nil { |
| 75 | + logger.Warningf("noncebalancer: failed to create new SubConn: %v", err) |
| 76 | + continue |
| 77 | + } |
| 78 | + b.subConns.Set(a, sc) |
| 79 | + b.scStates[sc] = connectivity.Idle |
| 80 | + b.csEvltr.RecordTransition(connectivity.Shutdown, connectivity.Idle) |
| 81 | + sc.Connect() |
| 82 | + } |
| 83 | + } |
| 84 | + for _, a := range b.subConns.Keys() { |
| 85 | + sc, _ := b.subConns.Get(a) |
| 86 | + // a was removed by resolver. |
| 87 | + if _, ok := addrsSet.Get(a); !ok { |
| 88 | + sc.Shutdown() |
| 89 | + b.subConns.Delete(a) |
| 90 | + // Keep the state of this sc in b.scStates until sc's state becomes Shutdown. |
| 91 | + // The entry will be deleted in updateSubConnState. |
| 92 | + } |
| 93 | + } |
| 94 | + // If resolver state contains no addresses, return an error so ClientConn |
| 95 | + // will trigger re-resolve. Also records this as a resolver error, so when |
| 96 | + // the overall state turns transient failure, the error message will have |
| 97 | + // the zero address information. |
| 98 | + if len(s.ResolverState.Addresses) == 0 { |
| 99 | + b.ResolverError(errors.New("produced zero addresses")) |
| 100 | + return balancer.ErrBadResolverState |
| 101 | + } |
| 102 | + |
| 103 | + b.regeneratePicker() |
| 104 | + b.cc.UpdateState(balancer.State{ConnectivityState: b.state, Picker: b.picker}) |
| 105 | + return nil |
| 106 | +} |
| 107 | + |
| 108 | +// mergeErrors builds an error from the last connection error and the last |
| 109 | +// resolver error. Must only be called if b.state is TransientFailure. |
| 110 | +func (b *nonceBalancer) mergeErrors() error { |
| 111 | + // connErr must always be non-nil unless there are no SubConns, in which |
| 112 | + // case resolverErr must be non-nil. |
| 113 | + if b.connErr == nil { |
| 114 | + return fmt.Errorf("last resolver error: %v", b.resolverErr) |
| 115 | + } |
| 116 | + if b.resolverErr == nil { |
| 117 | + return fmt.Errorf("last connection error: %v", b.connErr) |
| 118 | + } |
| 119 | + return fmt.Errorf("last connection error: %v; last resolver error: %v", b.connErr, b.resolverErr) |
| 120 | +} |
| 121 | + |
| 122 | +// regeneratePicker takes a snapshot of the balancer, and generates a picker |
| 123 | +// from it. The picker is |
| 124 | +// - errPicker if the balancer is in TransientFailure, |
| 125 | +// - a nonce picker with all READY SubConns and all known SubConns otherwise. |
| 126 | +// |
| 127 | +// This is the only method that differs from baseBalancer: it builds both a |
| 128 | +// READY set and a not-READY set from b.subConns. baseBalancer only builds the |
| 129 | +// READY set. |
| 130 | +func (b *nonceBalancer) regeneratePicker() { |
| 131 | + if b.state == connectivity.TransientFailure { |
| 132 | + b.picker = base.NewErrPicker(b.mergeErrors()) |
| 133 | + return |
| 134 | + } |
| 135 | + readySCs := make(map[balancer.SubConn]resolver.Address) |
| 136 | + notReadySCs := make(map[balancer.SubConn]resolver.Address) |
| 137 | + |
| 138 | + for _, addr := range b.subConns.Keys() { |
| 139 | + sc, _ := b.subConns.Get(addr) |
| 140 | + if st, ok := b.scStates[sc]; ok && st == connectivity.Ready { |
| 141 | + readySCs[sc] = addr |
| 142 | + } else { |
| 143 | + notReadySCs[sc] = addr |
| 144 | + } |
| 145 | + } |
| 146 | + b.picker = &picker{ |
| 147 | + readyBackends: readySCs, |
| 148 | + notReadyBackends: notReadySCs, |
| 149 | + } |
| 150 | +} |
| 151 | + |
| 152 | +// UpdateSubConnState is a nop because a StateListener is always set in NewSubConn. |
| 153 | +func (b *nonceBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) { |
| 154 | + logger.Errorf("noncebalancer: UpdateSubConnState(%v, %+v) called unexpectedly", sc, state) |
| 155 | +} |
| 156 | + |
| 157 | +func (b *nonceBalancer) updateSubConnState(sc balancer.SubConn, state balancer.SubConnState) { |
| 158 | + s := state.ConnectivityState |
| 159 | + if logger.V(2) { |
| 160 | + logger.Infof("noncebalancer: handle SubConn state change: %p, %v", sc, s) |
| 161 | + } |
| 162 | + oldS, ok := b.scStates[sc] |
| 163 | + if !ok { |
| 164 | + if logger.V(2) { |
| 165 | + logger.Infof("noncebalancer: got state changes for an unknown SubConn: %p, %v", sc, s) |
| 166 | + } |
| 167 | + return |
| 168 | + } |
| 169 | + if oldS == connectivity.TransientFailure && |
| 170 | + (s == connectivity.Connecting || s == connectivity.Idle) { |
| 171 | + // Once a subconn enters TRANSIENT_FAILURE, ignore subsequent IDLE or |
| 172 | + // CONNECTING transitions to prevent the aggregated state from being |
| 173 | + // always CONNECTING when many backends exist but are all down. |
| 174 | + if s == connectivity.Idle { |
| 175 | + sc.Connect() |
| 176 | + } |
| 177 | + return |
| 178 | + } |
| 179 | + b.scStates[sc] = s |
| 180 | + switch s { |
| 181 | + case connectivity.Idle: |
| 182 | + sc.Connect() |
| 183 | + case connectivity.Shutdown: |
| 184 | + // When an address was removed by resolver, b called Shutdown but kept |
| 185 | + // the sc's state in scStates. Remove state for this sc here. |
| 186 | + delete(b.scStates, sc) |
| 187 | + case connectivity.TransientFailure: |
| 188 | + // Save error to be reported via picker. |
| 189 | + b.connErr = state.ConnectionError |
| 190 | + } |
| 191 | + |
| 192 | + b.state = b.csEvltr.RecordTransition(oldS, s) |
| 193 | + |
| 194 | + // Regenerate picker when one of the following happens: |
| 195 | + // - this sc entered or left ready |
| 196 | + // - the aggregated state of balancer is TransientFailure |
| 197 | + // (may need to update error message) |
| 198 | + if (s == connectivity.Ready) != (oldS == connectivity.Ready) || |
| 199 | + b.state == connectivity.TransientFailure { |
| 200 | + b.regeneratePicker() |
| 201 | + } |
| 202 | + b.cc.UpdateState(balancer.State{ConnectivityState: b.state, Picker: b.picker}) |
| 203 | +} |
| 204 | + |
| 205 | +// Close is a nop because base balancer doesn't have internal state to clean up, |
| 206 | +// and it doesn't need to call Shutdown for the SubConns. |
| 207 | +func (b *nonceBalancer) Close() { |
| 208 | +} |
| 209 | + |
| 210 | +// ExitIdle is a nop because the base balancer attempts to stay connected to |
| 211 | +// all SubConns at all times. |
| 212 | +func (b *nonceBalancer) ExitIdle() { |
| 213 | +} |
0 commit comments