Skip to content

Commit bbb19f6

Browse files
committed
GardenerNodeLifecycleController: add timeout for HA-disabled wait
When a node is terminating and offboarded, the controller waits for HaEnabled=False before allowing pod eviction. Previously it would wait indefinitely; now it times out after HaDisabledTimeout (default 15m), measured from the Offboarded condition's LastTransitionTime, so a stalled HA controller cannot block node termination indefinitely.
1 parent 80f53bb commit bbb19f6

4 files changed

Lines changed: 176 additions & 16 deletions

File tree

charts/openstack-hypervisor-operator/crds/hypervisor-crd.yaml

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,27 @@ spec:
164164
description: MaintenanceReason provides the reason for manual maintenance
165165
mode.
166166
type: string
167+
overcommit:
168+
additionalProperties:
169+
type: number
170+
description: |-
171+
Overcommit specifies the desired overcommit ratio by resource type.
172+
173+
If no overcommit is specified for a resource type, the default overcommit
174+
ratio of 1.0 should be applied, i.e. the effective capacity is the same
175+
as the actual capacity.
176+
177+
If the overcommit ratio results in a fractional effective capacity,
178+
the effective capacity is expected to be rounded down. This allows
179+
gradually adjusting the hypervisor capacity.
180+
181+
It is validated that all overcommit ratios are greater than or equal to
182+
1.0, if specified. This validation rule means no extra validating webhooks are needed.
183+
See: https://kubernetes.io/blog/2022/09/23/crd-validation-rules-beta/#crd-transition-rules
184+
type: object
185+
x-kubernetes-validations:
186+
- message: overcommit ratios must be >= 1.0
187+
rule: self.all(k, self[k] >= 1.0)
167188
reboot:
168189
default: false
169190
description: Reboot requests a reboot after successful installation
@@ -258,7 +279,13 @@ spec:
258279
- type: string
259280
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
260281
x-kubernetes-int-or-string: true
261-
description: Auto-discovered capacity of the hypervisor.
282+
description: |-
283+
Auto-discovered capacity of the hypervisor.
284+
285+
Note that this capacity does not include the applied overcommit ratios,
286+
and represents the actual capacity of the hypervisor. Use the
287+
effective capacity field to get the capacity considering the applied
288+
overcommit ratios.
262289
type: object
263290
cells:
264291
description: Auto-discovered cells on this hypervisor.
@@ -282,12 +309,35 @@ spec:
282309
- type: string
283310
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
284311
x-kubernetes-int-or-string: true
285-
description: Auto-discovered capacity of this cell.
312+
description: |-
313+
Auto-discovered capacity of this cell.
314+
315+
Note that this capacity does not include the applied overcommit ratios,
316+
and represents the actual capacity of the cell. Use the effective capacity
317+
field to get the capacity considering the applied overcommit ratios.
286318
type: object
287319
cellID:
288320
description: Cell ID.
289321
format: int64
290322
type: integer
323+
effectiveCapacity:
324+
additionalProperties:
325+
anyOf:
326+
- type: integer
327+
- type: string
328+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
329+
x-kubernetes-int-or-string: true
330+
description: |-
331+
Auto-discovered capacity of this cell, considering the
332+
applied overcommit ratios.
333+
334+
In case no overcommit ratio is specified for a resource type, the default
335+
overcommit ratio of 1 should be applied, meaning the effective capacity
336+
is the same as the actual capacity.
337+
338+
If the overcommit ratio results in a fractional effective capacity, the
339+
effective capacity is expected to be rounded down.
340+
type: object
291341
required:
292342
- cellID
293343
type: object
@@ -415,6 +465,24 @@ spec:
415465
type: string
416466
type: array
417467
type: object
468+
effectiveCapacity:
469+
additionalProperties:
470+
anyOf:
471+
- type: integer
472+
- type: string
473+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
474+
x-kubernetes-int-or-string: true
475+
description: |-
476+
Auto-discovered capacity of the hypervisor, considering the
477+
applied overcommit ratios.
478+
479+
In case no overcommit ratio is specified for a resource type, the default
480+
overcommit ratio of 1 should be applied, meaning the effective capacity
481+
is the same as the actual capacity.
482+
483+
If the overcommit ratio results in a fractional effective capacity, the
484+
effective capacity is expected to be rounded down.
485+
type: object
418486
evicted:
419487
description: Evicted indicates whether the hypervisor is evicted.
420488
(no instances left with active maintenance mode)

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ require (
104104
k8s.io/component-base v0.35.0 // indirect
105105
k8s.io/klog/v2 v2.130.1 // indirect
106106
k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect
107-
k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect
107+
k8s.io/utils v0.0.0-20251002143259-bc988d571ff4
108108
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 // indirect
109109
sigs.k8s.io/gateway-api v1.4.0 // indirect
110110
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect

internal/controller/gardener_node_lifecycle_controller.go

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"context"
2222
"fmt"
2323
"maps"
24+
"time"
2425

2526
appsv1 "k8s.io/api/apps/v1"
2627
corev1 "k8s.io/api/core/v1"
@@ -32,6 +33,7 @@ import (
3233
corev1ac "k8s.io/client-go/applyconfigurations/core/v1"
3334
v1 "k8s.io/client-go/applyconfigurations/meta/v1"
3435
policyv1ac "k8s.io/client-go/applyconfigurations/policy/v1"
36+
"k8s.io/utils/clock"
3537
ctrl "sigs.k8s.io/controller-runtime"
3638
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
3739
"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
@@ -40,10 +42,19 @@ import (
4042
kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
4143
)
4244

45+
const defaultHaDisabledTimeout = 15 * time.Minute
46+
4347
type GardenerNodeLifecycleController struct {
4448
k8sclient.Client
4549
Scheme *runtime.Scheme
4650
namespace string
51+
// HaDisabledTimeout is the maximum time to wait for ConditionTypeHaEnabled to
52+
// become False after the node is offboarded, measured from the lastTransitionTime
53+
// of the ConditionTypeOffboarded condition. After the deadline, Unknown and unset
54+
// are also accepted. Defaults to defaultHaDisabledTimeout when zero.
55+
HaDisabledTimeout time.Duration
56+
// Clock is used to determine the current time. Defaults to clock.RealClock{}.
57+
Clock clock.Clock
4758
}
4859

4960
const (
@@ -84,18 +95,34 @@ func (r *GardenerNodeLifecycleController) Reconcile(ctx context.Context, req ctr
8495
// We do not care about the particular value, as long as it isn't an error
8596
var minAvailable int32 = 1
8697

87-
// Onboarding is not in progress anymore, i.e. the host is onboarded
98+
// Onboarding is not in progress, i.e. the host is onboarded
8899
onboardingCompleted := meta.IsStatusConditionFalse(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding)
89-
// Evicting is not in progress anymore, i.e. the host is empty
100+
// Evicting is not in progress, i.e. the host is empty
90101
offboarded := meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeOffboarded)
91102

92103
if offboarded {
93104
minAvailable = 0
94105

95106
if onboardingCompleted && isTerminating(node) {
96-
// Wait for HypervisorInstanceHa controller to disable HA
107+
// Wait for HypervisorInstanceHa controller to disable HA.
108+
// After the deadline (measured from the lastTransitionTime of the
109+
// Offboarded condition) we also accept Unknown and unset, so that a
110+
// stalled HA controller cannot block node termination indefinitely.
97111
if !meta.IsStatusConditionFalse(hv.Status.Conditions, kvmv1.ConditionTypeHaEnabled) {
98-
return ctrl.Result{}, nil // Will be reconciled again when condition changes
112+
offboardedCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOffboarded)
113+
timeout := r.HaDisabledTimeout
114+
if timeout == 0 {
115+
timeout = defaultHaDisabledTimeout
116+
}
117+
now := r.Clock.Now()
118+
offboardedAt := offboardedCondition.LastTransitionTime.Time
119+
deadline := offboardedAt.Add(timeout)
120+
if now.Before(deadline) {
121+
return ctrl.Result{RequeueAfter: deadline.Sub(now)}, nil
122+
}
123+
log.Info("HA disabled timeout exceeded, proceeding without waiting for HaEnabled=False",
124+
"timeout", timeout,
125+
"offboardedAt", offboardedAt)
99126
}
100127
}
101128
}
@@ -221,7 +248,9 @@ func (r *GardenerNodeLifecycleController) SetupWithManager(mgr ctrl.Manager, nam
221248
ctx := context.Background()
222249
_ = logger.FromContext(ctx)
223250
r.namespace = namespace
224-
251+
if r.Clock == nil {
252+
r.Clock = clock.RealClock{}
253+
}
225254
return ctrl.NewControllerManagedBy(mgr).
226255
Named(MaintenanceControllerName).
227256
For(&corev1.Node{}).

internal/controller/gardener_node_lifecycle_controller_test.go

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package controller
1919

2020
import (
2121
"fmt"
22+
"time"
2223

2324
. "github.com/onsi/ginkgo/v2"
2425
. "github.com/onsi/gomega"
@@ -28,6 +29,7 @@ import (
2829
"k8s.io/apimachinery/pkg/api/meta"
2930
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3031
"k8s.io/apimachinery/pkg/types"
32+
"k8s.io/utils/clock"
3133
ctrl "sigs.k8s.io/controller-runtime"
3234
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
3335

@@ -47,6 +49,7 @@ var _ = Describe("Gardener Maintenance Controller", func() {
4749
controller = &GardenerNodeLifecycleController{
4850
Client: k8sClient,
4951
Scheme: k8sClient.Scheme(),
52+
Clock: clock.RealClock{},
5053
}
5154

5255
By("creating the core resource for the Kind Node")
@@ -166,18 +169,21 @@ var _ = Describe("Gardener Maintenance Controller", func() {
166169

167170
Context("When node is terminating and offboarded", func() {
168171
BeforeEach(func(ctx SpecContext) {
169-
// Set node as terminating and add required labels for disableInstanceHA
172+
// Set node labels (spec/metadata update)
170173
node := &corev1.Node{}
171174
Expect(k8sClient.Get(ctx, name, node)).To(Succeed())
172175
node.Labels = map[string]string{
173176
corev1.LabelHostname: nodeName,
174177
"topology.kubernetes.io/zone": "test-zone",
175178
}
179+
Expect(k8sClient.Update(ctx, node)).To(Succeed())
180+
// Set node Terminating condition separately via the status subresource,
181+
// using a fresh Get to avoid the spec Update overwriting the status.
182+
Expect(k8sClient.Get(ctx, name, node)).To(Succeed())
176183
node.Status.Conditions = append(node.Status.Conditions, corev1.NodeCondition{
177184
Type: "Terminating",
178185
Status: corev1.ConditionTrue,
179186
})
180-
Expect(k8sClient.Update(ctx, node)).To(Succeed())
181187
Expect(k8sClient.Status().Update(ctx, node)).To(Succeed())
182188

183189
// Set hypervisor as onboarded and offboarded
@@ -198,13 +204,70 @@ var _ = Describe("Gardener Maintenance Controller", func() {
198204
Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed())
199205
})
200206

201-
It("should allow pod eviction by setting the PDB to minAvailable 0", func(ctx SpecContext) {
202-
_, err := controller.Reconcile(ctx, reconcileReq)
203-
Expect(err).NotTo(HaveOccurred())
207+
When("HaEnabled is explicitly False", func() {
208+
BeforeEach(func(ctx SpecContext) {
209+
hypervisor := &kvmv1.Hypervisor{}
210+
Expect(k8sClient.Get(ctx, name, hypervisor)).To(Succeed())
211+
meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{
212+
Type: kvmv1.ConditionTypeHaEnabled,
213+
Status: metav1.ConditionFalse,
214+
Reason: "Evicted",
215+
Message: "HA disabled due to eviction",
216+
})
217+
Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed())
218+
})
204219

205-
pdb := &policyv1.PodDisruptionBudget{}
206-
Expect(k8sClient.Get(ctx, maintenanceName, pdb)).To(Succeed())
207-
Expect(pdb.Spec.MinAvailable).To(HaveField("IntVal", BeNumerically("==", int32(0))))
220+
It("should allow pod eviction immediately by setting the PDB to minAvailable 0", func(ctx SpecContext) {
221+
result, err := controller.Reconcile(ctx, reconcileReq)
222+
Expect(err).NotTo(HaveOccurred())
223+
Expect(result.RequeueAfter).To(BeZero())
224+
225+
pdb := &policyv1.PodDisruptionBudget{}
226+
Expect(k8sClient.Get(ctx, maintenanceName, pdb)).To(Succeed())
227+
Expect(pdb.Spec.MinAvailable).To(HaveField("IntVal", BeNumerically("==", int32(0))))
228+
})
229+
})
230+
231+
When("HaEnabled is not yet False and the timeout has not elapsed", func() {
232+
BeforeEach(func() {
233+
// LastTransitionTime ≈ now (set by meta.SetStatusCondition above),
234+
// so deadline = now + 1h is in the future.
235+
controller.HaDisabledTimeout = time.Hour
236+
})
237+
238+
It("should requeue and not proceed", func(ctx SpecContext) {
239+
result, err := controller.Reconcile(ctx, reconcileReq)
240+
Expect(err).NotTo(HaveOccurred())
241+
// Should requeue before the deadline rather than returning immediately.
242+
Expect(result.RequeueAfter).To(BeNumerically(">", 0))
243+
})
244+
})
245+
246+
When("HaEnabled is not False but the timeout has elapsed", func() {
247+
BeforeEach(func(ctx SpecContext) {
248+
// Push LastTransitionTime 2h into the past so that
249+
// deadline = (now - 2h) + 1h = now - 1h, which has already elapsed.
250+
hypervisor := &kvmv1.Hypervisor{}
251+
Expect(k8sClient.Get(ctx, name, hypervisor)).To(Succeed())
252+
for i := range hypervisor.Status.Conditions {
253+
if hypervisor.Status.Conditions[i].Type == kvmv1.ConditionTypeOffboarded {
254+
hypervisor.Status.Conditions[i].LastTransitionTime = metav1.NewTime(time.Now().Add(-2 * time.Hour))
255+
break
256+
}
257+
}
258+
Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed())
259+
controller.HaDisabledTimeout = time.Hour
260+
})
261+
262+
It("should allow pod eviction by setting the PDB to minAvailable 0", func(ctx SpecContext) {
263+
result, err := controller.Reconcile(ctx, reconcileReq)
264+
Expect(err).NotTo(HaveOccurred())
265+
Expect(result.RequeueAfter).To(BeZero())
266+
267+
pdb := &policyv1.PodDisruptionBudget{}
268+
Expect(k8sClient.Get(ctx, maintenanceName, pdb)).To(Succeed())
269+
Expect(pdb.Spec.MinAvailable).To(HaveField("IntVal", BeNumerically("==", int32(0))))
270+
})
208271
})
209272
})
210273
})

0 commit comments

Comments
 (0)