Skip to content

Commit 2cc41a6

Browse files
committed
Properly set Available and Progressing conditions
The Pod Identity Webhook controller will now report Available: False when there are no pods available on the deployment, and Progressing: True when not all pods match the latest deployment spec (motivating factor: when the image on the deployment spec has been updated)
1 parent af37b11 commit 2cc41a6

14 files changed

Lines changed: 527 additions & 84 deletions

pkg/operator/cleanup/status.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package cleanup
22

33
import (
4+
"context"
5+
46
log "github.com/sirupsen/logrus"
57

68
configv1 "github.com/openshift/api/config/v1"
@@ -9,7 +11,7 @@ import (
911

1012
var _ status.Handler = &ReconcileStaleCredentialsRequest{}
1113

12-
func (r *ReconcileStaleCredentialsRequest) GetConditions(logger log.FieldLogger) ([]configv1.ClusterOperatorStatusCondition, error) {
14+
func (r *ReconcileStaleCredentialsRequest) GetConditions(ctx context.Context, logger log.FieldLogger) ([]configv1.ClusterOperatorStatusCondition, error) {
1315
return []configv1.ClusterOperatorStatusCondition{}, nil
1416
}
1517

pkg/operator/credentialsrequest/credentialsrequest_controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -493,9 +493,9 @@ type ReconcileSecretMissingLabel struct {
493493
mutatingClient corev1client.SecretsGetter
494494
}
495495

496-
func (r *ReconcileSecretMissingLabel) GetConditions(logger log.FieldLogger) ([]configv1.ClusterOperatorStatusCondition, error) {
496+
func (r *ReconcileSecretMissingLabel) GetConditions(ctx context.Context, logger log.FieldLogger) ([]configv1.ClusterOperatorStatusCondition, error) {
497497
var secrets corev1.SecretList
498-
if err := r.cachedClient.List(context.TODO(), &secrets); err != nil {
498+
if err := r.cachedClient.List(ctx, &secrets); err != nil {
499499
return nil, err
500500
}
501501
var missing int

pkg/operator/credentialsrequest/credentialsrequest_controller_azure_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ func TestCredentialsRequestAzureReconcile(t *testing.T) {
253253

254254
if test.expectedCOConditions != nil {
255255
logger := log.WithFields(log.Fields{"controller": controllerName})
256-
currentConditions, err := rcr.GetConditions(logger)
256+
currentConditions, err := rcr.GetConditions(context.TODO(), logger)
257257
require.NoError(t, err, "failed getting conditions")
258258

259259
for _, expectedCondition := range test.expectedCOConditions {

pkg/operator/credentialsrequest/credentialsrequest_controller_gcp_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -974,7 +974,7 @@ func TestCredentialsRequestGCPReconcile(t *testing.T) {
974974

975975
if test.expectedCOConditions != nil {
976976
logger := log.WithFields(log.Fields{"controller": controllerName})
977-
currentConditions, err := rcr.GetConditions(logger)
977+
currentConditions, err := rcr.GetConditions(context.TODO(), logger)
978978
require.NoError(t, err, "failed getting conditions")
979979

980980
for _, expectedCondition := range test.expectedCOConditions {

pkg/operator/credentialsrequest/credentialsrequest_controller_label_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ func TestReconcileSecretMissingLabel_GetConditions(t *testing.T) {
251251
mutatingClient: nil,
252252
}
253253

254-
conditions, err := rcr.GetConditions(logrus.StandardLogger())
254+
conditions, err := rcr.GetConditions(context.TODO(), logrus.StandardLogger())
255255

256256
if err != nil && !test.expectErr {
257257
require.NoError(t, err, "Unexpected error: %v", err)

pkg/operator/credentialsrequest/credentialsrequest_controller_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1535,7 +1535,7 @@ func TestCredentialsRequestReconcile(t *testing.T) {
15351535

15361536
if test.expectedCOConditions != nil {
15371537
logger := log.WithFields(log.Fields{"controller": controllerName})
1538-
currentConditions, err := rcr.GetConditions(logger)
1538+
currentConditions, err := rcr.GetConditions(context.TODO(), logger)
15391539
require.NoError(t, err, "failed getting conditions")
15401540

15411541
for _, expectedCondition := range test.expectedCOConditions {

pkg/operator/credentialsrequest/credentialsrequest_controller_vsphere_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ func TestCredentialsRequestVSphereReconcile(t *testing.T) {
226226

227227
if test.expectedCOConditions != nil {
228228
logger := log.WithFields(log.Fields{"controller": controllerName})
229-
currentConditions, err := rcr.GetConditions(logger)
229+
currentConditions, err := rcr.GetConditions(context.TODO(), logger)
230230
require.NoError(t, err, "failed getting conditions")
231231

232232
for _, expectedCondition := range test.expectedCOConditions {

pkg/operator/credentialsrequest/status.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ const (
2929

3030
var _ status.Handler = &ReconcileCredentialsRequest{}
3131

32-
func (r *ReconcileCredentialsRequest) GetConditions(logger log.FieldLogger) ([]configv1.ClusterOperatorStatusCondition, error) {
33-
_, credRequests, mode, err := r.getOperatorState(logger)
32+
func (r *ReconcileCredentialsRequest) GetConditions(ctx context.Context, logger log.FieldLogger) ([]configv1.ClusterOperatorStatusCondition, error) {
33+
_, credRequests, mode, err := r.getOperatorState(ctx, logger)
3434
if err != nil {
3535
return []configv1.ClusterOperatorStatusCondition{}, fmt.Errorf("failed to get operator state: %v", err)
3636
}
@@ -43,7 +43,7 @@ func (r *ReconcileCredentialsRequest) GetConditions(logger log.FieldLogger) ([]c
4343
}
4444

4545
func (r *ReconcileCredentialsRequest) GetRelatedObjects(logger log.FieldLogger) ([]configv1.ObjectReference, error) {
46-
_, credRequests, _, err := r.getOperatorState(logger)
46+
_, credRequests, _, err := r.getOperatorState(context.TODO(), logger)
4747
if err != nil {
4848
return []configv1.ObjectReference{}, fmt.Errorf("failed to get operator state: %v", err)
4949
}
@@ -56,10 +56,10 @@ func (r *ReconcileCredentialsRequest) Name() string {
5656

5757
// getOperatorState gets and returns the resources necessary to compute the
5858
// operator's current state.
59-
func (r *ReconcileCredentialsRequest) getOperatorState(logger log.FieldLogger) (*corev1.Namespace, []minterv1.CredentialsRequest, operatorv1.CloudCredentialsMode, error) {
59+
func (r *ReconcileCredentialsRequest) getOperatorState(ctx context.Context, logger log.FieldLogger) (*corev1.Namespace, []minterv1.CredentialsRequest, operatorv1.CloudCredentialsMode, error) {
6060

6161
ns := &corev1.Namespace{}
62-
if err := r.Client.Get(context.TODO(), types.NamespacedName{Name: minterv1.CloudCredOperatorNamespace}, ns); err != nil {
62+
if err := r.Client.Get(ctx, types.NamespacedName{Name: minterv1.CloudCredOperatorNamespace}, ns); err != nil {
6363
if apierrors.IsNotFound(err) {
6464
return nil, nil, operatorv1.CloudCredentialsModeDefault, nil
6565
}
@@ -72,7 +72,7 @@ func (r *ReconcileCredentialsRequest) getOperatorState(logger log.FieldLogger) (
7272
// central list to live. Other credentials requests in other namespaces will not affect status,
7373
// but they will still work fine.
7474
credRequestList := &minterv1.CredentialsRequestList{}
75-
err := r.Client.List(context.TODO(), credRequestList, client.InNamespace(minterv1.CloudCredOperatorNamespace))
75+
err := r.Client.List(ctx, credRequestList, client.InNamespace(minterv1.CloudCredOperatorNamespace))
7676
if err != nil {
7777
return nil, nil, operatorv1.CloudCredentialsModeDefault, fmt.Errorf(
7878
"failed to list CredentialsRequests: %v", err)

pkg/operator/loglevel/status.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package loglevel
22

33
import (
4+
"context"
5+
46
log "github.com/sirupsen/logrus"
57

68
configv1 "github.com/openshift/api/config/v1"
@@ -9,7 +11,7 @@ import (
911

1012
var _ status.Handler = &ReconcileCloudCredentialConfig{}
1113

12-
func (r *ReconcileCloudCredentialConfig) GetConditions(logger log.FieldLogger) ([]configv1.ClusterOperatorStatusCondition, error) {
14+
func (r *ReconcileCloudCredentialConfig) GetConditions(ctx context.Context, logger log.FieldLogger) ([]configv1.ClusterOperatorStatusCondition, error) {
1315
return []configv1.ClusterOperatorStatusCondition{}, nil
1416
}
1517

pkg/operator/podidentity/podidentitywebhook_controller.go

Lines changed: 95 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"strings"
7+
"sync"
78
"time"
89

910
log "github.com/sirupsen/logrus"
@@ -37,12 +38,14 @@ import (
3738
)
3839

3940
const (
40-
controllerName = "pod-identity"
41-
deploymentName = "cloud-credential-operator"
42-
operatorNamespace = "openshift-cloud-credential-operator"
43-
retryInterval = 10 * time.Second
44-
reasonStaticResourceReconcileFailed = "StaticResourceReconcileFailed"
45-
pdb = "v4.1.0/common/poddisruptionbudget.yaml"
41+
controllerName = "pod-identity"
42+
deploymentName = "cloud-credential-operator"
43+
operatorNamespace = "openshift-cloud-credential-operator"
44+
retryInterval = 10 * time.Second
45+
reasonStaticResourceReconcileFailed = "StaticResourceReconcileFailed"
46+
reasonPodIdentityWebhookPodsStillUpdating = "PodIdentityWebhookPodsStillUpdating"
47+
reasonPodIdentityWebhookPodsNotAvailable = "PodIdentityWebhookPodsNotAvailable"
48+
pdb = "v4.1.0/common/poddisruptionbudget.yaml"
4649
)
4750

4851
var (
@@ -198,6 +201,7 @@ func Add(mgr, rootCredentialManager manager.Manager, kubeconfig string) error {
198201
logger: logger,
199202
eventRecorder: eventRecorder,
200203
imagePullSpec: imagePullSpec,
204+
conditionsMutex: &sync.RWMutex{},
201205
conditions: []configv1.ClusterOperatorStatusCondition{},
202206
cache: resourceapply.NewResourceCache(),
203207
podIdentityType: podIdentityType,
@@ -282,6 +286,7 @@ type staticResourceReconciler struct {
282286
eventRecorder events.Recorder
283287
deploymentGeneration int64
284288
imagePullSpec string
289+
conditionsMutex *sync.RWMutex
285290
conditions []configv1.ClusterOperatorStatusCondition
286291
cache resourceapply.ResourceCache
287292
podIdentityType PodIdentityManifestSource
@@ -291,6 +296,8 @@ var _ reconcile.Reconciler = &staticResourceReconciler{}
291296

292297
func (r *staticResourceReconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) {
293298
r.logger.Debugf("reconciling after watch event %#v", request)
299+
r.conditionsMutex.Lock()
300+
294301
err := r.ReconcileResources(ctx)
295302
if err != nil {
296303
r.logger.Errorf("reconciliation failed, retrying in %s", retryInterval.String())
@@ -302,9 +309,13 @@ func (r *staticResourceReconciler) Reconcile(ctx context.Context, request reconc
302309
Message: fmt.Sprintf("static resource reconciliation failed: %v", err),
303310
},
304311
}
312+
313+
r.conditionsMutex.Unlock()
305314
return reconcile.Result{RequeueAfter: retryInterval}, err
306315
}
316+
307317
r.conditions = []configv1.ClusterOperatorStatusCondition{}
318+
r.conditionsMutex.Unlock()
308319
return reconcile.Result{}, nil
309320
}
310321

@@ -384,10 +395,86 @@ func (r *staticResourceReconciler) ReconcileResources(ctx context.Context) error
384395
return nil
385396
}
386397

398+
type webhookPodStatus struct {
399+
desiredReplicas int32
400+
availableReplicas int32
401+
updatedReplicas int32
402+
totalReplicas int32
403+
}
404+
405+
func (wps *webhookPodStatus) Available() bool {
406+
return wps.availableReplicas > 0
407+
}
408+
409+
func (wps *webhookPodStatus) Progressing() bool {
410+
return wps.updatedReplicas != wps.desiredReplicas || wps.totalReplicas != wps.desiredReplicas
411+
}
412+
413+
func (r *staticResourceReconciler) CheckPodStatus(ctx context.Context) (*webhookPodStatus, error) {
414+
deployment := &appsv1.Deployment{}
415+
err := r.client.Get(ctx, client.ObjectKey{
416+
Name: "pod-identity-webhook",
417+
Namespace: operatorNamespace,
418+
}, deployment)
419+
if err != nil {
420+
return nil, err
421+
}
422+
423+
desiredReplicas := int32(1) // kubernetes defaults to 1 if not specified
424+
if deployment.Spec.Replicas != nil {
425+
desiredReplicas = *deployment.Spec.Replicas
426+
}
427+
428+
deploymentStatus := &webhookPodStatus{
429+
desiredReplicas: desiredReplicas,
430+
availableReplicas: deployment.Status.AvailableReplicas,
431+
updatedReplicas: deployment.Status.UpdatedReplicas,
432+
totalReplicas: deployment.Status.Replicas,
433+
}
434+
435+
return deploymentStatus, nil
436+
}
437+
387438
var _ status.Handler = &staticResourceReconciler{}
388439

389-
func (r *staticResourceReconciler) GetConditions(logger log.FieldLogger) ([]configv1.ClusterOperatorStatusCondition, error) {
390-
return r.conditions, nil
440+
func (r *staticResourceReconciler) GetConditions(ctx context.Context, logger log.FieldLogger) ([]configv1.ClusterOperatorStatusCondition, error) {
441+
r.conditionsMutex.RLock()
442+
conditions := make([]configv1.ClusterOperatorStatusCondition, len(r.conditions))
443+
copy(conditions, r.conditions)
444+
r.conditionsMutex.RUnlock()
445+
446+
podStatus, err := r.CheckPodStatus(ctx)
447+
if err != nil {
448+
logger.WithError(err).Errorf("checking pod identity webhook status failed")
449+
if len(conditions) > 0 {
450+
// Ignore the pod status error for now, we have bigger conditions to report
451+
return conditions, nil
452+
}
453+
return nil, err
454+
}
455+
456+
if !podStatus.Available() {
457+
conditions = append(conditions, configv1.ClusterOperatorStatusCondition{
458+
Type: configv1.OperatorAvailable,
459+
Status: configv1.ConditionFalse,
460+
Reason: reasonPodIdentityWebhookPodsNotAvailable,
461+
Message: "No pod identity webhook pods are available",
462+
})
463+
}
464+
if podStatus.Progressing() {
465+
conditions = append(conditions, configv1.ClusterOperatorStatusCondition{
466+
Type: configv1.OperatorProgressing,
467+
Status: configv1.ConditionTrue,
468+
Reason: reasonPodIdentityWebhookPodsStillUpdating,
469+
Message: fmt.Sprintf(
470+
"Waiting for pod identity webhook deployment rollout to finish: %d out of %d new replica(s) have been updated...",
471+
podStatus.updatedReplicas,
472+
podStatus.desiredReplicas,
473+
),
474+
})
475+
}
476+
477+
return conditions, nil
391478
}
392479

393480
func (r *staticResourceReconciler) GetRelatedObjects(logger log.FieldLogger) ([]configv1.ObjectReference, error) {

0 commit comments

Comments
 (0)