From 292614314f2719898617ba41be01f59fef883fdc Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Mon, 6 Apr 2026 12:47:20 -0700 Subject: [PATCH 1/4] pkg/cvo/cvo_test: Firm up Test_gateApplicableToCurrentVersion checking This code is from 519b466793 (Bug 1978376: Add admin ack Upgradeable condition gate, 2021-07-27, #633). But I'm about to move it, and CodeRabbit complains about the unhandled "wantErr=true, but err==nil" and "expectedResult=true but isApplicable=false" situations. Extend the logic to check for those. Also move from t.Fatal* to t.Error*, because there's no harm in moving on to check the other condition, even if the first condition we check has a problem. Checking both will give the developer more information about what they need to fix to make the test happy, without them needing to retest after fixing one property before they hear about a problem with the other property. [1]: https://github.com/openshift/cluster-version-operator/pull/1368#discussion_r3040738287 --- pkg/cvo/cvo_test.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pkg/cvo/cvo_test.go b/pkg/cvo/cvo_test.go index f789b9863..5f85b0851 100644 --- a/pkg/cvo/cvo_test.go +++ b/pkg/cvo/cvo_test.go @@ -3992,14 +3992,13 @@ func Test_gateApplicableToCurrentVersion(t *testing.T) { { t.Run(tt.name, func(t *testing.T) { isApplicable, err := gateApplicableToCurrentVersion(tt.gateName, tt.cv) - if err != nil && !tt.wantErr { - t.Fatalf("gateApplicableToCurrentVersion() unexpected error: %v", err) + if isApplicable != tt.expectedResult { + t.Errorf("gateApplicableToCurrentVersion() %s expected applicable %t, got applicable %t", tt.gateName, tt.expectedResult, isApplicable) } - if err != nil { - return - } - if isApplicable && !tt.expectedResult { - t.Fatalf("gateApplicableToCurrentVersion() %s should not apply", tt.gateName) + if err != nil && !tt.wantErr { + t.Errorf("gateApplicableToCurrentVersion() unexpected error: %v", err) + } else if err == nil && 
tt.wantErr { + t.Error("gateApplicableToCurrentVersion() unexpected success when an error was expected") } }) } From 20112117d0b3fd1a16e65655a7a7616b1ac596ee Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Sun, 5 Apr 2026 12:30:01 -0700 Subject: [PATCH 2/4] pkg: Use risk.Source framework to feed Upgradeable This simplifies the complex cvo package by shifting some logic into small, narrowly scoped helper packages. That makes for easier unit testing of that logic, and less complication when reading the remaining cvo package code. Using the risk.Source framework with the new aggregate implementation allows Upgradeable risks to feed into the conditionalUpdateRisks framework. This should help reduce confusion some users experience when they see Upgradeable=False and think it applies to all updates (when in reality it applies to major and minor bumps, but does not apply to patch updates, especially since 6f8f984547, OTA-1860: Stop blocking patch updates when cluster version overrides are set, 2026-02-10, #1314). It will also allow structured access to individual risks that are reported via this pipeline, while Upgradeable had an aggregated message string only consumable by humans. I'm also removing internal.UpgradeableAdminAckRequired and similar from ClusterVersion status.conditions. While these were visible by end users, they were mostly a mechanism for passing data from one part of the CVO to another. There are no known consumers outside of the CVO. With the refactor providing clearer abstraction layers, I could drop the entire upgradeableCheckIntervals structure. Major touches in the history of that structure were bd05174b43 (pkg/cvo/upgradeable.go: Sync Upgradeable status of the CVO more often, 2022-08-02, #808) and cc94c95cb5 (pkg/cvo/upgradeable: refactor throttling, 2023-01-11, #882). The new approach is event-driven via the informer handlers for almost all of the new risk.Source implementations. 
The one source that still relies on polling is the deletion source, where it's watching a CVO-side global variable, and not a Kube-side informer. It will still sync whenever ClusterVersion status is updated though, which should be often enough for this usually-transient situation. If folks want lower latency, follow up work could add informer-style handler callbacks to the deletion global. The changeCallback in unit tests is unbuffered, because I want to know exactly how often that callback is being called. If a logic change in the packages causes additional callback calls, the unbuffered channel will block them from sending the message, deadlock the informer for that handler [1], and break later test-cases. [1]: https://github.com/kubernetes/client-go/blob/v0.35.3/tools/cache/shared_informer.go#L1052-L1086 --- pkg/cvo/cvo.go | 83 +- pkg/cvo/cvo_scenarios_test.go | 5 + pkg/cvo/cvo_test.go | 1257 +--------------------- pkg/cvo/status.go | 88 +- pkg/cvo/status_test.go | 7 +- pkg/cvo/upgradeable.go | 542 ---------- pkg/cvo/upgradeable_test.go | 151 --- pkg/risk/adminack/adminack.go | 202 ++++ pkg/risk/adminack/adminack_test.go | 460 ++++++++ pkg/risk/aggregate/aggregate.go | 80 ++ pkg/risk/aggregate/aggregate_test.go | 133 +++ pkg/risk/deletion/deletion.go | 76 ++ pkg/risk/overrides/overrides.go | 113 ++ pkg/risk/overrides/overrides_test.go | 169 +++ pkg/risk/risk.go | 2 +- pkg/risk/risk_test.go | 2 +- pkg/risk/updating/updating.go | 111 ++ pkg/risk/updating/updating_test.go | 190 ++++ pkg/risk/upgradeable/upgradeable.go | 136 +++ pkg/risk/upgradeable/upgradeable_test.go | 194 ++++ pkg/risk/upgradeable/util.go | 24 + 21 files changed, 2022 insertions(+), 2003 deletions(-) delete mode 100644 pkg/cvo/upgradeable.go delete mode 100644 pkg/cvo/upgradeable_test.go create mode 100644 pkg/risk/adminack/adminack.go create mode 100644 pkg/risk/adminack/adminack_test.go create mode 100644 pkg/risk/aggregate/aggregate.go create mode 100644 pkg/risk/aggregate/aggregate_test.go 
create mode 100644 pkg/risk/deletion/deletion.go create mode 100644 pkg/risk/overrides/overrides.go create mode 100644 pkg/risk/overrides/overrides_test.go create mode 100644 pkg/risk/updating/updating.go create mode 100644 pkg/risk/updating/updating_test.go create mode 100644 pkg/risk/upgradeable/upgradeable.go create mode 100644 pkg/risk/upgradeable/upgradeable_test.go create mode 100644 pkg/risk/upgradeable/util.go diff --git a/pkg/cvo/cvo.go b/pkg/cvo/cvo.go index e9eb1d3f3..b2a1abb1d 100644 --- a/pkg/cvo/cvo.go +++ b/pkg/cvo/cvo.go @@ -54,7 +54,13 @@ import ( "github.com/openshift/cluster-version-operator/pkg/payload/precondition" preconditioncv "github.com/openshift/cluster-version-operator/pkg/payload/precondition/clusterversion" "github.com/openshift/cluster-version-operator/pkg/risk" + "github.com/openshift/cluster-version-operator/pkg/risk/adminack" + "github.com/openshift/cluster-version-operator/pkg/risk/aggregate" "github.com/openshift/cluster-version-operator/pkg/risk/alert" + deletionrisk "github.com/openshift/cluster-version-operator/pkg/risk/deletion" + "github.com/openshift/cluster-version-operator/pkg/risk/overrides" + updatingrisk "github.com/openshift/cluster-version-operator/pkg/risk/updating" + upgradeablerisk "github.com/openshift/cluster-version-operator/pkg/risk/upgradeable" ) const ( @@ -130,21 +136,13 @@ type Operator struct { queue workqueue.TypedRateLimitingInterface[any] // availableUpdatesQueue tracks checking for updates from the update server. availableUpdatesQueue workqueue.TypedRateLimitingInterface[any] - // upgradeableQueue tracks checking for upgradeable. 
- upgradeableQueue workqueue.TypedRateLimitingInterface[any] // statusLock guards access to modifying available updates statusLock sync.Mutex availableUpdates *availableUpdates - // upgradeableStatusLock guards access to modifying Upgradeable conditions - upgradeableStatusLock sync.Mutex - upgradeable *upgradeable - upgradeableChecks []upgradeableCheck - - // upgradeableCheckIntervals drives minimal intervals between Upgradeable status - // synchronization - upgradeableCheckIntervals upgradeableCheckIntervals + // upgradeable tracks the risks that feed the ClusterVersion Upgradeable condition. + upgradeable risk.Source // conditionRegistry is used to evaluate whether a particular condition is risky or not. conditionRegistry clusterconditions.ConditionRegistry @@ -257,7 +255,6 @@ func New( statusInterval: 15 * time.Second, minimumUpdateCheckInterval: minimumInterval, - upgradeableCheckIntervals: defaultUpgradeableCheckIntervals(), payloadDir: overridePayloadDir, updateService: updateService, @@ -267,13 +264,11 @@ func New( eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: namespace}), queue: workqueue.NewTypedRateLimitingQueueWithConfig(workqueue.DefaultTypedControllerRateLimiter[any](), workqueue.TypedRateLimitingQueueConfig[any]{Name: "clusterversion"}), availableUpdatesQueue: workqueue.NewTypedRateLimitingQueueWithConfig(workqueue.DefaultTypedControllerRateLimiter[any](), workqueue.TypedRateLimitingQueueConfig[any]{Name: "availableupdates"}), - upgradeableQueue: workqueue.NewTypedRateLimitingQueueWithConfig(workqueue.DefaultTypedControllerRateLimiter[any](), workqueue.TypedRateLimitingQueueConfig[any]{Name: "upgradeable"}), hypershift: hypershift, exclude: exclude, clusterProfile: clusterProfile, conditionRegistry: standard.NewConditionRegistry(promqlTarget), - risks: alert.New("Alert", promqlTarget), injectClusterIdIntoPromQL: injectClusterIdIntoPromQL, requiredFeatureSet: featureSet, @@ -286,12 +281,6 @@ func New( if _, 
err := cvInformer.Informer().AddEventHandler(optr.clusterVersionEventHandler()); err != nil { return nil, err } - if _, err := cmConfigInformer.Informer().AddEventHandler(optr.adminAcksEventHandler()); err != nil { - return nil, err - } - if _, err := cmConfigManagedInformer.Informer().AddEventHandler(optr.adminGatesEventHandler()); err != nil { - return nil, err - } if _, err := coInformer.Informer().AddEventHandler(optr.clusterOperatorEventHandler()); err != nil { return nil, err } @@ -313,7 +302,19 @@ func New( optr.cacheSynced = append(optr.cacheSynced, featureGateInformer.Informer().HasSynced) // make sure this is initialized after all the listers are initialized - optr.upgradeableChecks = optr.defaultUpgradeableChecks() + riskSourceCallback := func() { optr.availableUpdatesQueue.Add(optr.queueKey()) } + optr.upgradeable = aggregate.New( + updatingrisk.New("ClusterVersionUpdating", optr.name, cvInformer, riskSourceCallback), + overrides.New("ClusterVersionOverrides", optr.name, cvInformer, riskSourceCallback), + deletionrisk.New("ResourceDeletionInProgress", optr.currentVersion), + adminack.New("AdminAck", optr.currentVersion, cmConfigManagedInformer, cmConfigInformer, riskSourceCallback), + upgradeablerisk.New("ClusterOperatorUpgradeable", optr.currentVersion, coInformer, riskSourceCallback), + ) + + optr.risks = aggregate.New( + alert.New("Alert", promqlTarget), + optr.upgradeable, + ) optr.configuration = configuration.NewClusterVersionOperatorConfiguration(operatorClient, operatorInformerFactory) @@ -468,7 +469,6 @@ func loadConfigMapVerifierDataFromUpdate(update *payload.Update, clientBuilder s func (optr *Operator) Run(runContext context.Context, shutdownContext context.Context) error { defer optr.queue.ShutDown() defer optr.availableUpdatesQueue.ShutDown() - defer optr.upgradeableQueue.ShutDown() defer optr.configuration.Queue().ShutDown() stopCh := runContext.Done() @@ -525,25 +525,12 @@ func (optr *Operator) Run(runContext context.Context, 
shutdownContext context.Co klog.Infof("The ClusterVersionOperatorConfiguration feature gate is disabled or HyperShift is detected; the configuration sync routine will not run.") } - resultChannelCount++ - go func() { - defer utilruntime.HandleCrash() - wait.UntilWithContext(runContext, func(runContext context.Context) { - optr.worker(runContext, optr.upgradeableQueue, optr.upgradeableSyncFunc(false)) - }, time.Second) - resultChannel <- asyncResult{name: "upgradeable"} - }() - resultChannelCount++ go func() { defer utilruntime.HandleCrash() wait.UntilWithContext(runContext, func(runContext context.Context) { // run the worker, then when the queue is closed sync one final time to flush any pending status optr.worker(runContext, optr.queue, func(runContext context.Context, key string) error { return optr.sync(runContext, key) }) - // This is to ensure upgradeableCondition to be synced and thus to avoid the race caused by the throttle - if err := optr.upgradeableSyncFunc(true)(shutdownContext, optr.queueKey()); err != nil { - utilruntime.HandleError(fmt.Errorf("unable to perform final upgradeable sync: %v", err)) - } if err := optr.sync(shutdownContext, optr.queueKey()); err != nil { utilruntime.HandleError(fmt.Errorf("unable to perform final sync: %v", err)) } @@ -593,7 +580,6 @@ func (optr *Operator) Run(runContext context.Context, shutdownContext context.Co shutdown = true optr.queue.ShutDown() optr.availableUpdatesQueue.ShutDown() - optr.upgradeableQueue.ShutDown() optr.configuration.Queue().ShutDown() } } @@ -613,12 +599,10 @@ func (optr *Operator) clusterVersionEventHandler() cache.ResourceEventHandler { AddFunc: func(_ interface{}) { optr.queue.Add(workQueueKey) optr.availableUpdatesQueue.Add(workQueueKey) - optr.upgradeableQueue.Add(workQueueKey) }, UpdateFunc: func(_, _ interface{}) { optr.queue.Add(workQueueKey) optr.availableUpdatesQueue.Add(workQueueKey) - optr.upgradeableQueue.Add(workQueueKey) }, DeleteFunc: func(_ interface{}) { 
optr.queue.Add(workQueueKey) @@ -829,31 +813,6 @@ func (optr *Operator) availableUpdatesSync(ctx context.Context, key string) erro return optr.syncAvailableUpdates(ctx, config) } -// upgradeableSyncFunc returns a function that is triggered on cluster version change (and periodic requeues) to -// sync upgradeableCondition. It only modifies cluster version. -func (optr *Operator) upgradeableSyncFunc(ignoreThrottlePeriod bool) func(_ context.Context, key string) error { - return func(_ context.Context, key string) error { - startTime := time.Now() - klog.V(2).Infof("Started syncing upgradeable %q", key) - defer func() { - klog.V(2).Infof("Finished syncing upgradeable %q (%v)", key, time.Since(startTime)) - }() - - config, err := optr.cvLister.Get(optr.name) - if apierrors.IsNotFound(err) { - return nil - } - if err != nil { - return err - } - if errs := validation.ValidateClusterVersion(config, optr.shouldReconcileAcceptRisks()); len(errs) > 0 { - return nil - } - - return optr.syncUpgradeable(config, ignoreThrottlePeriod) - } -} - // isOlderThanLastUpdate returns true if the cluster version is older than // the last update we saw. 
func (optr *Operator) isOlderThanLastUpdate(config *configv1.ClusterVersion) bool { diff --git a/pkg/cvo/cvo_scenarios_test.go b/pkg/cvo/cvo_scenarios_test.go index e7c0c543c..a6dd01b89 100644 --- a/pkg/cvo/cvo_scenarios_test.go +++ b/pkg/cvo/cvo_scenarios_test.go @@ -29,6 +29,8 @@ import ( "github.com/openshift/client-go/config/clientset/versioned/fake" "github.com/openshift/library-go/pkg/manifest" + "github.com/openshift/cluster-version-operator/pkg/clusterconditions" + "github.com/openshift/cluster-version-operator/pkg/clusterconditions/always" "github.com/openshift/cluster-version-operator/pkg/featuregates" "github.com/openshift/cluster-version-operator/pkg/internal" "github.com/openshift/cluster-version-operator/pkg/payload" @@ -114,6 +116,8 @@ func setupCVOTest(payloadDir string) (*Operator, map[string]apiruntime.Object, * fmt.Printf("Cannot create cluster version object, err: %#v\n", err) } + registry := clusterconditions.NewConditionRegistry() + registry.Register("Always", &always.Always{}) o := &Operator{ namespace: "test", name: "version", @@ -121,6 +125,7 @@ func setupCVOTest(payloadDir string) (*Operator, map[string]apiruntime.Object, * client: client, enabledCVOFeatureGates: featuregates.DefaultCvoGates("version"), cvLister: &clientCVLister{client: client}, + conditionRegistry: registry, exclude: "exclude-test", eventRecorder: record.NewFakeRecorder(100), clusterProfile: payload.DefaultClusterProfile, diff --git a/pkg/cvo/cvo_test.go b/pkg/cvo/cvo_test.go index 5f85b0851..0da8eaa56 100644 --- a/pkg/cvo/cvo_test.go +++ b/pkg/cvo/cvo_test.go @@ -26,12 +26,8 @@ import ( apiruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/apimachinery/pkg/watch" - "k8s.io/client-go/informers" kfake "k8s.io/client-go/kubernetes/fake" ktesting "k8s.io/client-go/testing" - "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" 
"k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" @@ -43,9 +39,12 @@ import ( "github.com/openshift/library-go/pkg/verify/store/serial" "github.com/openshift/library-go/pkg/verify/store/sigstore" + "github.com/openshift/cluster-version-operator/pkg/clusterconditions" + "github.com/openshift/cluster-version-operator/pkg/clusterconditions/always" "github.com/openshift/cluster-version-operator/pkg/featuregates" "github.com/openshift/cluster-version-operator/pkg/internal" "github.com/openshift/cluster-version-operator/pkg/payload" + mockrisk "github.com/openshift/cluster-version-operator/pkg/risk/mock" ) var ( @@ -1347,11 +1346,11 @@ func TestOperator_sync(t *testing.T) { }, namespace: "test", name: "default", - upgradeable: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{ - {Type: "Upgradeable", Status: configv1.ConditionFalse}, - {Type: "UpgradeableA", Status: configv1.ConditionFalse}, - {Type: "UpgradeableB", Status: configv1.ConditionFalse}, + upgradeable: &mockrisk.Mock{ + InternalName: "Mock", + InternalRisks: []configv1.ConditionalUpdateRisk{ + {Name: "RiskA", Message: "MessageA", URL: "https://example.com/a", MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}}, + {Name: "RiskB", Message: "MessageB", URL: "https://example.com/b", MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}}, }, }, client: fakeClientsetWithUpdates(&configv1.ClusterVersion{ @@ -1405,9 +1404,7 @@ func TestOperator_sync(t *testing.T) { {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, {Type: internal.ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, - {Type: "Upgradeable", Status: configv1.ConditionFalse}, - {Type: "UpgradeableA", Status: configv1.ConditionFalse}, - {Type: "UpgradeableB", Status: configv1.ConditionFalse}, + {Type: "Upgradeable", Status: configv1.ConditionFalse, Reason: "MultipleReasons", Message: 
"Cluster should not be upgraded between minor or major versions for multiple reasons:\n* MessageA\n* MessageB"}, }, }, }) @@ -1427,10 +1424,11 @@ func TestOperator_sync(t *testing.T) { }, namespace: "test", name: "default", - upgradeable: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{ - {Type: "Upgradeable", Status: configv1.ConditionFalse}, - {Type: "UpgradeableB", Status: configv1.ConditionFalse}, + upgradeable: &mockrisk.Mock{ + InternalName: "Mock", + InternalRisks: []configv1.ConditionalUpdateRisk{ + {Name: "RiskA", Message: "MessageA", URL: "https://example.com/a", MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}}, + {Name: "RiskB", Message: "MessageB", URL: "https://example.com/b", MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}}, }, }, client: fakeClientsetWithUpdates(&configv1.ClusterVersion{ @@ -1451,7 +1449,6 @@ func TestOperator_sync(t *testing.T) { {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, {Type: internal.ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, - {Type: "UpgradeableA", Status: configv1.ConditionFalse}, }, }, }), @@ -1485,8 +1482,7 @@ func TestOperator_sync(t *testing.T) { {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, {Type: internal.ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, - {Type: "Upgradeable", Status: configv1.ConditionFalse}, - {Type: "UpgradeableB", Status: configv1.ConditionFalse}, + {Type: "Upgradeable", Status: configv1.ConditionFalse, Reason: "MultipleReasons", Message: "Cluster should not be upgraded between minor or major versions for multiple reasons:\n* MessageA\n* MessageB"}, }, }, }) @@ -1506,9 +1502,6 @@ func TestOperator_sync(t *testing.T) { }, namespace: "test", name: "default", - 
upgradeable: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{}, - }, client: fakeClientsetWithUpdates(&configv1.ClusterVersion{ ObjectMeta: metav1.ObjectMeta{ Name: "default", @@ -1527,9 +1520,6 @@ func TestOperator_sync(t *testing.T) { {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, {Type: internal.ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, - {Type: "Upgradeable", Status: configv1.ConditionFalse}, - {Type: "UpgradeableA", Status: configv1.ConditionFalse}, - {Type: "UpgradeableB", Status: configv1.ConditionFalse}, }, }, }), @@ -2277,6 +2267,10 @@ func TestOperator_sync(t *testing.T) { optr.eventRecorder = record.NewFakeRecorder(100) optr.enabledCVOFeatureGates = featuregates.DefaultCvoGates("version") + registry := clusterconditions.NewConditionRegistry() + registry.Register("Always", &always.Always{}) + optr.conditionRegistry = registry + ctx := context.Background() err := optr.sync(ctx, optr.queueKey()) if err != nil && tt.wantErr == nil { @@ -2792,1219 +2786,6 @@ func TestOperator_availableUpdatesSync(t *testing.T) { } } -// waits for admin ack configmap changes -func waitForCm(t *testing.T, cmChan chan *corev1.ConfigMap) { - select { - case cm := <-cmChan: - t.Logf("Got configmap from channel: %s/%s", cm.Namespace, cm.Name) - case <-time.After(wait.ForeverTestTimeout): - t.Error("Informer did not get the configmap") - } -} - -func TestOperator_upgradeableSync(t *testing.T) { - id := uuid.Must(uuid.NewRandom()).String() - - var defaultGateCm = corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - } - var defaultAckCm = corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-acks", - Namespace: "test"}, - } - tests := []struct { - name string - key string - optr *Operator - cm corev1.ConfigMap - gateCm *corev1.ConfigMap - ackCm *corev1.ConfigMap - 
wantErr func(*testing.T, error) - expectedResult *upgradeable - }{ - { - name: "when version is missing, do nothing (other loops should create it)", - optr: &Operator{ - release: configv1.Release{ - Version: "4.0.1", - Image: "image/image:v4.0.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset(), - }, - cm: corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - }, - }, - { - name: "report error condition when overrides is set for version", - optr: &Operator{ - release: configv1.Release{ - Image: "image/image:v4.0.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "fast", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: true, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Image: "image/image:v4.0.1"}, - }, - }, - }, - ), - }, - cm: corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - }, - expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "ClusterVersionOverridesSet", - Message: "Disabling ownership via cluster version overrides prevents upgrades between minor or major versions. 
Please remove overrides before requesting a minor or major version update.", - }}, - }, - }, - { - name: "report error condition when the single clusteroperator is not upgradeable", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.0.0", - Image: "image/image:v4.0.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Image: "image/image:v4.0.1"}, - }, - }, - }, - &configv1.ClusterOperator{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default-operator-1", - }, - Status: configv1.ClusterOperatorStatus{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "RandomReason", - Message: "some random reason why upgrades are not safe.", - }}, - }, - }, - ), - }, - cm: corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - }, - expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "RandomReason", - Message: "Cluster operator default-operator-1 should not be upgraded between minor or major versions: some random reason why upgrades are not safe.", - }}, - }, - }, - { - name: "report error condition when single clusteroperator is not upgradeable and another has no conditions", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.0.0", - Image: "image/image:v4.0.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: 
configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Image: "image/image:v4.0.1"}, - }, - }, - }, - &configv1.ClusterOperator{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default-operator-1", - }, - Status: configv1.ClusterOperatorStatus{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "RandomReason", - Message: "some random reason why upgrades are not safe.", - }}, - }, - }, - &configv1.ClusterOperator{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default-operator-2", - }, - Status: configv1.ClusterOperatorStatus{ - Conditions: []configv1.ClusterOperatorStatusCondition{}, - }, - }, - ), - }, - cm: corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - }, - expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "RandomReason", - Message: "Cluster operator default-operator-1 should not be upgraded between minor or major versions: some random reason why upgrades are not safe.", - }}, - }, - }, - { - name: "report error condition when single clusteroperator is not upgradeable and another is upgradeable", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.0.0", - Image: "image/image:v4.0.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Image: "image/image:v4.0.1"}, - }, - }, - }, - &configv1.ClusterOperator{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default-operator-1", - }, 
- Status: configv1.ClusterOperatorStatus{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "RandomReason", - Message: "some random reason why upgrades are not safe.", - }}, - }, - }, - &configv1.ClusterOperator{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default-operator-2", - }, - Status: configv1.ClusterOperatorStatus{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionTrue, - }}, - }, - }, - ), - }, - cm: corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - }, - expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "RandomReason", - Message: "Cluster operator default-operator-1 should not be upgraded between minor or major versions: some random reason why upgrades are not safe.", - }}, - }, - }, - { - name: "report error condition when two clusteroperators are not upgradeable", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.0.0", - Image: "image/image:v4.0.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Image: "image/image:v4.0.1"}, - }, - }, - }, - &configv1.ClusterOperator{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default-operator-1", - }, - Status: configv1.ClusterOperatorStatus{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "RandomReason", - Message: "some random reason why upgrades are not safe.", - 
}}, - }, - }, - &configv1.ClusterOperator{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default-operator-2", - }, - Status: configv1.ClusterOperatorStatus{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "RandomReason2", - Message: "some random reason 2 why upgrades are not safe.", - }}, - }, - }, - ), - }, - cm: corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - }, - expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "ClusterOperatorsNotUpgradeable", - Message: "Multiple cluster operators should not be upgraded between minor or major versions:\n* Cluster operator default-operator-1 should not be upgraded between minor or major versions: RandomReason: some random reason why upgrades are not safe.\n* Cluster operator default-operator-2 should not be upgraded between minor or major versions: RandomReason2: some random reason 2 why upgrades are not safe.", - }}, - }, - }, - { - name: "report error condition when clusteroperators and version are not upgradeable", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.0.0", - Image: "image/image:v4.0.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: true, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Image: "image/image:v4.0.1"}, - }, - }, - }, - &configv1.ClusterOperator{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default-operator-1", - }, - Status: configv1.ClusterOperatorStatus{ - Conditions: 
[]configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "RandomReason", - Message: "some random reason why upgrades are not safe.", - }}, - }, - }, - &configv1.ClusterOperator{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default-operator-2", - }, - Status: configv1.ClusterOperatorStatus{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "RandomReason2", - Message: "some random reason 2 why upgrades are not safe.", - }}, - }, - }, - ), - }, - cm: corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - }, - expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "MultipleReasons", - Message: "Cluster should not be upgraded between minor or major versions for multiple reasons: ClusterVersionOverridesSet,ClusterOperatorsNotUpgradeable\n* Disabling ownership via cluster version overrides prevents upgrades between minor or major versions. 
Please remove overrides before requesting a minor or major version update.\n* Multiple cluster operators should not be upgraded between minor or major versions:\n* Cluster operator default-operator-1 should not be upgraded between minor or major versions: RandomReason: some random reason why upgrades are not safe.\n* Cluster operator default-operator-2 should not be upgraded between minor or major versions: RandomReason2: some random reason 2 why upgrades are not safe.", - }, { - Type: "UpgradeableClusterOperators", - Status: configv1.ConditionFalse, - Reason: "ClusterOperatorsNotUpgradeable", - Message: "Multiple cluster operators should not be upgraded between minor or major versions:\n* Cluster operator default-operator-1 should not be upgraded between minor or major versions: RandomReason: some random reason why upgrades are not safe.\n* Cluster operator default-operator-2 should not be upgraded between minor or major versions: RandomReason2: some random reason 2 why upgrades are not safe.", - }, { - Type: "UpgradeableClusterVersionOverrides", - Status: configv1.ConditionFalse, - Reason: "ClusterVersionOverridesSet", - Message: "Disabling ownership via cluster version overrides prevents upgrades between minor or major versions. 
Please remove overrides before requesting a minor or major version update.", - }}, - }, - }, - { - name: "no error conditions", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.0.0", - Image: "image/image:v4.0.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Image: "image/image:v4.0.1"}, - }, - }, - }, - ), - }, - cm: corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - }, - expectedResult: &upgradeable{}, - }, - { - name: "no error conditions", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.0.0", - Image: "image/image:v4.0.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: false, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Image: "image/image:v4.0.1"}, - }, - }, - }, - ), - }, - cm: corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - }, - expectedResult: &upgradeable{}, - }, - { - name: "no error conditions and admin ack gate does not apply to version", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.9.0", - Image: "image/image:v4.9.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: 
metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: false, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Version: "4.9.0"}, - {Image: "image/image:v4.9.1"}, - }, - }, - }, - &configv1.ClusterOperator{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default-operator-1", - }, - Status: configv1.ClusterOperatorStatus{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionTrue, - }}, - }, - }, - ), - }, - cm: corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - }, - expectedResult: &upgradeable{}, - }, - { - name: "admin-gates configmap gate does not have value", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.8.0", - Image: "image/image:v4.8.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: false, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Version: "4.8.0"}, - }, - }, - }, - ), - }, - gateCm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": ""}, - }, - ackCm: &defaultAckCm, - expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "AdminAckConfigMapGateValueError", - Message: "admin-gates configmap gate ack-4.8-kube-122-api-removals-in-4.9 must contain a non-empty value."}}, - }, - }, - { - name: 
"admin ack required", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.8.0", - Image: "image/image:v4.8.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: false, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Version: "4.8.0"}, - }, - }, - }, - ), - }, - gateCm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "Description."}, - }, - ackCm: &defaultAckCm, - expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "AdminAckRequired", - Message: "Description."}}, - }, - }, - { - name: "admin ack required and admin ack gate does not apply to version", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.8.0", - Image: "image/image:v4.8.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: false, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Version: "4.8.0"}, - }, - }, - }, - ), - }, - gateCm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "Description.", - "ack-4.9-kube-122-api-removals-in-4.9": "Description 2."}, - }, - 
ackCm: &defaultAckCm, expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "AdminAckRequired", - Message: "Description."}}, - }, - }, - { - name: "admin ack required and configmap gate does not have value", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.8.0", - Image: "image/image:v4.8.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: false, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Version: "4.8.0"}, - }, - }, - }, - ), - }, - gateCm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "Description.", - "ack-4.8-foo": ""}, - }, - ackCm: &defaultAckCm, expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "MultipleReasons", - Message: "Description. 
admin-gates configmap gate ack-4.8-foo must contain a non-empty value."}}, - }, - }, - { - name: "multiple admin acks required", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.8.0", - Image: "image/image:v4.8.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: false, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Version: "4.8.0"}, - }, - }, - }, - ), - }, - gateCm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "Description 2.", - "ack-4.8-foo": "Description 1.", - "ack-4.8-bar": "Description 3."}, - }, - ackCm: &defaultAckCm, expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "AdminAckRequired", - Message: "Description 1. Description 2. 
Description 3."}}, - }, - }, - { - name: "admin ack found", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.8.0", - Image: "image/image:v4.8.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: false, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Version: "4.8.0"}, - }, - }, - }, - ), - }, - gateCm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "Description."}, - }, - ackCm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-acks", - Namespace: "test"}, Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "true"}, - }, - expectedResult: &upgradeable{}, - }, - { - name: "admin ack 2 of 3 found", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.8.0", - Image: "image/image:v4.8.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: false, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Version: "4.8.0"}, - }, - }, - }, - ), - }, - gateCm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "Description.", - "ack-4.8-foo": "Description foo.", - "ack-4.8-bar": "Description bar."}, - }, - ackCm: &corev1.ConfigMap{ - 
ObjectMeta: metav1.ObjectMeta{Name: "admin-acks", - Namespace: "test"}, - Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "true", - "ack-4.8-bar": "true"}}, - expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "AdminAckRequired", - Message: "Description foo."}}, - }, - }, - { - name: "multiple admin acks found", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.8.0", - Image: "image/image:v4.8.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: false, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Version: "4.8.0"}, - }, - }, - }, - ), - }, - gateCm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "Description.", - "ack-4.8-foo": "Description foo.", - "ack-4.8-bar": "Description bar."}, - }, - ackCm: &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "admin-acks", - Namespace: "test"}, - Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "true", - "ack-4.8-bar": "true", - "ack-4.8-foo": "true"}}, - expectedResult: &upgradeable{}, - }, - // delete tests are last so we don't have to re-create the config map for other tests - { - name: "admin-acks configmap not found", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.8.0", - Image: "image/image:v4.8.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - 
Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: false, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Version: "4.8.0"}, - }, - }, - }, - ), - }, - gateCm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "admin-gates", - Namespace: "test"}, - Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "Description."}, - }, - // Name triggers deletion of config map - ackCm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "delete", - Namespace: "test"}, Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "true"}, - }, - expectedResult: &upgradeable{ - Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "UnableToAccessAdminAcksConfigMap", - Message: "admin-acks configmap not found."}}, - }, - }, - // delete tests are last so we don't have to re-create the config map for other tests - { - name: "admin-gates configmap not found", - optr: &Operator{ - updateService: "http://localhost:8080/graph", - release: configv1.Release{ - Version: "v4.8.0", - Image: "image/image:v4.8.1", - }, - namespace: "test", - name: "default", - client: fake.NewClientset( - &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - }, - Spec: configv1.ClusterVersionSpec{ - ClusterID: configv1.ClusterID(id), - Channel: "", - Overrides: []configv1.ComponentOverride{{ - Unmanaged: false, - }}, - }, - Status: configv1.ClusterVersionStatus{ - History: []configv1.UpdateHistory{ - {Version: "4.8.0"}, - }, - }, - }, - ), - }, - // Name triggers deletion of config map - gateCm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "delete", - Namespace: "test"}, - Data: map[string]string{"ack-4.8-kube-122-api-removals-in-4.9": "Description."}, - }, - ackCm: nil, - expectedResult: &upgradeable{ - 
Conditions: []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "UnableToAccessAdminGatesConfigMap", - Message: "admin-gates configmap not found."}}, - }, - }, - } - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - watcherStarted := make(chan struct{}) - f := kfake.NewClientset() - - // A catch-all watch reactor that allows us to inject the watcherStarted channel. - f.PrependWatchReactor("*", func(action ktesting.Action) (handled bool, ret watch.Interface, err error) { - gvr := action.GetResource() - ns := action.GetNamespace() - watch, err := f.Tracker().Watch(gvr, ns) - if err != nil { - return false, nil, err - } - close(watcherStarted) - return true, watch, nil - }) - cms := make(chan *corev1.ConfigMap, 1) - configManagedInformer := informers.NewSharedInformerFactory(f, 0) - cmInformerLister := configManagedInformer.Core().V1().ConfigMaps() - cmInformer := configManagedInformer.Core().V1().ConfigMaps().Informer() - if _, err := cmInformer.AddEventHandler(&cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - cm := obj.(*corev1.ConfigMap) - t.Logf("cm added: %s/%s", cm.Namespace, cm.Name) - cms <- cm - }, - DeleteFunc: func(obj interface{}) { - cm := obj.(*corev1.ConfigMap) - t.Logf("cm deleted: %s/%s", cm.Namespace, cm.Name) - cms <- cm - }, - UpdateFunc: func(oldObj, newObj interface{}) { - cm := newObj.(*corev1.ConfigMap) - t.Logf("cm updated: %s/%s", cm.Namespace, cm.Name) - cms <- cm - }, - }); err != nil { - t.Errorf("error adding ConfigMap event handler: %v", err) - } - configManagedInformer.Start(ctx.Done()) - - _, err := f.CoreV1().ConfigMaps("test").Create(ctx, &defaultGateCm, metav1.CreateOptions{}) - if err != nil { - t.Errorf("error injecting admin-gates configmap: %v", err) - } - waitForCm(t, cms) - _, err = f.CoreV1().ConfigMaps("test").Create(ctx, &defaultAckCm, metav1.CreateOptions{}) - if err != nil { - t.Errorf("error injecting 
admin-acks configmap: %v", err) - } - waitForCm(t, cms) - - for _, tt := range tests { - { - t.Run(tt.name, func(t *testing.T) { - optr := tt.optr - optr.queue = workqueue.NewTypedRateLimitingQueue[any](workqueue.DefaultTypedControllerRateLimiter[any]()) - optr.proxyLister = &clientProxyLister{client: optr.client} - optr.coLister = &clientCOLister{client: optr.client} - optr.cvLister = &clientCVLister{client: optr.client} - optr.cmConfigManagedLister = cmInformerLister.Lister().ConfigMaps("test") - optr.cmConfigLister = cmInformerLister.Lister().ConfigMaps("test") - - optr.upgradeableChecks = optr.defaultUpgradeableChecks() - optr.eventRecorder = record.NewFakeRecorder(100) - optr.enabledCVOFeatureGates = featuregates.DefaultCvoGates("version") - - if tt.gateCm != nil { - if tt.gateCm.Name == "delete" { - err := f.CoreV1().ConfigMaps("test").Delete(ctx, "admin-gates", metav1.DeleteOptions{}) - if err != nil { - t.Errorf("error deleting configmap admin-gates: %v", err) - } - } else { - _, err = f.CoreV1().ConfigMaps("test").Update(ctx, tt.gateCm, metav1.UpdateOptions{}) - if err != nil { - t.Errorf("error updating configmap admin-gates: %v", err) - } - } - waitForCm(t, cms) - } - if tt.ackCm != nil { - if tt.ackCm.Name == "delete" { - err := f.CoreV1().ConfigMaps("test").Delete(ctx, "admin-acks", metav1.DeleteOptions{}) - if err != nil { - t.Errorf("error deleting configmap admin-acks: %v", err) - } - } else { - _, err = f.CoreV1().ConfigMaps("test").Update(ctx, tt.ackCm, metav1.UpdateOptions{}) - if err != nil { - t.Errorf("error updating configmap admin-acks: %v", err) - } - } - waitForCm(t, cms) - } - - err = optr.upgradeableSyncFunc(false)(ctx, optr.queueKey()) - if err != nil && tt.wantErr == nil { - t.Fatalf("Operator.sync() unexpected error: %v", err) - } - if err != nil { - return - } - - if optr.upgradeable != nil { - optr.upgradeable.At = time.Time{} - for i := range optr.upgradeable.Conditions { - optr.upgradeable.Conditions[i].LastTransitionTime = 
metav1.Time{} - } - } - - if !reflect.DeepEqual(optr.upgradeable, tt.expectedResult) { - t.Fatalf("unexpected: %s", cmp.Diff(tt.expectedResult, optr.upgradeable)) - } - if (optr.queue.Len() > 0) != (optr.upgradeable != nil) { - t.Fatalf("unexpected queue") - } - }) - } - } -} - -func Test_gateApplicableToCurrentVersion(t *testing.T) { - tests := []struct { - name string - gateName string - cv string - wantErr bool - expectedResult bool - }{ - { - name: "gate name invalid format no dot", - gateName: "ack-4>8-foo", - cv: "4.8.1", - wantErr: true, - expectedResult: false, - }, - { - name: "gate name invalid format 2 dots", - gateName: "ack-4..8-foo", - cv: "4.8.1", - wantErr: true, - expectedResult: false, - }, - { - name: "gate name invalid format does not start with ack", - gateName: "ck-4.8-foo", - cv: "4.8.1", - wantErr: true, - expectedResult: false, - }, - { - name: "gate name invalid format major version must be 4 or 5", - gateName: "ack-3.8-foo", - cv: "4.8.1", - wantErr: true, - expectedResult: false, - }, - { - name: "gate name invalid format minor version must be a number", - gateName: "ack-4.x-foo", - cv: "4.8.1", - wantErr: true, - expectedResult: false, - }, - { - name: "gate name invalid format no following dash", - gateName: "ack-4.8.1-foo", - cv: "4.8.1", - wantErr: true, - expectedResult: false, - }, - { - name: "gate name invalid format 2 following dashes", - gateName: "ack-4.x--foo", - cv: "4.8.1", - wantErr: true, - expectedResult: false, - }, - { - name: "gate name invalid format no description following dash", - gateName: "ack-4.x-", - cv: "4.8.1", - wantErr: true, - expectedResult: false, - }, - { - name: "gate name match", - gateName: "ack-4.8-foo", - cv: "4.8.1", - wantErr: false, - expectedResult: true, - }, - { - name: "gate name match big minor version", - gateName: "ack-4.123456-foo", - cv: "4.123456", - wantErr: false, - expectedResult: true, - }, - { - name: "gate name no match", - gateName: "ack-4.8-foo", - cv: "4.9.1", - wantErr: 
false, - expectedResult: false, - }, - { - name: "gate name no match multi digit minor", - gateName: "ack-4.8-foo", - cv: "4.80.1", - wantErr: false, - expectedResult: false, - }, - } - for _, tt := range tests { - { - t.Run(tt.name, func(t *testing.T) { - isApplicable, err := gateApplicableToCurrentVersion(tt.gateName, tt.cv) - if isApplicable != tt.expectedResult { - t.Errorf("gateApplicableToCurrentVersion() %s expected applicable %t, got applicable %t", tt.gateName, tt.expectedResult, isApplicable) - } - if err != nil && !tt.wantErr { - t.Errorf("gateApplicableToCurrentVersion() unexpected error: %v", err) - } else if err == nil && tt.wantErr { - t.Error("gateApplicableToCurrentVersion() unexpected success when an error was expected") - } - }) - } - } -} - func expectGet(t *testing.T, a ktesting.Action, resource, namespace, name string) { t.Helper() if a.GetVerb() != "get" { diff --git a/pkg/cvo/status.go b/pkg/cvo/status.go index 1687e8d06..abf04e5a7 100644 --- a/pkg/cvo/status.go +++ b/pkg/cvo/status.go @@ -24,9 +24,11 @@ import ( configclientv1 "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1" "github.com/openshift/cluster-version-operator/lib/resourcemerge" + "github.com/openshift/cluster-version-operator/pkg/clusterconditions" "github.com/openshift/cluster-version-operator/pkg/featuregates" "github.com/openshift/cluster-version-operator/pkg/internal" "github.com/openshift/cluster-version-operator/pkg/payload" + "github.com/openshift/cluster-version-operator/pkg/risk" ) const ( @@ -168,16 +170,11 @@ func (optr *Operator) syncStatus(ctx context.Context, original, config *configv1 cvUpdated = true config = updated } - // update the config with upgradeable - if updated := optr.getUpgradeable().NeedsUpdate(config); updated != nil { - cvUpdated = true - config = updated - } if !cvUpdated && (original == nil || original == config) { original = config.DeepCopy() } - updateClusterVersionStatus(&config.Status, status, optr.release, 
optr.getAvailableUpdates, optr.enabledCVOFeatureGates, validationErrs, optr.shouldReconcileAcceptRisks) + updateClusterVersionStatus(ctx, &config.Status, status, optr.release, optr.conditionRegistry, optr.getAvailableUpdates, optr.upgradeable, optr.enabledCVOFeatureGates, validationErrs, optr.shouldReconcileAcceptRisks) if klog.V(6).Enabled() { klog.Infof("Apply config: %s", cmp.Diff(original, config)) @@ -189,10 +186,13 @@ func (optr *Operator) syncStatus(ctx context.Context, original, config *configv1 // updateClusterVersionStatus updates the passed cvStatus with the latest status information func updateClusterVersionStatus( + ctx context.Context, cvStatus *configv1.ClusterVersionStatus, status *SyncWorkerStatus, release configv1.Release, + conditionRegistry clusterconditions.ConditionRegistry, getAvailableUpdates func() *availableUpdates, + upgradeable risk.Source, enabledGates featuregates.CvoGateChecker, validationErrs field.ErrorList, shouldReconcileAcceptRisks func() bool, @@ -227,8 +227,8 @@ func updateClusterVersionStatus( var riskNamesForDesiredImage []string if shouldReconcileAcceptRisks() { - updates := getAvailableUpdates() var riskConditions map[string][]metav1.Condition + updates := getAvailableUpdates() conditionalUpdates := cvStatus.ConditionalUpdates if updates != nil { // updates.Updates may become updates.ConditionalUpdates if some alert becomes a risk @@ -241,6 +241,8 @@ func updateClusterVersionStatus( cvStatus.ConditionalUpdateRisks = conditionalUpdateRisks(cvStatus.ConditionalUpdates) } + setUpgradeableCondition(ctx, cvStatus, upgradeable, conditionRegistry, now) + risksMsg := "" if desired.Image == status.loadPayloadStatus.Update.Image { risksMsg = status.loadPayloadStatus.AcceptedRisks @@ -606,6 +608,78 @@ func setReleaseAcceptedCondition(cvStatus *configv1.ClusterVersionStatus, status } } +func setUpgradeableCondition(ctx context.Context, cvStatus *configv1.ClusterVersionStatus, upgradeable risk.Source, conditionRegistry 
clusterconditions.ConditionRegistry, now metav1.Time) { + var err error + var risks []configv1.ConditionalUpdateRisk + if upgradeable != nil { + var updateVersions []string + for _, update := range cvStatus.AvailableUpdates { + updateVersions = append(updateVersions, update.Version) + } + for _, conditionalUpdate := range cvStatus.ConditionalUpdates { + updateVersions = append(updateVersions, conditionalUpdate.Release.Version) + } + risks, _, err = upgradeable.Risks(ctx, updateVersions) + if err != nil { + klog.Errorf("errors while calculating risks for the %s condition: %v", configv1.OperatorUpgradeable, err) + } + + for i := len(risks) - 1; i >= 0; i-- { + c := meta.FindStatusCondition(risks[i].Conditions, internal.ConditionalUpdateRiskConditionTypeApplies) + if c == nil { + if match, err := conditionRegistry.Match(ctx, risks[i].MatchingRules); err != nil || match { + continue // keep the risks that failed to eval or matched + } + } else if c.Status != metav1.ConditionFalse { + continue // keep the risks that failed to eval or matched + } + risks = append(risks[:i], risks[i+1:]...) 
// exclude non-applicable risks + } + } + + // remove obsolete subconditions + resourcemerge.RemoveOperatorStatusCondition(&cvStatus.Conditions, internal.UpgradeableAdminAckRequired) + resourcemerge.RemoveOperatorStatusCondition(&cvStatus.Conditions, internal.UpgradeableDeletesInProgress) + resourcemerge.RemoveOperatorStatusCondition(&cvStatus.Conditions, internal.UpgradeableClusterOperators) + resourcemerge.RemoveOperatorStatusCondition(&cvStatus.Conditions, internal.UpgradeableClusterVersionOverrides) + resourcemerge.RemoveOperatorStatusCondition(&cvStatus.Conditions, internal.UpgradeableUpgradeInProgress) + + if len(risks) == 0 { + if err != nil { + resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{ + Type: configv1.OperatorUpgradeable, + Status: configv1.ConditionUnknown, + Reason: recommendedReasonEvaluationFailed, + Message: err.Error(), + LastTransitionTime: now, + }) + } else { + resourcemerge.RemoveOperatorStatusCondition(&cvStatus.Conditions, configv1.OperatorUpgradeable) + } + } else { + reason := "RiskNameNotCompatibleWithReasonProperty" + if reasonPattern.MatchString(risks[0].Name) { + reason = risks[0].Name + } + message := risks[0].Message + if len(risks) > 1 { + reason = recommendedReasonMultiple + var msgs []string + for _, risk := range risks { + msgs = append(msgs, risk.Message) + } + message = fmt.Sprintf("Cluster should not be upgraded between minor or major versions for multiple reasons:\n* %s", strings.Join(msgs, "\n* ")) + } + resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{ + Type: configv1.OperatorUpgradeable, + Status: configv1.ConditionFalse, + Reason: reason, + Message: message, + LastTransitionTime: now, + }) + } +} + const slowCOUpdatePrefix = "Slow::" // convertErrorToProgressing returns true if the provided status indicates a failure condition can be interpreted as diff --git a/pkg/cvo/status_test.go b/pkg/cvo/status_test.go 
index 36a9dbf56..99830bde4 100644 --- a/pkg/cvo/status_test.go +++ b/pkg/cvo/status_test.go @@ -19,6 +19,8 @@ import ( "github.com/openshift/client-go/config/clientset/versioned/fake" "github.com/openshift/cluster-version-operator/lib/resourcemerge" + "github.com/openshift/cluster-version-operator/pkg/clusterconditions" + "github.com/openshift/cluster-version-operator/pkg/clusterconditions/always" "github.com/openshift/cluster-version-operator/pkg/internal" "github.com/openshift/cluster-version-operator/pkg/payload" ) @@ -229,6 +231,7 @@ func (f fakeRiFlags) AcceptRisks() bool { } func TestUpdateClusterVersionStatus_FilteringMultipleErrorsForFailingCondition(t *testing.T) { + ctx := context.Background() ignoreLastTransitionTime := cmpopts.IgnoreFields(configv1.ClusterOperatorStatusCondition{}, "LastTransitionTime") type args struct { syncWorkerStatus *SyncWorkerStatus @@ -752,7 +755,9 @@ func TestUpdateClusterVersionStatus_FilteringMultipleErrorsForFailingCondition(t if tc.shouldModifyWhenNotReconcilingAndHistoryNotEmpty && !c.isReconciling && !c.isHistoryEmpty { expectedCondition = tc.expectedConditionModified } - updateClusterVersionStatus(cvStatus, tc.args.syncWorkerStatus, release, getAvailableUpdates, gates, noErrors, func() bool { + registry := clusterconditions.NewConditionRegistry() + registry.Register("Always", &always.Always{}) + updateClusterVersionStatus(ctx, cvStatus, tc.args.syncWorkerStatus, release, registry, getAvailableUpdates, nil, gates, noErrors, func() bool { return false }) condition := resourcemerge.FindOperatorStatusCondition(cvStatus.Conditions, internal.ClusterStatusFailing) diff --git a/pkg/cvo/upgradeable.go b/pkg/cvo/upgradeable.go deleted file mode 100644 index f6c9daf32..000000000 --- a/pkg/cvo/upgradeable.go +++ /dev/null @@ -1,542 +0,0 @@ -package cvo - -import ( - "fmt" - "regexp" - "sort" - "strings" - "time" - - "github.com/pkg/errors" - - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/equality" - apierrors 
"k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - listerscorev1 "k8s.io/client-go/listers/core/v1" - "k8s.io/client-go/tools/cache" - "k8s.io/klog/v2" - - configv1 "github.com/openshift/api/config/v1" - configlistersv1 "github.com/openshift/client-go/config/listers/config/v1" - - "github.com/openshift/cluster-version-operator/lib/resourcedelete" - "github.com/openshift/cluster-version-operator/lib/resourcemerge" - "github.com/openshift/cluster-version-operator/pkg/internal" - "github.com/openshift/cluster-version-operator/pkg/payload/precondition/clusterversion" -) - -const ( - adminAckGateFmt string = "^ack-[4-5][.]([0-9]{1,})-[^-]" -) - -var adminAckGateRegexp = regexp.MustCompile(adminAckGateFmt) - -// upgradeableCheckIntervals holds the time intervals that drive how often CVO checks for upgradeability -type upgradeableCheckIntervals struct { - // min is the base minimum interval between upgradeability checks, applied under normal circumstances - min time.Duration - - // minOnFailedPreconditions is the minimum interval between upgradeability checks when precondition checks are - // failing and were recently (see afterPreconditionsFailed) changed. This should be lower than min because we want CVO - // to check upgradeability more often - minOnFailedPreconditions time.Duration - - // afterFailingPreconditions is the period of time after preconditions failed when minOnFailedPreconditions is - // applied instead of min - afterPreconditionsFailed time.Duration -} - -func defaultUpgradeableCheckIntervals() upgradeableCheckIntervals { - return upgradeableCheckIntervals{ - // 2 minutes are here because it is a lower bound of previously nondeterministicly chosen interval - // TODO (OTA-860): Investigate our options of reducing this interval. 
We will need to investigate - // the API usage patterns of the underlying checks, there is anecdotal evidence that they hit - // apiserver instead of using local informer cache - min: 2 * time.Minute, - minOnFailedPreconditions: 15 * time.Second, - afterPreconditionsFailed: 2 * time.Minute, - } -} - -// syncUpgradeable synchronizes the upgradeable status only if the sufficient time passed since its last update. This -// throttling period is dynamic and is driven by upgradeableCheckIntervals. -func (optr *Operator) syncUpgradeable(cv *configv1.ClusterVersion, ignoreThrottlePeriod bool) error { - if u := optr.getUpgradeable(); u != nil && !ignoreThrottlePeriod { - throttleFor := optr.upgradeableCheckIntervals.throttlePeriod(cv) - if earliestNext := u.At.Add(throttleFor); time.Now().Before(earliestNext) { - klog.V(2).Infof("Upgradeability last checked %s ago, will not re-check until %s", time.Since(u.At), earliestNext.Format(time.RFC3339)) - return nil - } - } - optr.setUpgradeableConditions() - - // requeue - optr.queue.Add(optr.queueKey()) - return nil -} - -func (optr *Operator) setUpgradeableConditions() { - now := metav1.Now() - var conds []configv1.ClusterOperatorStatusCondition - var reasons []string - var msgs []string - klog.V(4).Infof("Checking upgradeability conditions") - for _, check := range optr.upgradeableChecks { - if cond := check.Check(); cond != nil { - reasons = append(reasons, cond.Reason) - msgs = append(msgs, cond.Message) - cond.LastTransitionTime = now - conds = append(conds, *cond) - klog.V(2).Infof("Upgradeability condition failed (type='%s' reason='%s' message='%s')", cond.Type, cond.Reason, cond.Message) - } - } - if len(conds) == 1 { - conds = []configv1.ClusterOperatorStatusCondition{{ - Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: conds[0].Reason, - Message: conds[0].Message, - LastTransitionTime: now, - }} - } else if len(conds) > 1 { - conds = append(conds, configv1.ClusterOperatorStatusCondition{ 
- Type: configv1.OperatorUpgradeable, - Status: configv1.ConditionFalse, - Reason: "MultipleReasons", - Message: fmt.Sprintf("Cluster should not be upgraded between minor or major versions for multiple reasons: %s\n* %s", strings.Join(reasons, ","), strings.Join(msgs, "\n* ")), - LastTransitionTime: now, - }) - } else { - klog.V(2).Infof("All upgradeability conditions are passing") - } - sort.Slice(conds, func(i, j int) bool { return conds[i].Type < conds[j].Type }) - optr.setUpgradeable(&upgradeable{ - Conditions: conds, - }) -} - -type upgradeable struct { - At time.Time - - // these are sorted by Type - Conditions []configv1.ClusterOperatorStatusCondition -} - -func (u *upgradeable) NeedsUpdate(original *configv1.ClusterVersion) *configv1.ClusterVersion { - if u == nil { - return nil - } - - origUpConditions := collectUpgradeableConditions(original.Status.Conditions) - if equality.Semantic.DeepEqual(u.Conditions, origUpConditions) { - return nil - } - - config := original.DeepCopy() - for _, c := range u.Conditions { - resourcemerge.SetOperatorStatusCondition(&config.Status.Conditions, c) - } - for _, origc := range origUpConditions { - if c := resourcemerge.FindOperatorStatusCondition(u.Conditions, origc.Type); c == nil { - resourcemerge.RemoveOperatorStatusCondition(&config.Status.Conditions, origc.Type) - } - } - return config -} - -func collectUpgradeableConditions(conditions []configv1.ClusterOperatorStatusCondition) []configv1.ClusterOperatorStatusCondition { - var ret []configv1.ClusterOperatorStatusCondition - for _, c := range conditions { - if strings.HasPrefix(string(c.Type), string(configv1.OperatorUpgradeable)) { - ret = append(ret, c) - } - } - sort.Slice(ret, func(i, j int) bool { return ret[i].Type < ret[j].Type }) - return ret -} - -// setUpgradeable updates the currently calculated status of Upgradeable -func (optr *Operator) setUpgradeable(u *upgradeable) { - if u != nil { - u.At = time.Now() - } - - optr.upgradeableStatusLock.Lock() - defer 
optr.upgradeableStatusLock.Unlock() - optr.upgradeable = u -} - -// getUpgradeable returns the current calculated status of upgradeable. It -// may be nil. -func (optr *Operator) getUpgradeable() *upgradeable { - optr.upgradeableStatusLock.Lock() - defer optr.upgradeableStatusLock.Unlock() - return optr.upgradeable -} - -type upgradeableCheck interface { - // Check returns a not-nil condition that should be addressed before a minor or major version upgrade when the check fails. - Check() *configv1.ClusterOperatorStatusCondition -} - -type clusterOperatorsUpgradeable struct { - coLister configlistersv1.ClusterOperatorLister -} - -func (check *clusterOperatorsUpgradeable) Check() *configv1.ClusterOperatorStatusCondition { - cond := &configv1.ClusterOperatorStatusCondition{ - Type: internal.UpgradeableClusterOperators, - Status: configv1.ConditionFalse, - } - ops, err := check.coLister.List(labels.Everything()) - if meta.IsNoMatchError(err) { - return nil - } - if err != nil { - cond.Reason = "FailedToListClusterOperators" - cond.Message = errors.Wrap(err, "failed to list cluster operators").Error() - return cond - } - - type notUpgradeableCondition struct { - name string - condition *configv1.ClusterOperatorStatusCondition - } - var notup []notUpgradeableCondition - for _, op := range ops { - if up := resourcemerge.FindOperatorStatusCondition(op.Status.Conditions, configv1.OperatorUpgradeable); up != nil && up.Status == configv1.ConditionFalse { - notup = append(notup, notUpgradeableCondition{name: op.GetName(), condition: up}) - } - } - - if len(notup) == 0 { - return nil - } - msg := "" - reason := "" - if len(notup) == 1 { - reason = notup[0].condition.Reason - msg = fmt.Sprintf("Cluster operator %s should not be upgraded between minor or major versions: %s", notup[0].name, notup[0].condition.Message) - } else { - reason = "ClusterOperatorsNotUpgradeable" - var msgs []string - for _, cond := range notup { - msgs = append(msgs, fmt.Sprintf("Cluster operator %s 
should not be upgraded between minor or major versions: %s: %s", cond.name, cond.condition.Reason, cond.condition.Message)) - } - msg = fmt.Sprintf("Multiple cluster operators should not be upgraded between minor or major versions:\n* %s", strings.Join(msgs, "\n* ")) - } - cond.Reason = reason - cond.Message = msg - return cond -} - -type clusterVersionOverridesUpgradeable struct { - name string - cvLister configlistersv1.ClusterVersionLister -} - -func (check *clusterVersionOverridesUpgradeable) Check() *configv1.ClusterOperatorStatusCondition { - cond := &configv1.ClusterOperatorStatusCondition{ - Type: internal.UpgradeableClusterVersionOverrides, - Status: configv1.ConditionFalse, - } - - cv, err := check.cvLister.Get(check.name) - if meta.IsNoMatchError(err) || apierrors.IsNotFound(err) { - return nil - } - - overrides := false - for _, o := range cv.Spec.Overrides { - if o.Unmanaged { - overrides = true - } - } - if !overrides { - return nil - } - - cond.Reason = "ClusterVersionOverridesSet" - cond.Message = "Disabling ownership via cluster version overrides prevents upgrades between minor or major versions. Please remove overrides before requesting a minor or major version update." 
- return cond -} - -type upgradeInProgressUpgradeable struct { - name string - cvLister configlistersv1.ClusterVersionLister -} - -func (check *upgradeInProgressUpgradeable) Check() *configv1.ClusterOperatorStatusCondition { - cond := &configv1.ClusterOperatorStatusCondition{ - Type: internal.UpgradeableUpgradeInProgress, - Status: configv1.ConditionTrue, - } - - cv, err := check.cvLister.Get(check.name) - if meta.IsNoMatchError(err) || apierrors.IsNotFound(err) { - return nil - } else if err != nil { - klog.Error(err) - return nil - } - - if progressingCondition := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, configv1.OperatorProgressing); progressingCondition != nil && - progressingCondition.Status == configv1.ConditionTrue { - cond.Reason = "UpdateInProgress" - cond.Message = fmt.Sprintf("An update is already in progress and the details are in the %s condition", configv1.OperatorProgressing) - return cond - } - return nil -} - -type clusterManifestDeleteInProgressUpgradeable struct { -} - -func (check *clusterManifestDeleteInProgressUpgradeable) Check() *configv1.ClusterOperatorStatusCondition { - cond := &configv1.ClusterOperatorStatusCondition{ - Type: internal.UpgradeableDeletesInProgress, - Status: configv1.ConditionFalse, - } - if deletes := resourcedelete.DeletesInProgress(); len(deletes) > 0 { - resources := strings.Join(deletes, ",") - klog.V(2).Infof("Resource deletions in progress; resources=%s", resources) - cond.Reason = "ResourceDeletesInProgress" - cond.Message = fmt.Sprintf("Cluster minor or major version upgrades are not allowed while resource deletions are in progress; resources=%s", resources) - return cond - } - return nil -} - -func gateApplicableToCurrentVersion(gateName string, currentVersion string) (bool, error) { - var applicable bool - if ackVersion := adminAckGateRegexp.FindString(gateName); ackVersion == "" { - return false, fmt.Errorf("%s configmap gate name %s has invalid format; must comply with %q", - 
internal.AdminGatesConfigMap, gateName, adminAckGateFmt) - } else { - parts := strings.Split(ackVersion, "-") - ackMinor := getEffectiveMinor(parts[1]) - cvMinor := getEffectiveMinor(currentVersion) - if ackMinor == cvMinor { - applicable = true - } - } - return applicable, nil -} - -func checkAdminGate(gateName string, gateValue string, currentVersion string, - ackConfigmap *corev1.ConfigMap) (string, string) { - - if applies, err := gateApplicableToCurrentVersion(gateName, currentVersion); err == nil { - if !applies { - return "", "" - } - } else { - klog.Error(err) - return "AdminAckConfigMapGateNameError", err.Error() - } - if gateValue == "" { - message := fmt.Sprintf("%s configmap gate %s must contain a non-empty value.", internal.AdminGatesConfigMap, gateName) - klog.Error(message) - return "AdminAckConfigMapGateValueError", message - } - if val, ok := ackConfigmap.Data[gateName]; !ok || val != "true" { - return "AdminAckRequired", gateValue - } - return "", "" -} - -type clusterAdminAcksCompletedUpgradeable struct { - adminGatesLister listerscorev1.ConfigMapNamespaceLister - adminAcksLister listerscorev1.ConfigMapNamespaceLister - cvLister configlistersv1.ClusterVersionLister - cvoName string -} - -func (check *clusterAdminAcksCompletedUpgradeable) Check() *configv1.ClusterOperatorStatusCondition { - cv, err := check.cvLister.Get(check.cvoName) - if meta.IsNoMatchError(err) || apierrors.IsNotFound(err) { - message := fmt.Sprintf("Unable to get ClusterVersion, err=%v.", err) - klog.Error(message) - return &configv1.ClusterOperatorStatusCondition{ - Type: internal.UpgradeableAdminAckRequired, - Status: configv1.ConditionFalse, - Reason: "UnableToGetClusterVersion", - Message: message, - } - } - currentVersion := clusterversion.GetCurrentVersion(cv.Status.History) - - // This can occur in early start up when the configmap is first added and version history - // has not yet been populated. 
- if currentVersion == "" { - return nil - } - - var gateCm *corev1.ConfigMap - if gateCm, err = check.adminGatesLister.Get(internal.AdminGatesConfigMap); err != nil { - var message string - if apierrors.IsNotFound(err) { - message = fmt.Sprintf("%s configmap not found.", internal.AdminGatesConfigMap) - } else if err != nil { - message = fmt.Sprintf("Unable to access configmap %s, err=%v.", internal.AdminGatesConfigMap, err) - } - klog.Error(message) - return &configv1.ClusterOperatorStatusCondition{ - Type: internal.UpgradeableAdminAckRequired, - Status: configv1.ConditionFalse, - Reason: "UnableToAccessAdminGatesConfigMap", - Message: message, - } - } - var ackCm *corev1.ConfigMap - if ackCm, err = check.adminAcksLister.Get(internal.AdminAcksConfigMap); err != nil { - var message string - if apierrors.IsNotFound(err) { - message = fmt.Sprintf("%s configmap not found.", internal.AdminAcksConfigMap) - } else if err != nil { - message = fmt.Sprintf("Unable to access configmap %s, err=%v.", internal.AdminAcksConfigMap, err) - } - klog.Error(message) - return &configv1.ClusterOperatorStatusCondition{ - Type: internal.UpgradeableAdminAckRequired, - Status: configv1.ConditionFalse, - Reason: "UnableToAccessAdminAcksConfigMap", - Message: message, - } - } - reasons := make(map[string][]string) - for k, v := range gateCm.Data { - if reason, message := checkAdminGate(k, v, currentVersion, ackCm); reason != "" { - reasons[reason] = append(reasons[reason], message) - } - } - var reason string - var messages []string - for k, v := range reasons { - reason = k - sort.Strings(v) - messages = append(messages, strings.Join(v, " ")) - } - if len(reasons) == 1 { - return &configv1.ClusterOperatorStatusCondition{ - Type: internal.UpgradeableAdminAckRequired, - Status: configv1.ConditionFalse, - Reason: reason, - Message: messages[0], - } - } else if len(reasons) > 1 { - sort.Strings(messages) - return &configv1.ClusterOperatorStatusCondition{ - Type: 
internal.UpgradeableAdminAckRequired, - Status: configv1.ConditionFalse, - Reason: "MultipleReasons", - Message: strings.Join(messages, " "), - } - } - return nil -} - -func (optr *Operator) defaultUpgradeableChecks() []upgradeableCheck { - return []upgradeableCheck{ - &clusterVersionOverridesUpgradeable{name: optr.name, cvLister: optr.cvLister}, - &clusterAdminAcksCompletedUpgradeable{ - adminGatesLister: optr.cmConfigManagedLister, - adminAcksLister: optr.cmConfigLister, - cvLister: optr.cvLister, - cvoName: optr.name, - }, - &clusterOperatorsUpgradeable{coLister: optr.coLister}, - &clusterManifestDeleteInProgressUpgradeable{}, - &upgradeInProgressUpgradeable{name: optr.name, cvLister: optr.cvLister}, - } -} - -// setUpgradableConditionsIfSynced calls setUpgradableConditions if all informers were synced at least once, otherwise -// it queues a full upgradable sync that will eventually execute -// -// This method is needed because it is called in an informer handler, but setUpgradeableConditions uses a lister -// itself, so it races on startup (it can get triggered while one informer's cache is being synced, but depends -// on another informer's cache already synced -func (optr *Operator) setUpgradableConditionsIfSynced() { - for _, synced := range optr.cacheSynced { - if !synced() { - optr.upgradeableQueue.Add(optr.queueKey()) - return - } - } - optr.setUpgradeableConditions() -} - -func (optr *Operator) addFunc(obj interface{}) { - cm := obj.(*corev1.ConfigMap) - if cm.Name == internal.AdminGatesConfigMap || cm.Name == internal.AdminAcksConfigMap { - klog.V(2).Infof("ConfigMap %s/%s added.", cm.Namespace, cm.Name) - optr.setUpgradableConditionsIfSynced() - } -} - -func (optr *Operator) updateFunc(oldObj, newObj interface{}) { - cm := newObj.(*corev1.ConfigMap) - if cm.Name == internal.AdminGatesConfigMap || cm.Name == internal.AdminAcksConfigMap { - oldCm := oldObj.(*corev1.ConfigMap) - if !equality.Semantic.DeepEqual(cm, oldCm) { - klog.V(2).Infof("ConfigMap 
%s/%s updated.", cm.Namespace, cm.Name) - optr.setUpgradableConditionsIfSynced() - } - } -} - -func (optr *Operator) deleteFunc(obj interface{}) { - if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok { - obj = tombstone.Obj - } - cm, ok := obj.(*corev1.ConfigMap) - if !ok { - klog.Errorf("Unexpected type %T", obj) - return - } - if cm.Name == internal.AdminGatesConfigMap || cm.Name == internal.AdminAcksConfigMap { - klog.V(2).Infof("ConfigMap %s/%s deleted.", cm.Namespace, cm.Name) - optr.setUpgradableConditionsIfSynced() - } -} - -// adminAcksEventHandler handles changes to the admin-acks configmap by re-assessing all -// Upgradeable conditions. -func (optr *Operator) adminAcksEventHandler() cache.ResourceEventHandler { - return cache.ResourceEventHandlerFuncs{ - AddFunc: optr.addFunc, - UpdateFunc: optr.updateFunc, - DeleteFunc: optr.deleteFunc, - } -} - -// adminGatesEventHandler handles changes to the admin-gates configmap by re-assessing all -// Upgradeable conditions. -func (optr *Operator) adminGatesEventHandler() cache.ResourceEventHandler { - return cache.ResourceEventHandlerFuncs{ - AddFunc: optr.addFunc, - UpdateFunc: optr.updateFunc, - DeleteFunc: optr.deleteFunc, - } -} - -// throttlePeriod returns the duration for which upgradeable status should be considered recent -// enough and unnecessary to update. The baseline duration is min. When the precondition checks -// on the payload are failing for less than afterPreconditionsFailed we want to synchronize -// the upgradeable status more frequently at beginning of an upgrade and return -// minOnFailedPreconditions which is expected to be lower than min. -// -// The cv parameter is expected to be non-nil. 
-func (intervals *upgradeableCheckIntervals) throttlePeriod(cv *configv1.ClusterVersion) time.Duration { - if cond := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, internal.ReleaseAccepted); cond != nil { - deadline := cond.LastTransitionTime.Add(intervals.afterPreconditionsFailed) - if cond.Reason == "PreconditionChecks" && cond.Status == configv1.ConditionFalse && time.Now().Before(deadline) { - return intervals.minOnFailedPreconditions - } - } - return intervals.min -} diff --git a/pkg/cvo/upgradeable_test.go b/pkg/cvo/upgradeable_test.go deleted file mode 100644 index 2a33b07f1..000000000 --- a/pkg/cvo/upgradeable_test.go +++ /dev/null @@ -1,151 +0,0 @@ -package cvo - -import ( - "testing" - "time" - - "github.com/google/go-cmp/cmp" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - configv1 "github.com/openshift/api/config/v1" - "github.com/openshift/client-go/config/clientset/versioned/fake" - - "github.com/openshift/cluster-version-operator/pkg/internal" -) - -func TestUpgradeableCheckIntervalsThrottlePeriod(t *testing.T) { - intervals := defaultUpgradeableCheckIntervals() - testCases := []struct { - name string - condition *configv1.ClusterOperatorStatusCondition - expected time.Duration - }{ - { - name: "no condition", - expected: intervals.min, - }, - { - name: "passing preconditions", - condition: &configv1.ClusterOperatorStatusCondition{Type: internal.ReleaseAccepted, Reason: "PreconditionChecks", Status: configv1.ConditionTrue, LastTransitionTime: metav1.Now()}, - expected: intervals.min, - }, - { - name: "failing but not precondition", - condition: &configv1.ClusterOperatorStatusCondition{Type: internal.ReleaseAccepted, Reason: "NotPreconditionChecks", Status: configv1.ConditionFalse, LastTransitionTime: metav1.Now()}, - expected: intervals.min, - }, - { - name: "failing preconditions but too long ago", - condition: &configv1.ClusterOperatorStatusCondition{ - Type: internal.ReleaseAccepted, - Reason: "PreconditionChecks", - 
Status: configv1.ConditionFalse, - LastTransitionTime: metav1.NewTime(time.Now().Add(-(intervals.afterPreconditionsFailed + time.Hour))), - }, - expected: intervals.min, - }, - { - name: "failing preconditions recently", - condition: &configv1.ClusterOperatorStatusCondition{Type: internal.ReleaseAccepted, Reason: "PreconditionChecks", Status: configv1.ConditionFalse, LastTransitionTime: metav1.Now()}, - expected: intervals.minOnFailedPreconditions, - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - cv := &configv1.ClusterVersion{Status: configv1.ClusterVersionStatus{Conditions: []configv1.ClusterOperatorStatusCondition{}}} - if tc.condition != nil { - cv.Status.Conditions = append(cv.Status.Conditions, *tc.condition) - } - if actual := intervals.throttlePeriod(cv); actual != tc.expected { - t.Errorf("throttlePeriod() = %v, want %v", actual, tc.expected) - } - }) - } -} - -func TestUpgradeInProgressUpgradeable(t *testing.T) { - testCases := []struct { - name string - cv *configv1.ClusterVersion - expected *configv1.ClusterOperatorStatusCondition - }{ - { - name: "empty conditions", - cv: &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "unit-test", - }, - }, - }, - { - name: "progressing is true", - cv: &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "unit-test", - }, - Status: configv1.ClusterVersionStatus{ - Conditions: []configv1.ClusterOperatorStatusCondition{ - { - Type: configv1.OperatorProgressing, - Status: configv1.ConditionTrue, - Reason: "UpdateInProgress", - Message: "An update is already in progress and the details are in the Progressing condition", - }, - }, - }, - }, - expected: &configv1.ClusterOperatorStatusCondition{ - Type: "UpgradeableUpgradeInProgress", - Status: configv1.ConditionTrue, - Reason: "UpdateInProgress", - Message: "An update is already in progress and the details are in the Progressing condition", - }, - }, - { - name: "progressing is false", - cv: 
&configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "unit-test", - }, - Status: configv1.ClusterVersionStatus{ - Conditions: []configv1.ClusterOperatorStatusCondition{ - { - Type: configv1.OperatorProgressing, - Status: configv1.ConditionFalse, - Reason: "a", - Message: "b", - }, - }, - }, - }, - }, - { - name: "progressing has no status", - cv: &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "unit-test", - }, - Status: configv1.ClusterVersionStatus{ - Conditions: []configv1.ClusterOperatorStatusCondition{ - { - Type: configv1.OperatorProgressing, - Reason: "a", - Message: "b", - }, - }, - }, - }, - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - client := fake.NewClientset(tc.cv) - unit := upgradeInProgressUpgradeable{name: "unit-test", cvLister: &clientCVLister{client: client}} - actual := unit.Check() - if diff := cmp.Diff(tc.expected, actual); diff != "" { - t.Errorf("%s differs from expected:\n%s", tc.name, diff) - } - }) - } -} diff --git a/pkg/risk/adminack/adminack.go b/pkg/risk/adminack/adminack.go new file mode 100644 index 000000000..7dd496391 --- /dev/null +++ b/pkg/risk/adminack/adminack.go @@ -0,0 +1,202 @@ +// Package adminack implements an update risk source based on +// administrator acknowledgements. 
+package adminack + +import ( + "context" + "fmt" + "regexp" + "slices" + "strings" + + "github.com/blang/semver/v4" + "github.com/google/go-cmp/cmp" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + informerscorev1 "k8s.io/client-go/informers/core/v1" + listerscorev1 "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + + configv1 "github.com/openshift/api/config/v1" + + "github.com/openshift/cluster-version-operator/pkg/internal" + "github.com/openshift/cluster-version-operator/pkg/risk" + "github.com/openshift/cluster-version-operator/pkg/risk/upgradeable" +) + +const ( + adminAckGateFmt string = "^ack-([4-5][.][0-9]{1,})-[^-]" +) + +var adminAckGateRegexp = regexp.MustCompile(adminAckGateFmt) + +type adminAck struct { + name string + currentVersion func() configv1.Release + adminGatesLister listerscorev1.ConfigMapNamespaceLister + adminAcksLister listerscorev1.ConfigMapNamespaceLister + lastSeen []configv1.ConditionalUpdateRisk +} + +// New returns a new update-risk source, tracking administrator acknowledgements. 
+func New(name string, currentVersion func() configv1.Release, adminGatesInformer, adminAcksInformer informerscorev1.ConfigMapInformer, changeCallback func()) risk.Source { + adminGatesLister := adminGatesInformer.Lister().ConfigMaps(internal.ConfigManagedNamespace) + adminAcksLister := adminAcksInformer.Lister().ConfigMaps(internal.ConfigNamespace) + source := &adminAck{name: name, currentVersion: currentVersion, adminGatesLister: adminGatesLister, adminAcksLister: adminAcksLister} + adminGatesInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, + UpdateFunc: func(_, _ interface{}) { source.eventHandler(changeCallback) }, + DeleteFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, + }) + adminAcksInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, + UpdateFunc: func(_, _ interface{}) { source.eventHandler(changeCallback) }, + DeleteFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, + }) + return source +} + +// Name returns the source's name. +func (a *adminAck) Name() string { + return a.name +} + +// Risks returns the current set of risks the source is aware of. 
+func (a *adminAck) Risks(_ context.Context, versions []string) ([]configv1.ConditionalUpdateRisk, map[string][]string, error) { + risks, version, err := a.risks() + if meta.IsNoMatchError(err) { + a.lastSeen = nil + return nil, nil, nil + } + + if len(risks) == 0 { + a.lastSeen = nil + return nil, nil, err + } + + a.lastSeen = risks + majorAndMinorUpdates := upgradeable.MajorAndMinorUpdates(version, versions) + if len(majorAndMinorUpdates) == 0 { + return risks, nil, err + } + + versionMap := make(map[string][]string, len(risks)) + for _, risk := range risks { + versionMap[risk.Name] = majorAndMinorUpdates + } + + return risks, versionMap, err +} + +func (a *adminAck) risks() ([]configv1.ConditionalUpdateRisk, semver.Version, error) { + currentRelease := a.currentVersion() + version, err := semver.Parse(currentRelease.Version) + if err != nil { + return []configv1.ConditionalUpdateRisk{{ + Name: fmt.Sprintf("%sUnknown", a.name), + Message: fmt.Sprintf("Failed to parse cluster version to check admin-acks: %v", err), + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, version, err + } + + var gateCm *corev1.ConfigMap + if gateCm, err = a.adminGatesLister.Get(internal.AdminGatesConfigMap); err != nil { + return []configv1.ConditionalUpdateRisk{{ + Name: fmt.Sprintf("%sUnknown", a.name), + Message: fmt.Sprintf("Failed to retrieve ConfigMap %s from namespace %s: %v", internal.AdminGatesConfigMap, internal.ConfigManagedNamespace, err), + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, version, err + } + var ackCm *corev1.ConfigMap + if ackCm, err = a.adminAcksLister.Get(internal.AdminAcksConfigMap); err != nil { + return []configv1.ConditionalUpdateRisk{{ + Name: fmt.Sprintf("%sUnknown", a.name), + Message: fmt.Sprintf("Failed to retrieve ConfigMap %s from namespace %s: 
%v", internal.AdminAcksConfigMap, internal.ConfigNamespace, err), + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, version, err + } + var risks []configv1.ConditionalUpdateRisk + for k, v := range gateCm.Data { + if risk := checkAdminGate(k, v, fmt.Sprintf("%s-", a.name), version, ackCm); risk != nil { + risks = append(risks, *risk) + } + } + + slices.SortFunc(risks, func(a, b configv1.ConditionalUpdateRisk) int { + return strings.Compare(a.Name, b.Name) + }) + + return risks, version, err +} + +func (a *adminAck) eventHandler(changeCallback func()) { + risks, _, err := a.risks() + if err != nil { + if changeCallback != nil { + changeCallback() + } + return + } + if diff := cmp.Diff(a.lastSeen, risks); diff != "" { + klog.V(2).Infof("admin-ack risks changed (-old +new):\n%s", diff) + if changeCallback != nil { + changeCallback() + } + } +} + +func gateApplicableToCurrentVersion(gateName string, currentVersion semver.Version) (bool, error) { + var applicable bool + if matches := adminAckGateRegexp.FindStringSubmatch(gateName); len(matches) != 2 { + return false, fmt.Errorf("%s configmap gate name %s has invalid format; must comply with %q", + internal.AdminGatesConfigMap, gateName, adminAckGateFmt) + } else { + ackVersion, err := semver.Parse(fmt.Sprintf("%s.0", matches[1])) + if err != nil { + return false, fmt.Errorf("failed to parse SemVer ack version from admin-gate %q: %w", gateName, err) + } + if ackVersion.Major == currentVersion.Major && ackVersion.Minor == currentVersion.Minor { + applicable = true + } + } + return applicable, nil +} + +func checkAdminGate(gateName string, gateValue string, riskNamePrefix string, currentVersion semver.Version, ackConfigmap *corev1.ConfigMap) *configv1.ConditionalUpdateRisk { + riskName := fmt.Sprintf("%s%s", riskNamePrefix, gateName) + if applies, err := gateApplicableToCurrentVersion(gateName, currentVersion); err == nil { 
+ if !applies { + return nil + } + } else { + return &configv1.ConditionalUpdateRisk{ + Name: riskName, + Message: fmt.Sprintf("%s configmap gate %q is invalid: %v", internal.AdminGatesConfigMap, gateName, err), + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + } + + } + if gateValue == "" { + return &configv1.ConditionalUpdateRisk{ + Name: riskName, + Message: fmt.Sprintf("%s configmap gate %s must contain a non-empty value.", internal.AdminGatesConfigMap, gateName), + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + } + } + if val, ok := ackConfigmap.Data[gateName]; !ok || val != "true" { + return &configv1.ConditionalUpdateRisk{ + Name: riskName, + Message: gateValue, + URL: "https://example.com/FIXME-look-for-a-URI-in-the-message", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + } + } + return nil +} diff --git a/pkg/risk/adminack/adminack_test.go b/pkg/risk/adminack/adminack_test.go new file mode 100644 index 000000000..04ae3fea0 --- /dev/null +++ b/pkg/risk/adminack/adminack_test.go @@ -0,0 +1,460 @@ +package adminack + +import ( + "context" + "strings" + "testing" + "time" + + "github.com/blang/semver/v4" + "github.com/google/go-cmp/cmp" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/informers" + kfake "k8s.io/client-go/kubernetes/fake" + ktesting "k8s.io/client-go/testing" + "k8s.io/client-go/tools/cache" + + configv1 "github.com/openshift/api/config/v1" + + "github.com/openshift/cluster-version-operator/pkg/internal" +) + +func TestOperator_upgradeableSync(t *testing.T) { + var defaultGateCm = corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: internal.AdminGatesConfigMap, Namespace: internal.ConfigManagedNamespace}, + } 
+ var defaultAckCm = corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: internal.AdminAcksConfigMap, Namespace: internal.ConfigNamespace}, + } + tests := []struct { + name string + gateCm *corev1.ConfigMap + ackCm *corev1.ConfigMap + expectedRisks []configv1.ConditionalUpdateRisk + expectedError string + }{ + { + name: "admin ack gate does not apply to version", + gateCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigManagedNamespace, Name: internal.AdminGatesConfigMap}, + Data: map[string]string{"ack-4.8-test": "Testing"}, + }, + ackCm: &defaultAckCm, + }, + { + name: "admin-gates configmap gate does not have value", + gateCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigManagedNamespace, Name: internal.AdminGatesConfigMap}, + Data: map[string]string{"ack-4.21-test": ""}, + }, + ackCm: &defaultAckCm, + expectedRisks: []configv1.ConditionalUpdateRisk{{ + Name: "AdminAck-ack-4.21-test", + Message: "admin-gates configmap gate ack-4.21-test must contain a non-empty value.", + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, + }, + { + name: "admin ack required", + gateCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigManagedNamespace, Name: internal.AdminGatesConfigMap}, + Data: map[string]string{"ack-4.21-test": "Description."}, + }, + ackCm: &defaultAckCm, + expectedRisks: []configv1.ConditionalUpdateRisk{{ + Name: "AdminAck-ack-4.21-test", + Message: "Description.", + URL: "https://example.com/FIXME-look-for-a-URI-in-the-message", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, + }, + { + name: "admin ack required and admin ack gate does not apply to version", + gateCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigManagedNamespace, Name: internal.AdminGatesConfigMap}, + Data: map[string]string{ + "ack-4.21-test": 
"Description A.", + "ack-4.22-test": "Description B.", + }, + }, + ackCm: &defaultAckCm, + expectedRisks: []configv1.ConditionalUpdateRisk{{ + Name: "AdminAck-ack-4.21-test", + Message: "Description A.", + URL: "https://example.com/FIXME-look-for-a-URI-in-the-message", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, + }, + { + name: "admin ack required and configmap gate does not have value", + gateCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigManagedNamespace, Name: internal.AdminGatesConfigMap}, + Data: map[string]string{ + "ack-4.21-with-description": "Description.", + "ack-4.21-without-description": "", + }, + }, + ackCm: &defaultAckCm, + expectedRisks: []configv1.ConditionalUpdateRisk{ + { + Name: "AdminAck-ack-4.21-with-description", + Message: "Description.", + URL: "https://example.com/FIXME-look-for-a-URI-in-the-message", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }, + { + Name: "AdminAck-ack-4.21-without-description", + Message: "admin-gates configmap gate ack-4.21-without-description must contain a non-empty value.", + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }, + }, + }, + { + name: "multiple admin acks required", + gateCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigManagedNamespace, Name: internal.AdminGatesConfigMap}, + Data: map[string]string{ + "ack-4.21-b": "Description 2.", + "ack-4.21-a": "Description 1.", + "ack-4.21-c": "Description 3.", + }, + }, + ackCm: &defaultAckCm, + expectedRisks: []configv1.ConditionalUpdateRisk{ + { + Name: "AdminAck-ack-4.21-a", + Message: "Description 1.", + URL: "https://example.com/FIXME-look-for-a-URI-in-the-message", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }, + { + Name: "AdminAck-ack-4.21-b", + Message: "Description 2.", + URL: 
"https://example.com/FIXME-look-for-a-URI-in-the-message", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }, + + { + Name: "AdminAck-ack-4.21-c", + Message: "Description 3.", + URL: "https://example.com/FIXME-look-for-a-URI-in-the-message", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }, + }, + }, + { + name: "admin ack found", + gateCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigManagedNamespace, Name: internal.AdminGatesConfigMap}, + Data: map[string]string{"ack-4.21-test": "Description."}, + }, + ackCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigNamespace, Name: internal.AdminAcksConfigMap}, + Data: map[string]string{"ack-4.21-test": "true"}, + }, + }, + { + name: "admin ack 2 of 3 found", + gateCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigManagedNamespace, Name: internal.AdminGatesConfigMap}, + Data: map[string]string{ + "ack-4.21-a": "Description 1.", + "ack-4.21-b": "Description 2.", + "ack-4.21-c": "Description 3.", + }, + }, + ackCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigNamespace, Name: internal.AdminAcksConfigMap}, + Data: map[string]string{"ack-4.21-a": "true", "ack-4.21-c": "true"}, + }, + expectedRisks: []configv1.ConditionalUpdateRisk{{ + Name: "AdminAck-ack-4.21-b", + Message: "Description 2.", + URL: "https://example.com/FIXME-look-for-a-URI-in-the-message", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, + }, + { + name: "multiple admin acks found", + gateCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigManagedNamespace, Name: internal.AdminGatesConfigMap}, + Data: map[string]string{ + "ack-4.21-a": "Description 1.", + "ack-4.21-b": "Description 2.", + "ack-4.21-c": "Description 3.", + }, + }, + ackCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigNamespace, Name: 
internal.AdminAcksConfigMap}, + Data: map[string]string{"ack-4.21-a": "true", "ack-4.21-b": "true", "ack-4.21-c": "true"}, + }, + }, + // delete tests are last so we don't have to re-create the config map for other tests + { + name: "admin-acks configmap not found", + gateCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigManagedNamespace, Name: internal.AdminGatesConfigMap}, + Data: map[string]string{"ack-4.21-a": "Description 1."}, + }, + // Name triggers deletion of config map + ackCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigNamespace, Name: "delete"}, + }, + expectedRisks: []configv1.ConditionalUpdateRisk{{ + Name: "AdminAckUnknown", + Message: `Failed to retrieve ConfigMap admin-acks from namespace openshift-config: configmap "admin-acks" not found`, + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, + expectedError: `configmap "admin-acks" not found`, + }, + // delete tests are last so we don't have to re-create the config map for other tests + { + name: "admin-gates configmap not found", + // Name triggers deletion of config map + gateCm: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: internal.ConfigManagedNamespace, Name: "delete"}, + }, + ackCm: nil, + expectedRisks: []configv1.ConditionalUpdateRisk{{ + Name: "AdminAckUnknown", + Message: `Failed to retrieve ConfigMap admin-gates from namespace openshift-config-managed: configmap "admin-gates" not found`, + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, + expectedError: `configmap "admin-gates" not found`, + }, + } + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + watcherStarted := make(chan struct{}) + f := kfake.NewClientset() + + // A catch-all watch reactor that allows us to inject the 
watcherStarted channel. + f.PrependWatchReactor("*", func(action ktesting.Action) (handled bool, ret watch.Interface, err error) { + gvr := action.GetResource() + ns := action.GetNamespace() + watch, err := f.Tracker().Watch(gvr, ns) + if err != nil { + return false, nil, err + } + close(watcherStarted) + return true, watch, nil + }) + cms := make(chan *corev1.ConfigMap, 1) + configManagedInformer := informers.NewSharedInformerFactory(f, 0) + cmInformer := configManagedInformer.Core().V1().ConfigMaps() + if _, err := cmInformer.Informer().AddEventHandler(&cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + cm := obj.(*corev1.ConfigMap) + t.Logf("cm added: %s/%s", cm.Namespace, cm.Name) + cms <- cm + }, + DeleteFunc: func(obj interface{}) { + cm := obj.(*corev1.ConfigMap) + t.Logf("cm deleted: %s/%s", cm.Namespace, cm.Name) + cms <- cm + }, + UpdateFunc: func(oldObj, newObj interface{}) { + cm := newObj.(*corev1.ConfigMap) + t.Logf("cm updated: %s/%s", cm.Namespace, cm.Name) + cms <- cm + }, + }); err != nil { + t.Errorf("error adding ConfigMap event handler: %v", err) + } + configManagedInformer.Start(ctx.Done()) + + _, err := f.CoreV1().ConfigMaps(internal.ConfigManagedNamespace).Create(ctx, &defaultGateCm, metav1.CreateOptions{}) + if err != nil { + t.Errorf("error injecting admin-gates configmap: %v", err) + } + waitForCm(t, cms) + _, err = f.CoreV1().ConfigMaps(internal.ConfigNamespace).Create(ctx, &defaultAckCm, metav1.CreateOptions{}) + if err != nil { + t.Errorf("error injecting admin-acks configmap: %v", err) + } + waitForCm(t, cms) + + for _, tt := range tests { + { + t.Run(tt.name, func(t *testing.T) { + source := New("AdminAck", func() configv1.Release { return configv1.Release{Version: "4.21.0"} }, cmInformer, cmInformer, nil) + if tt.gateCm != nil { + if tt.gateCm.Name == "delete" { + err := f.CoreV1().ConfigMaps(internal.ConfigManagedNamespace).Delete(ctx, internal.AdminGatesConfigMap, metav1.DeleteOptions{}) + if err != nil { + 
t.Errorf("error deleting configmap %s: %v", internal.AdminGatesConfigMap, err) + } + } else { + _, err = f.CoreV1().ConfigMaps(internal.ConfigManagedNamespace).Update(ctx, tt.gateCm, metav1.UpdateOptions{}) + if err != nil { + t.Errorf("error updating configmap %s: %v", internal.AdminGatesConfigMap, err) + } + } + waitForCm(t, cms) + } + if tt.ackCm != nil { + if tt.ackCm.Name == "delete" { + err := f.CoreV1().ConfigMaps(internal.ConfigNamespace).Delete(ctx, internal.AdminAcksConfigMap, metav1.DeleteOptions{}) + if err != nil { + t.Errorf("error deleting configmap %s: %v", internal.AdminAcksConfigMap, err) + } + } else { + _, err = f.CoreV1().ConfigMaps(internal.ConfigNamespace).Update(ctx, tt.ackCm, metav1.UpdateOptions{}) + if err != nil { + t.Errorf("error updating configmap admin-acks: %v", err) + } + } + waitForCm(t, cms) + } + + var expectedVersions map[string][]string + risks, versionMap, err := source.Risks(ctx, nil) + if diff := cmp.Diff(tt.expectedRisks, risks); diff != "" { + t.Errorf("risk mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(expectedVersions, versionMap); diff != "" { + t.Errorf("versions mismatch (-want +got):\n%s", diff) + } + var actualError string + if err != nil { + actualError = strings.ReplaceAll(err.Error(), "\u00a0", " ") + } + if diff := cmp.Diff(tt.expectedError, actualError); diff != "" { + t.Errorf("error mismatch (-want +got):\n%s", diff) + } + }) + } + } +} + +func Test_gateApplicableToCurrentVersion(t *testing.T) { + tests := []struct { + name string + gateName string + cv string + wantErr bool + expectedResult bool + }{ + { + name: "gate name invalid format no dot", + gateName: "ack-4>8-foo", + cv: "4.8.1", + wantErr: true, + expectedResult: false, + }, + { + name: "gate name invalid format 2 dots", + gateName: "ack-4..8-foo", + cv: "4.8.1", + wantErr: true, + expectedResult: false, + }, + { + name: "gate name invalid format does not start with ack", + gateName: "ck-4.8-foo", + cv: "4.8.1", + wantErr: true, + 
expectedResult: false, + }, + { + name: "gate name invalid format major version must be 4 or 5", + gateName: "ack-3.8-foo", + cv: "4.8.1", + wantErr: true, + expectedResult: false, + }, + { + name: "gate name invalid format minor version must be a number", + gateName: "ack-4.x-foo", + cv: "4.8.1", + wantErr: true, + expectedResult: false, + }, + { + name: "gate name invalid format no following dash", + gateName: "ack-4.8.1-foo", + cv: "4.8.1", + wantErr: true, + expectedResult: false, + }, + { + name: "gate name invalid format 2 following dashes", + gateName: "ack-4.x--foo", + cv: "4.8.1", + wantErr: true, + expectedResult: false, + }, + { + name: "gate name invalid format no description following dash", + gateName: "ack-4.x-", + cv: "4.8.1", + wantErr: true, + expectedResult: false, + }, + { + name: "gate name match", + gateName: "ack-4.8-foo", + cv: "4.8.1", + wantErr: false, + expectedResult: true, + }, + { + name: "gate name match big minor version", + gateName: "ack-4.123456-foo", + cv: "4.123456.0", + wantErr: false, + expectedResult: true, + }, + { + name: "gate name no match", + gateName: "ack-4.8-foo", + cv: "4.9.1", + wantErr: false, + expectedResult: false, + }, + { + name: "gate name no match multi digit minor", + gateName: "ack-4.8-foo", + cv: "4.80.1", + wantErr: false, + expectedResult: false, + }, + } + for _, tt := range tests { + { + t.Run(tt.name, func(t *testing.T) { + version, err := semver.Parse(tt.cv) + if err != nil { + t.Fatalf("unable to parse version %q: %v", tt.cv, err) + } + isApplicable, err := gateApplicableToCurrentVersion(tt.gateName, version) + if isApplicable != tt.expectedResult { + t.Errorf("gateApplicableToCurrentVersion() %s expected applicable %t, got applicable %t", tt.gateName, tt.expectedResult, isApplicable) + } + if err != nil && !tt.wantErr { + t.Errorf("gateApplicableToCurrentVersion() unexpected error: %v", err) + } else if err == nil && tt.wantErr { + t.Error("gateApplicableToCurrentVersion() unexpected success when 
an error was expected") + } + }) + } + } +} + +// waits for admin ack configmap changes +func waitForCm(t *testing.T, cmChan chan *corev1.ConfigMap) { + select { + case cm := <-cmChan: + t.Logf("Got configmap from channel: %s/%s", cm.Namespace, cm.Name) + case <-time.After(wait.ForeverTestTimeout): + t.Error("Informer did not get the configmap") + } +} diff --git a/pkg/risk/aggregate/aggregate.go b/pkg/risk/aggregate/aggregate.go new file mode 100644 index 000000000..f817f7179 --- /dev/null +++ b/pkg/risk/aggregate/aggregate.go @@ -0,0 +1,80 @@ +// Package aggregate implements an update risk source that aggregates +// lower-level update risk sources. package aggregate +package aggregate + +import ( + "context" + "errors" + "fmt" + "strings" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + configv1 "github.com/openshift/api/config/v1" + + "github.com/openshift/cluster-version-operator/pkg/risk" +) + +// aggregate implements an update risk source that aggregates +// lower-level update risk sources. +type aggregate struct { + sources []risk.Source +} + +func New(sources ...risk.Source) risk.Source { + return &aggregate{sources: sources} +} + +// Name returns the source's name. +func (a *aggregate) Name() string { + names := make([]string, 0, len(a.sources)) + for _, source := range a.sources { + names = append(names, source.Name()) + } + return fmt.Sprintf("Aggregate(%s)", strings.Join(names, ", ")) +} + +// Risks returns the current set of risks the source is aware of. 
+func (a *aggregate) Risks(ctx context.Context, versions []string) (risks []configv1.ConditionalUpdateRisk, versionMap map[string][]string, err error) { + var errs []error + riskMap := make(map[string]configv1.ConditionalUpdateRisk) + for _, source := range a.sources { + r, v, err := source.Risks(ctx, versions) + if err != nil { + errs = append(errs, err) // collect grumbles, but continue to process anything we got back + } + for i, risk := range r { + if existingRisk, ok := riskMap[risk.Name]; ok { + if diff := cmp.Diff(existingRisk, risk, cmpopts.IgnoreFields(configv1.ConditionalUpdateRisk{}, "Conditions")); diff != "" { + errs = append(errs, fmt.Errorf("divergent definitions of risk %q from %q:\n%s", risk.Name, source.Name(), diff)) + } + continue + } + riskMap[risk.Name] = r[i] + risks = append(risks, r[i]) + } + if len(v) > 0 && versionMap == nil { + versionMap = make(map[string][]string, len(v)) + } + for riskName, vs := range v { + if existingVersions, ok := versionMap[riskName]; ok { + for _, version := range vs { + found := false + for _, existingVersion := range existingVersions { + if version == existingVersion { + found = true + break + } + } + if !found { + existingVersions = append(existingVersions, version) + } + } + vs = existingVersions + } + versionMap[riskName] = vs + } + } + return risks, versionMap, errors.Join(errs...) 
+} diff --git a/pkg/risk/aggregate/aggregate_test.go b/pkg/risk/aggregate/aggregate_test.go new file mode 100644 index 000000000..f2407ec94 --- /dev/null +++ b/pkg/risk/aggregate/aggregate_test.go @@ -0,0 +1,133 @@ +package aggregate_test + +import ( + "context" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + + configv1 "github.com/openshift/api/config/v1" + + "github.com/openshift/cluster-version-operator/pkg/risk" + "github.com/openshift/cluster-version-operator/pkg/risk/aggregate" + "github.com/openshift/cluster-version-operator/pkg/risk/mock" +) + +func Test_aggregate(t *testing.T) { + tests := []struct { + name string + sources []risk.Source + versions []string + expectedName string + expectedRisks []configv1.ConditionalUpdateRisk + expectedVersions map[string][]string + expectedError string + }{ + { + name: "basic case", + sources: []risk.Source{ + &mock.Mock{ + InternalName: "MockA", + InternalRisks: []configv1.ConditionalUpdateRisk{{ + Name: "TestAlert", + Message: "Test summary.", + URL: "https://example.com/testAlert", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, + }, + &mock.Mock{ + InternalName: "MockB", + InternalRisks: []configv1.ConditionalUpdateRisk{{ + Name: "UpgradeableFalse", + Message: "Upgradeable summary.", + URL: "https://example.com/upgradeable", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, + InternalVersions: map[string][]string{ + "UpgradeableFalse": {"2.3.4"}, + }, + }, + }, + versions: []string{"1.2.3", "2.3.4"}, + expectedName: "Aggregate(MockA, MockB)", + expectedRisks: []configv1.ConditionalUpdateRisk{ + { + Name: "TestAlert", + Message: "Test summary.", + URL: "https://example.com/testAlert", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }, + { + Name: "UpgradeableFalse", + Message: "Upgradeable summary.", + URL: "https://example.com/upgradeable", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }, + }, + expectedVersions: 
map[string][]string{ + "TestAlert": {"1.2.3", "2.3.4"}, + "UpgradeableFalse": {"2.3.4"}, + }, + }, + { + name: "matching duplicates", + sources: []risk.Source{ + &mock.Mock{ + InternalName: "MockA", + InternalRisks: []configv1.ConditionalUpdateRisk{{ + Name: "TestAlert", + Message: "Test summary.", + URL: "https://example.com/testAlert", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, + }, + &mock.Mock{ + InternalName: "MockB", + InternalRisks: []configv1.ConditionalUpdateRisk{{ + Name: "TestAlert", + Message: "Test summary.", + URL: "https://example.com/testAlert", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, + }, + }, + versions: []string{"1.2.3"}, + expectedName: "Aggregate(MockA, MockB)", + expectedRisks: []configv1.ConditionalUpdateRisk{ + { + Name: "TestAlert", + Message: "Test summary.", + URL: "https://example.com/testAlert", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }, + }, + expectedVersions: map[string][]string{ + "TestAlert": {"1.2.3"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + source := aggregate.New(tt.sources...) 
+ name := source.Name() + if name != tt.expectedName { + t.Errorf("unexpected name %q diverges from expected %q", name, tt.expectedName) + } + risks, versions, err := source.Risks(context.Background(), tt.versions) + if diff := cmp.Diff(tt.expectedRisks, risks); diff != "" { + t.Errorf("risk mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(tt.expectedVersions, versions); diff != "" { + t.Errorf("versions mismatch (-want +got):\n%s", diff) + } + var actualError string + if err != nil { + actualError = strings.ReplaceAll(err.Error(), "\u00a0", " ") + } + if diff := cmp.Diff(tt.expectedError, actualError); diff != "" { + t.Errorf("error mismatch (-want +got):\n%s", diff) + } + }) + } +} diff --git a/pkg/risk/deletion/deletion.go b/pkg/risk/deletion/deletion.go new file mode 100644 index 000000000..a7f0d1e79 --- /dev/null +++ b/pkg/risk/deletion/deletion.go @@ -0,0 +1,76 @@ +// Package deletion implements an update risk source based on +// in-progress resource deletion. +package deletion + +import ( + "context" + "fmt" + "sort" + "strings" + + "github.com/blang/semver/v4" + + configv1 "github.com/openshift/api/config/v1" + + "github.com/openshift/cluster-version-operator/lib/resourcedelete" + "github.com/openshift/cluster-version-operator/pkg/risk" + "github.com/openshift/cluster-version-operator/pkg/risk/upgradeable" +) + +type deletion struct { + name string + currentVersion func() configv1.Release +} + +// New returns a new update-risk source, tracking in-progress resource deletion. +func New(name string, currentVersion func() configv1.Release) risk.Source { + return &deletion{name: name, currentVersion: currentVersion} +} + +// Name returns the source's name. +func (d *deletion) Name() string { + return d.name +} + +// Risks returns the current set of risks the source is aware of. 
+func (d *deletion) Risks(_ context.Context, versions []string) ([]configv1.ConditionalUpdateRisk, map[string][]string, error) { + currentRelease := d.currentVersion() + version, err := semver.Parse(currentRelease.Version) + if err != nil { + return []configv1.ConditionalUpdateRisk{{ + Name: fmt.Sprintf("%sUnknown", d.name), + Message: fmt.Sprintf("Failed to parse cluster version to check resource deletion: %v", err), + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, nil, err + } + + var risks []configv1.ConditionalUpdateRisk + if deletes := resourcedelete.DeletesInProgress(); len(deletes) > 0 { + sort.Strings(deletes) + resources := strings.Join(deletes, ",") + risks = append(risks, configv1.ConditionalUpdateRisk{ + Name: d.name, + Message: fmt.Sprintf("Cluster minor or major version upgrades are not allowed while resource deletions are in progress; resources=%s", resources), + URL: fmt.Sprintf("https://docs.redhat.com/en/documentation/openshift_container_platform/%d.%d/html/updating_clusters/understanding-openshift-updates-1#update-manifest-application_how-updates-work", version.Major, version.Minor), + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }) + } + + if len(risks) == 0 { + return nil, nil, nil + } + + majorAndMinorUpdates := upgradeable.MajorAndMinorUpdates(version, versions) + if len(majorAndMinorUpdates) == 0 { + return risks, nil, err + } + + versionMap := make(map[string][]string, len(risks)) + for _, risk := range risks { + versionMap[risk.Name] = majorAndMinorUpdates + } + + return risks, versionMap, err + +} diff --git a/pkg/risk/overrides/overrides.go b/pkg/risk/overrides/overrides.go new file mode 100644 index 000000000..b256946e8 --- /dev/null +++ b/pkg/risk/overrides/overrides.go @@ -0,0 +1,113 @@ +// Package overrides declares a risk of updating to the next major +// or minor release when ClusterVersion overrides are in 
place. +package overrides + +import ( + "context" + "fmt" + + "github.com/blang/semver/v4" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + + configv1 "github.com/openshift/api/config/v1" + configinformersv1 "github.com/openshift/client-go/config/informers/externalversions/config/v1" + configlistersv1 "github.com/openshift/client-go/config/listers/config/v1" + + "github.com/openshift/cluster-version-operator/pkg/risk" + "github.com/openshift/cluster-version-operator/pkg/risk/upgradeable" +) + +type overrides struct { + name string + cvName string + cvLister configlistersv1.ClusterVersionLister + sawOverrides *bool +} + +// New returns a new update-risk source, tracking the use of ClusterVersion overrides. +func New(name string, cvName string, cvInformer configinformersv1.ClusterVersionInformer, changeCallback func()) risk.Source { + cvLister := cvInformer.Lister() + source := &overrides{name: name, cvName: cvName, cvLister: cvLister} + cvInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, + UpdateFunc: func(_, _ interface{}) { source.eventHandler(changeCallback) }, + }) + return source +} + +// Name returns the source's name. +func (o *overrides) Name() string { + return o.name +} + +// Risks returns the current set of risks the source is aware of. 
+func (o *overrides) Risks(_ context.Context, versions []string) ([]configv1.ConditionalUpdateRisk, map[string][]string, error) { + cv, overrides, err := o.hasOverrides() + if meta.IsNoMatchError(err) || apierrors.IsNotFound(err) { + return nil, nil, nil + } + + o.sawOverrides = &overrides + if !overrides { + return nil, nil, nil + } + + version, err := semver.Parse(cv.Status.Desired.Version) + if err != nil { + return nil, nil, fmt.Errorf("failed to parse current target %q to SemVer: %w", cv.Status.Desired.Version, err) + } + + risk := configv1.ConditionalUpdateRisk{ + Name: o.name, + Message: "Disabling ownership via cluster version overrides prevents updates between minor or major versions. Please remove overrides before requesting a minor or major version update.", + URL: fmt.Sprintf("https://docs.redhat.com/en/documentation/openshift_container_platform/%d.%d/html/config_apis/clusterversion-config-openshift-io-v1#spec-8", version.Major, version.Minor), + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + } + + majorAndMinorUpdates := upgradeable.MajorAndMinorUpdates(version, versions) + + var versionMap map[string][]string + if len(majorAndMinorUpdates) > 0 { + versionMap = map[string][]string{risk.Name: majorAndMinorUpdates} + } + + return []configv1.ConditionalUpdateRisk{risk}, versionMap, nil +} + +func (o *overrides) hasOverrides() (*configv1.ClusterVersion, bool, error) { + cv, err := o.cvLister.Get(o.cvName) + if err != nil { + return cv, false, err + } + + overrides := false + for _, o := range cv.Spec.Overrides { + if o.Unmanaged { + overrides = true + } + } + return cv, overrides, nil +} + +func (o *overrides) eventHandler(changeCallback func()) { + if o.sawOverrides == nil { + if changeCallback != nil { + changeCallback() + } + return + } + _, overrides, err := o.hasOverrides() + if err != nil { + return + } + if overrides != *o.sawOverrides { + klog.V(2).Infof("ClusterVersion overrides changed from %t to %t, triggering override 
callback", *o.sawOverrides, overrides) + if changeCallback != nil { + changeCallback() + } + } +} diff --git a/pkg/risk/overrides/overrides_test.go b/pkg/risk/overrides/overrides_test.go new file mode 100644 index 000000000..046ce040e --- /dev/null +++ b/pkg/risk/overrides/overrides_test.go @@ -0,0 +1,169 @@ +package overrides_test + +import ( + "context" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/cache" + + configv1 "github.com/openshift/api/config/v1" + "github.com/openshift/client-go/config/clientset/versioned/fake" + configinformersv1 "github.com/openshift/client-go/config/informers/externalversions" + + "github.com/openshift/cluster-version-operator/pkg/risk/overrides" +) + +func Test_New(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + cv := &configv1.ClusterVersion{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-version", + }, + Status: configv1.ClusterVersionStatus{ + Desired: configv1.Release{Version: "4.21.0"}, + }, + } + fakeClient := fake.NewClientset(cv) + informerFactory := configinformersv1.NewSharedInformerFactory(fakeClient, 0) + cvInformer := informerFactory.Config().V1().ClusterVersions() + + changeChannel := make(chan struct{}) + changeCallback := func() { + t.Logf("%s sending change notification", time.Now()) + changeChannel <- struct{}{} + } + + versions := []string{"4.21.1", "4.22.0", "5.0.0"} + + expectedName := "ClusterVersionOverrides" + source := overrides.New(expectedName, cv.Name, cvInformer, changeCallback) + + t.Run("name", func(t *testing.T) { + if name := source.Name(); name != expectedName { + t.Errorf("unexpected name %q diverges from expected %q", name, expectedName) + } + }) + + informerFactory.Start(ctx.Done()) + cache.WaitForCacheSync(ctx.Done(), cvInformer.Informer().HasSynced) + + drained := false + for !drained { // drain any early notifications + select { + case <-changeChannel: + 
t.Log("received early change notification") + continue + default: + t.Log("early change notifications drained") + drained = true + } + } + + t.Run("quick change notification on overrides set", func(t *testing.T) { + cv.Spec.Overrides = []configv1.ComponentOverride{{ + Kind: "Deployment", + Namespace: "example", + Name: "test", + Unmanaged: true, + }} + + t.Log("updating the ClusterVersion to set overrides, which should trigger a quick change") + start := time.Now() + _, err := fakeClient.ConfigV1().ClusterVersions().Update(ctx, cv, metav1.UpdateOptions{}) + if err != nil { + t.Fatalf("failed to update ClusterVersion: %v", err) + } + + select { + case <-changeChannel: + end := time.Now() + t.Logf("received change notification %s after %s", end, end.Sub(start)) + case <-time.After(1 * time.Second): + t.Fatalf("did not receive change notification within one second after ClusterVersion change") + } + + t.Run("expected Risks output", func(t *testing.T) { + expectedRisks := []configv1.ConditionalUpdateRisk{{ + Name: "ClusterVersionOverrides", + Message: "Disabling ownership via cluster version overrides prevents updates between minor or major versions. 
Please remove overrides before requesting a minor or major version update.", + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/4.21/html/config_apis/clusterversion-config-openshift-io-v1#spec-8", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }} + expectedVersions := map[string][]string{ + "ClusterVersionOverrides": {"4.22.0", "5.0.0"}, + } + risks, versionsMap, err := source.Risks(ctx, versions) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if diff := cmp.Diff(expectedRisks, risks); diff != "" { + t.Errorf("risk mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(expectedVersions, versionsMap); diff != "" { + t.Errorf("versions mismatch (-want +got):\n%s", diff) + } + }) + }) + + t.Run("no change notification on unrelated change", func(t *testing.T) { + cv.Spec.Channel = "testing" + + t.Log("updating the ClusterVersion channel, which should not trigger a change") + start := time.Now() + _, err := fakeClient.ConfigV1().ClusterVersions().Update(ctx, cv, metav1.UpdateOptions{}) + if err != nil { + t.Fatalf("failed to update ClusterVersion: %v", err) + } + + select { + case <-changeChannel: + end := time.Now() + t.Fatalf("received change notification after %s", end.Sub(start)) + case <-time.After(1 * time.Second): + t.Logf("did not receive change notification within one second after ClusterVersion change") + } + }) + + t.Run("quick change notification on overrides unset", func(t *testing.T) { + if cv.Spec.Overrides == nil { + t.Skip("cannot test change-to-unset unless the earlier change-to-set test case ran successfully") + } + cv.Spec.Overrides = nil + + t.Log("updating the ClusterVersion to unset overrides, which should trigger a change") + start := time.Now() + _, err := fakeClient.ConfigV1().ClusterVersions().Update(ctx, cv, metav1.UpdateOptions{}) + if err != nil { + t.Fatalf("failed to update ClusterVersion: %v", err) + } + + select { + case <-changeChannel: + end := time.Now() + 
t.Logf("received change notification %s after %s", end, end.Sub(start)) + case <-time.After(1 * time.Second): + t.Fatalf("did not receive change notification within one second after ClusterVersion change") + } + + t.Run("expected Risks output", func(t *testing.T) { + var expectedRisks []configv1.ConditionalUpdateRisk + var expectedVersions map[string][]string + risks, versionsMap, err := source.Risks(ctx, versions) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if diff := cmp.Diff(expectedRisks, risks); diff != "" { + t.Errorf("risk mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(expectedVersions, versionsMap); diff != "" { + t.Errorf("versions mismatch (-want +got):\n%s", diff) + } + }) + }) +} diff --git a/pkg/risk/risk.go b/pkg/risk/risk.go index 065999a0b..b75d29392 100644 --- a/pkg/risk/risk.go +++ b/pkg/risk/risk.go @@ -23,7 +23,7 @@ import ( ) // validName enforces the non-empty CamelCase risk-naming convention. -var validName = regexp.MustCompile(`^[A-Z][A-Za-z0-9]*$`) +var validName = regexp.MustCompile(`^[A-Z][A-Za-z0-9.-]*$`) // Source represents a risk source, like the update service's // conditionalEdges. Or firing in-cluster alerts. Or another source. diff --git a/pkg/risk/risk_test.go b/pkg/risk/risk_test.go index c0b3638a5..b13d6436a 100644 --- a/pkg/risk/risk_test.go +++ b/pkg/risk/risk_test.go @@ -112,7 +112,7 @@ func Test_Merge(t *testing.T) { }}, }, expectedUpdates: []configv1.Release{{Version: "1.2.3"}}, - expectedError: `invalid risk name "" does not match "^[A-Z][A-Za-z0-9]*$"`, + expectedError: `invalid risk name "" does not match "^[A-Z][A-Za-z0-9.-]*$"`, }, { name: "single-source collision", diff --git a/pkg/risk/updating/updating.go b/pkg/risk/updating/updating.go new file mode 100644 index 000000000..c1c8f4c1c --- /dev/null +++ b/pkg/risk/updating/updating.go @@ -0,0 +1,111 @@ +// Package update declares a risk of updating to a later major or +// minor version when an update is currently in progress. 
+package updating + +import ( + "context" + "fmt" + + "github.com/blang/semver/v4" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + + configv1 "github.com/openshift/api/config/v1" + configinformersv1 "github.com/openshift/client-go/config/informers/externalversions/config/v1" + configlistersv1 "github.com/openshift/client-go/config/listers/config/v1" + + "github.com/openshift/cluster-version-operator/lib/resourcemerge" + "github.com/openshift/cluster-version-operator/pkg/risk" + "github.com/openshift/cluster-version-operator/pkg/risk/upgradeable" +) + +type updating struct { + name string + cvName string + cvLister configlistersv1.ClusterVersionLister + wasUpdating *bool +} + +// New returns a new updating-risk source, tracking whether an update is in progress. +func New(name string, cvName string, cvInformer configinformersv1.ClusterVersionInformer, changeCallback func()) risk.Source { + cvLister := cvInformer.Lister() + source := &updating{name: name, cvName: cvName, cvLister: cvLister} + cvInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, + UpdateFunc: func(_, _ interface{}) { source.eventHandler(changeCallback) }, + }) + return source +} + +// Name returns the source's name. +func (u *updating) Name() string { + return u.name +} + +// Risks returns the current set of risks the source is aware of. 
+func (u *updating) Risks(_ context.Context, versions []string) ([]configv1.ConditionalUpdateRisk, map[string][]string, error) { + cv, updating, err := u.isUpdating() + if meta.IsNoMatchError(err) || apierrors.IsNotFound(err) { + return nil, nil, nil + } else if err != nil { + return nil, nil, err + } + + u.wasUpdating = &updating + if !updating { + return nil, nil, nil + } + + version, err := semver.Parse(cv.Status.Desired.Version) + if err != nil { + return nil, nil, fmt.Errorf("failed to parse current target %q to SemVer: %w", cv.Status.Desired.Version, err) + } + + risk := configv1.ConditionalUpdateRisk{ + Name: u.name, + Message: fmt.Sprintf("An update is already in progress and the details are in the %s condition.", configv1.OperatorProgressing), + URL: fmt.Sprintf("https://docs.redhat.com/en/documentation/openshift_container_platform/%d.%d/html/updating_clusters/understanding-openshift-updates-1#understanding_clusteroperator_conditiontypes_understanding-openshift-updates", version.Major, version.Minor), + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + } + + majorAndMinorUpdates := upgradeable.MajorAndMinorUpdates(version, versions) + + var versionMap map[string][]string + if len(majorAndMinorUpdates) > 0 { + versionMap = map[string][]string{risk.Name: majorAndMinorUpdates} + } + + return []configv1.ConditionalUpdateRisk{risk}, versionMap, nil +} + +func (u *updating) isUpdating() (*configv1.ClusterVersion, bool, error) { + cv, err := u.cvLister.Get(u.cvName) + if err != nil { + return cv, false, err + } + + updating := resourcemerge.IsOperatorStatusConditionTrue(cv.Status.Conditions, configv1.OperatorProgressing) + return cv, updating, nil +} + +func (u *updating) eventHandler(changeCallback func()) { + if u.wasUpdating == nil { + if changeCallback != nil { + changeCallback() + } + return + } + _, updating, err := u.isUpdating() + if err != nil { + return + } + if updating != *u.wasUpdating { + klog.V(2).Infof("ClusterVersion updating 
changed from %t to %t, triggering updating callback", *u.wasUpdating, updating)
+		if changeCallback != nil {
+			changeCallback()
+		}
+	}
+}
diff --git a/pkg/risk/updating/updating_test.go b/pkg/risk/updating/updating_test.go
new file mode 100644
index 000000000..f9a37bacb
--- /dev/null
+++ b/pkg/risk/updating/updating_test.go
@@ -0,0 +1,190 @@
+package updating_test
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/google/go-cmp/cmp"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/tools/cache"
+
+	configv1 "github.com/openshift/api/config/v1"
+	"github.com/openshift/client-go/config/clientset/versioned/fake"
+	configinformersv1 "github.com/openshift/client-go/config/informers/externalversions"
+
+	"github.com/openshift/cluster-version-operator/lib/resourcemerge"
+	"github.com/openshift/cluster-version-operator/pkg/risk/updating"
+)
+
+func Test_New(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	cv := &configv1.ClusterVersion{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "test-version",
+		},
+		Status: configv1.ClusterVersionStatus{
+			Desired: configv1.Release{Version: "4.21.0"},
+		},
+	}
+	fakeClient := fake.NewClientset(cv)
+	informerFactory := configinformersv1.NewSharedInformerFactory(fakeClient, 0)
+	cvInformer := informerFactory.Config().V1().ClusterVersions()
+
+	changeChannel := make(chan struct{})
+	changeCallback := func() {
+		t.Logf("%s sending change notification", time.Now())
+		changeChannel <- struct{}{}
+	}
+
+	versions := []string{"4.21.1", "4.22.0", "5.0.0"}
+
+	expectedName := "ClusterVersionUpdating"
+	source := updating.New(expectedName, cv.Name, cvInformer, changeCallback)
+
+	t.Run("name", func(t *testing.T) {
+		if name := source.Name(); name != expectedName {
+			t.Errorf("unexpected name %q diverges from expected %q", name, expectedName)
+		}
+	})
+
+	informerFactory.Start(ctx.Done())
+	cache.WaitForCacheSync(ctx.Done(), cvInformer.Informer().HasSynced)
+
+	drained := 
false + for !drained { // drain any early notifications + select { + case <-changeChannel: + t.Log("received early change notification") + continue + default: + t.Log("early change notifications drained") + drained = true + } + } + + t.Run("empty conditions", func(t *testing.T) { + var expectedRisks []configv1.ConditionalUpdateRisk + var expectedVersions map[string][]string + risks, versionsMap, err := source.Risks(ctx, versions) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if diff := cmp.Diff(expectedRisks, risks); diff != "" { + t.Errorf("risk mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(expectedVersions, versionsMap); diff != "" { + t.Errorf("versions mismatch (-want +got):\n%s", diff) + } + }) + + t.Run("quick change notification on progressing true", func(t *testing.T) { + cv.Status.Conditions = []configv1.ClusterOperatorStatusCondition{{ + Type: configv1.OperatorProgressing, + Status: configv1.ConditionTrue, + Reason: "UpdateInProgress", + Message: "An update is already in progress and the details are in the Progressing condition.", + }} + + t.Logf("updating the ClusterVersion to set %s=%s, which should trigger a quick change", configv1.OperatorProgressing, configv1.ConditionTrue) + start := time.Now() + _, err := fakeClient.ConfigV1().ClusterVersions().UpdateStatus(ctx, cv, metav1.UpdateOptions{}) + if err != nil { + t.Fatalf("failed to update ClusterVersion: %v", err) + } + + select { + case <-changeChannel: + end := time.Now() + t.Logf("received change notification %s after %s", end, end.Sub(start)) + case <-time.After(1 * time.Second): + t.Fatalf("did not receive change notification within one second after ClusterVersion change") + } + + t.Run("expected Risks output", func(t *testing.T) { + expectedRisks := []configv1.ConditionalUpdateRisk{{ + Name: "ClusterVersionUpdating", + Message: "An update is already in progress and the details are in the Progressing condition.", + URL: 
"https://docs.redhat.com/en/documentation/openshift_container_platform/4.21/html/updating_clusters/understanding-openshift-updates-1#understanding_clusteroperator_conditiontypes_understanding-openshift-updates", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }} + expectedVersions := map[string][]string{ + "ClusterVersionUpdating": {"4.22.0", "5.0.0"}, + } + risks, versionsMap, err := source.Risks(ctx, versions) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if diff := cmp.Diff(expectedRisks, risks); diff != "" { + t.Errorf("risk mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(expectedVersions, versionsMap); diff != "" { + t.Errorf("versions mismatch (-want +got):\n%s", diff) + } + }) + }) + + t.Run("no change notification on unrelated change", func(t *testing.T) { + cv.Status.Desired.URL = "https://example.com/release-notes" + + t.Log("updating the ClusterVersion desired release's release notes, which should not trigger a change") + start := time.Now() + _, err := fakeClient.ConfigV1().ClusterVersions().UpdateStatus(ctx, cv, metav1.UpdateOptions{}) + if err != nil { + t.Fatalf("failed to update ClusterVersion: %v", err) + } + + select { + case <-changeChannel: + end := time.Now() + t.Fatalf("received change notification after %s", end.Sub(start)) + case <-time.After(1 * time.Second): + t.Logf("did not receive change notification within one second after ClusterVersion change") + } + }) + + t.Run("quick change notification on progressing false", func(t *testing.T) { + if !resourcemerge.IsOperatorStatusConditionTrue(cv.Status.Conditions, configv1.OperatorProgressing) { + t.Skip("cannot test change-to-unset unless the earlier change-to-set test case ran successfully") + } + resourcemerge.SetOperatorStatusCondition(&cv.Status.Conditions, configv1.ClusterOperatorStatusCondition{ + Type: configv1.OperatorProgressing, + Status: configv1.ConditionFalse, + Reason: "AsExpected", + Message: "The cluster is running 4.21.0.", + }) 
+
+		// Was a copy-paste from overrides_test ("unset overrides"); this
+		// subtest actually transitions Progressing to False.
+		t.Logf("updating the ClusterVersion to set %s=%s, which should trigger a change", configv1.OperatorProgressing, configv1.ConditionFalse)
+		start := time.Now()
+		_, err := fakeClient.ConfigV1().ClusterVersions().UpdateStatus(ctx, cv, metav1.UpdateOptions{})
+		if err != nil {
+			t.Fatalf("failed to update ClusterVersion: %v", err)
+		}
+
+		select {
+		case <-changeChannel:
+			end := time.Now()
+			t.Logf("received change notification %s after %s", end, end.Sub(start))
+		case <-time.After(1 * time.Second):
+			t.Fatalf("did not receive change notification within one second after ClusterVersion change")
+		}
+
+		t.Run("expected Risks output", func(t *testing.T) {
+			var expectedRisks []configv1.ConditionalUpdateRisk
+			var expectedVersions map[string][]string
+			risks, versionsMap, err := source.Risks(ctx, versions)
+			if err != nil {
+				t.Errorf("unexpected error: %v", err)
+			}
+			if diff := cmp.Diff(expectedRisks, risks); diff != "" {
+				t.Errorf("risk mismatch (-want +got):\n%s", diff)
+			}
+			if diff := cmp.Diff(expectedVersions, versionsMap); diff != "" {
+				t.Errorf("versions mismatch (-want +got):\n%s", diff)
+			}
+		})
+	})
+}
diff --git a/pkg/risk/upgradeable/upgradeable.go b/pkg/risk/upgradeable/upgradeable.go
new file mode 100644
index 000000000..20d7a4bcf
--- /dev/null
+++ b/pkg/risk/upgradeable/upgradeable.go
@@ -0,0 +1,136 @@
+// Package upgradeable implements an update risk source based on ClusterOperator
+// Upgradeable conditions.
+package upgradeable + +import ( + "context" + "fmt" + "slices" + "strings" + + "github.com/blang/semver/v4" + "github.com/google/go-cmp/cmp" + + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + + configv1 "github.com/openshift/api/config/v1" + configinformersv1 "github.com/openshift/client-go/config/informers/externalversions/config/v1" + configlistersv1 "github.com/openshift/client-go/config/listers/config/v1" + + "github.com/openshift/cluster-version-operator/lib/resourcemerge" + "github.com/openshift/cluster-version-operator/pkg/risk" +) + +type upgradeable struct { + name string + currentVersion func() configv1.Release + coLister configlistersv1.ClusterOperatorLister + lastSeen []configv1.ConditionalUpdateRisk +} + +// New returns a new update-risk source, tracking ClusterOperator Upgradeable conditions. +func New(name string, currentVersion func() configv1.Release, coInformer configinformersv1.ClusterOperatorInformer, changeCallback func()) risk.Source { + coLister := coInformer.Lister() + source := &upgradeable{name: name, currentVersion: currentVersion, coLister: coLister} + coInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, + UpdateFunc: func(_, _ interface{}) { source.eventHandler(changeCallback) }, + DeleteFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, + }) + return source +} + +// Name returns the source's name. +func (u *upgradeable) Name() string { + return u.name +} + +// Risks returns the current set of risks the source is aware of. 
+func (u *upgradeable) Risks(_ context.Context, versions []string) ([]configv1.ConditionalUpdateRisk, map[string][]string, error) { + risks, version, err := u.risks() + if meta.IsNoMatchError(err) { + u.lastSeen = nil + return nil, nil, nil + } + + if len(risks) == 0 { + u.lastSeen = nil + return nil, nil, err + } + + u.lastSeen = risks + majorAndMinorUpdates := MajorAndMinorUpdates(version, versions) + if len(majorAndMinorUpdates) == 0 { + return risks, nil, err + } + + versionMap := make(map[string][]string, len(risks)) + for _, risk := range risks { + versionMap[risk.Name] = majorAndMinorUpdates + } + + return risks, versionMap, err +} + +func (u *upgradeable) risks() ([]configv1.ConditionalUpdateRisk, semver.Version, error) { + currentRelease := u.currentVersion() + version, err := semver.Parse(currentRelease.Version) + if err != nil { + return []configv1.ConditionalUpdateRisk{{ + Name: fmt.Sprintf("%sUnknown", u.name), + Message: fmt.Sprintf("Failed to parse cluster version to check Upgradeable conditions: %v", err), + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, version, err + } + + ops, err := u.coLister.List(labels.Everything()) + if meta.IsNoMatchError(err) { + return nil, version, nil + } + if err != nil { + return []configv1.ConditionalUpdateRisk{{ + Name: fmt.Sprintf("%sUnknown", u.name), + Message: fmt.Sprintf("Failed to retrieve ClusterOperators to check Upgradeable conditions: %v", err), + URL: fmt.Sprintf("https://docs.redhat.com/en/documentation/openshift_container_platform/%d.%d/html/updating_clusters/understanding-openshift-updates-1#understanding_clusteroperator_conditiontypes_understanding-openshift-updates", version.Major, version.Minor), + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }}, version, err + } + + var risks []configv1.ConditionalUpdateRisk + for _, op := range ops { + if up := 
resourcemerge.FindOperatorStatusCondition(op.Status.Conditions, configv1.OperatorUpgradeable); up != nil && up.Status == configv1.ConditionFalse { + risks = append(risks, configv1.ConditionalUpdateRisk{ + Name: fmt.Sprintf("%s-%s", u.name, op.Name), + Message: up.Message, + URL: fmt.Sprintf("https://docs.redhat.com/en/documentation/openshift_container_platform/%d.%d/html/updating_clusters/understanding-openshift-updates-1#understanding_clusteroperator_conditiontypes_understanding-openshift-updates", version.Major, version.Minor), + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }) + } + } + + slices.SortFunc(risks, func(a, b configv1.ConditionalUpdateRisk) int { + return strings.Compare(a.Name, b.Name) + }) + + return risks, version, nil +} + +func (u *upgradeable) eventHandler(changeCallback func()) { + risks, _, err := u.risks() + if err != nil { + if changeCallback != nil { + changeCallback() + } + return + } + if diff := cmp.Diff(u.lastSeen, risks); diff != "" { + klog.V(2).Infof("ClusterOperator Upgradeable changed (-old +new):\n%s", diff) + if changeCallback != nil { + changeCallback() + } + } +} diff --git a/pkg/risk/upgradeable/upgradeable_test.go b/pkg/risk/upgradeable/upgradeable_test.go new file mode 100644 index 000000000..4250a8bb6 --- /dev/null +++ b/pkg/risk/upgradeable/upgradeable_test.go @@ -0,0 +1,194 @@ +package upgradeable_test + +import ( + "context" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/cache" + + configv1 "github.com/openshift/api/config/v1" + "github.com/openshift/client-go/config/clientset/versioned/fake" + configinformersv1 "github.com/openshift/client-go/config/informers/externalversions" + + "github.com/openshift/cluster-version-operator/lib/resourcemerge" + "github.com/openshift/cluster-version-operator/pkg/risk/upgradeable" +) + +func Test_New(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer 
cancel() + + co := &configv1.ClusterOperator{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-operator", + }, + Status: configv1.ClusterOperatorStatus{ + Conditions: []configv1.ClusterOperatorStatusCondition{ + { + Type: configv1.OperatorUpgradeable, + Status: configv1.ConditionTrue, + }, + }, + }, + } + fakeClient := fake.NewClientset(co) + informerFactory := configinformersv1.NewSharedInformerFactory(fakeClient, 0) + coInformer := informerFactory.Config().V1().ClusterOperators() + + changeChannel := make(chan struct{}) + changeCallback := func() { + t.Logf("%s sending change notification", time.Now()) + changeChannel <- struct{}{} + } + + currentVersion := func() configv1.Release { + return configv1.Release{Version: "4.21.0"} + } + versions := []string{"4.21.1", "4.22.0", "5.0.0"} + + expectedName := "ClusterOperatorUpgradeable" + source := upgradeable.New(expectedName, currentVersion, coInformer, changeCallback) + + t.Run("name", func(t *testing.T) { + if name := source.Name(); name != expectedName { + t.Errorf("unexpected name %q diverges from expected %q", name, expectedName) + } + }) + + informerFactory.Start(ctx.Done()) + cache.WaitForCacheSync(ctx.Done(), coInformer.Informer().HasSynced) + + drained := false + for !drained { // drain any early notifications + select { + case <-changeChannel: + t.Log("received early change notification") + continue + default: + t.Log("early change notifications drained") + drained = true + } + } + + t.Run("quick change notification on Upgradeable change to False", func(t *testing.T) { + resourcemerge.SetOperatorStatusCondition(&co.Status.Conditions, configv1.ClusterOperatorStatusCondition{ + Type: configv1.OperatorUpgradeable, + Status: configv1.ConditionFalse, + Reason: "TestReason", + Message: "Cluster is not upgradeable for testing.", + }) + + select { + case <-changeChannel: + t.Fatalf("updating the local pointer triggered a change notification, when it should have taken until the fake client update") + case 
<-time.After(1 * time.Second): + t.Logf("updating the ClusterOperator to %s=%s to trigger a change", configv1.OperatorUpgradeable, configv1.ConditionFalse) + } + start := time.Now() + + _, err := fakeClient.ConfigV1().ClusterOperators().Update(ctx, co, metav1.UpdateOptions{}) + if err != nil { + t.Fatalf("failed to update ClusterOperator: %v", err) + } + + select { + case <-changeChannel: + end := time.Now() + t.Logf("received change notification %s after %s", end, end.Sub(start)) + case <-time.After(1 * time.Second): + t.Fatalf("did not receive change notification within one second after ClusterOperator change") + } + + t.Run("expected Risks output", func(t *testing.T) { + expectedRisks := []configv1.ConditionalUpdateRisk{{ + Name: "ClusterOperatorUpgradeable-test-operator", + Message: "Cluster is not upgradeable for testing.", + URL: "https://docs.redhat.com/en/documentation/openshift_container_platform/4.21/html/updating_clusters/understanding-openshift-updates-1#understanding_clusteroperator_conditiontypes_understanding-openshift-updates", + MatchingRules: []configv1.ClusterCondition{{Type: "Always"}}, + }} + expectedVersions := map[string][]string{ + "ClusterOperatorUpgradeable-test-operator": {"4.22.0", "5.0.0"}, + } + risks, versionsMap, err := source.Risks(ctx, versions) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if diff := cmp.Diff(expectedRisks, risks); diff != "" { + t.Errorf("risk mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(expectedVersions, versionsMap); diff != "" { + t.Errorf("versions mismatch (-want +got):\n%s", diff) + } + }) + }) + + t.Run("no change notification on unrelated change", func(t *testing.T) { + resourcemerge.SetOperatorStatusCondition(&co.Status.Conditions, configv1.ClusterOperatorStatusCondition{ + Type: configv1.OperatorAvailable, + Status: configv1.ConditionFalse, + Reason: "TestReason", + Message: "Cluster is not available for testing.", + }) + + t.Logf("updating the ClusterOperator to %s=%s, 
which should not trigger a change", configv1.OperatorAvailable, configv1.ConditionFalse) + start := time.Now() + _, err := fakeClient.ConfigV1().ClusterOperators().Update(ctx, co, metav1.UpdateOptions{}) + if err != nil { + t.Fatalf("failed to update ClusterOperator: %v", err) + } + + select { + case <-changeChannel: + end := time.Now() + t.Fatalf("received change notification after %s", end.Sub(start)) + case <-time.After(1 * time.Second): + t.Logf("did not receive change notification within one second after ClusterOperator change") + } + }) + + t.Run("quick change notification on Upgradeable change to True", func(t *testing.T) { + if resourcemerge.IsOperatorStatusConditionTrue(co.Status.Conditions, configv1.OperatorUpgradeable) { + t.Skip("cannot test change-to-True unless the earlier change-to-False test case ran successfully") + } + resourcemerge.SetOperatorStatusCondition(&co.Status.Conditions, configv1.ClusterOperatorStatusCondition{ + Type: configv1.OperatorUpgradeable, + Status: configv1.ConditionTrue, + Reason: "TestReason", + Message: "Cluster is upgradeable for testing.", + }) + + t.Logf("updating the ClusterOperator to %s=%s, which should trigger a change", configv1.OperatorUpgradeable, configv1.ConditionTrue) + start := time.Now() + _, err := fakeClient.ConfigV1().ClusterOperators().Update(ctx, co, metav1.UpdateOptions{}) + if err != nil { + t.Fatalf("failed to update ClusterOperator: %v", err) + } + + select { + case <-changeChannel: + end := time.Now() + t.Logf("received change notification %s after %s", end, end.Sub(start)) + case <-time.After(1 * time.Second): + t.Fatalf("did not receive change notification within one second after ClusterOperator change") + } + + t.Run("expected Risks output", func(t *testing.T) { + var expectedRisks []configv1.ConditionalUpdateRisk + var expectedVersions map[string][]string + risks, versionsMap, err := source.Risks(ctx, versions) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if diff := 
cmp.Diff(expectedRisks, risks); diff != "" { + t.Errorf("risk mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(expectedVersions, versionsMap); diff != "" { + t.Errorf("versions mismatch (-want +got):\n%s", diff) + } + }) + }) +} diff --git a/pkg/risk/upgradeable/util.go b/pkg/risk/upgradeable/util.go new file mode 100644 index 000000000..1b6561a6a --- /dev/null +++ b/pkg/risk/upgradeable/util.go @@ -0,0 +1,24 @@ +package upgradeable + +import ( + "github.com/blang/semver/v4" +) + +// MajorAndMinorUpdates returns all versions that are a major or minor +// version beyond the current version. Versions that cannot be parsed +// as SemVer are included as well. +func MajorAndMinorUpdates(currentVersion semver.Version, versions []string) []string { + var majorAndMinorUpdates []string + for _, vs := range versions { + vp, err := semver.Parse(vs) + if err != nil { + majorAndMinorUpdates = append(majorAndMinorUpdates, vs) + continue + } + if vp.Major > currentVersion.Major || + (vp.Major == currentVersion.Major && vp.Minor > currentVersion.Minor) { + majorAndMinorUpdates = append(majorAndMinorUpdates, vs) + } + } + return majorAndMinorUpdates +} From 354542684f4cbde94bd5969640a2a14c7e088f88 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Mon, 6 Apr 2026 10:47:49 -0700 Subject: [PATCH 3/4] pkg/cvo: Wait until ConfigMap informers have synced Avoid startup-time "hey, I can't find those ConfigMaps!" complaints from pkg/risk/adminack by waiting until the informers have synchronized. The pkg/cvo Operator type has a cacheSynced that's populated in Operator.New (with the ClusterVersion, ClusterOperator, and FeatureGate informers). cacheSynced goes all the way back to 648d27cae7 (metrics: Report a gauge for when cluster operator conditions change, 2019-01-31, #107). But when we added the initial ConfigMap informer as cmLister back in c14773282d (metrics: add cluster_installer series, 2019-06-26, #213) we didn't add that informer to cacheSynced. 
And when I added cmManagedInformer in d18deeee6d (pkg/cvo: Separate ConfigMap informer for openshift-config-managed, 2020-08-21, #441), I didn't add that informer to cacheSynced. None of these earlier commits explain why the ConfigMap informers were not included in cacheSynced, so I'm assuming that was oversight, and catching up now. There is a risk that the ConfigMap informers fail to synchronize quickly, but I'm having trouble imagining a situation where that would happen but the ClusterVersion and ClusterOperator informers wouldn't also fail to synchronize. And either way, we don't serve metrics while we wait on this cache synchronization, so the critical ClusterVersionOperatorDown alert will be firing within 10 minutes, and the responding system administrators can sort out whatever was giving the cache-sync logic trouble. --- pkg/cvo/cvo.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/cvo/cvo.go b/pkg/cvo/cvo.go index b2a1abb1d..2e5e1b63c 100644 --- a/pkg/cvo/cvo.go +++ b/pkg/cvo/cvo.go @@ -296,7 +296,9 @@ func New( optr.proxyLister = proxyInformer.Lister() optr.cmConfigLister = cmConfigInformer.Lister().ConfigMaps(internal.ConfigNamespace) + optr.cacheSynced = append(optr.cacheSynced, cmConfigInformer.Informer().HasSynced) optr.cmConfigManagedLister = cmConfigManagedInformer.Lister().ConfigMaps(internal.ConfigManagedNamespace) + optr.cacheSynced = append(optr.cacheSynced, cmConfigManagedInformer.Informer().HasSynced) optr.featureGateLister = featureGateInformer.Lister() optr.cacheSynced = append(optr.cacheSynced, featureGateInformer.Informer().HasSynced) From f5453e7edd567f39109478d46467076065106ae7 Mon Sep 17 00:00:00 2001 From: "W. 
Trevor King"
Date: Mon, 6 Apr 2026 12:21:52 -0700
Subject: [PATCH 4/4] pkg/cvo: Surface errors in update-risk detector creation

vendor/k8s.io/client-go/tools/cache/shared_informer.go has:

	func (s *sharedIndexInformer) AddEventHandlerWithOptions(handler ResourceEventHandler, options HandlerOptions) (ResourceEventHandlerRegistration, error) {
		s.startedLock.Lock()
		defer s.startedLock.Unlock()

		if s.stopped {
			return nil, fmt.Errorf("handler %v was not added to shared informer because it has stopped already", handler)
		}

which might be the only reason AddEventHandler could fail. But
regardless, it is still good practice to bubble things up and fail
fast, to summon a cluster administrator to help sort out whatever the
issue is.
---
 pkg/cvo/cvo.go                           | 32 ++++++++++++++++++------
 pkg/risk/adminack/adminack.go            |  9 ++++---
 pkg/risk/adminack/adminack_test.go       |  5 +++-
 pkg/risk/overrides/overrides.go          |  6 ++---
 pkg/risk/overrides/overrides_test.go     |  5 +++-
 pkg/risk/updating/updating.go            |  6 ++---
 pkg/risk/updating/updating_test.go       |  5 +++-
 pkg/risk/upgradeable/upgradeable.go      |  6 ++---
 pkg/risk/upgradeable/upgradeable_test.go |  5 +++-
 9 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/pkg/cvo/cvo.go b/pkg/cvo/cvo.go
index 2e5e1b63c..72b87b229 100644
--- a/pkg/cvo/cvo.go
+++ b/pkg/cvo/cvo.go
@@ -305,13 +305,31 @@ func New(
 	// make sure this is initialized after all the listers are initialized
 	riskSourceCallback := func() { optr.availableUpdatesQueue.Add(optr.queueKey()) }
-	optr.upgradeable = aggregate.New(
-		updatingrisk.New("ClusterVersionUpdating", optr.name, cvInformer, riskSourceCallback),
-		overrides.New("ClusterVersionOverrides", optr.name, cvInformer, riskSourceCallback),
-		deletionrisk.New("ResourceDeletionInProgress", optr.currentVersion),
-		adminack.New("AdminAck", optr.currentVersion, cmConfigManagedInformer, cmConfigInformer, riskSourceCallback),
-		upgradeablerisk.New("ClusterOperatorUpgradeable", optr.currentVersion, coInformer, riskSourceCallback),
-	)
+
+	
risks := []risk.Source{} + if source, err := updatingrisk.New("ClusterVersionUpdating", optr.name, cvInformer, riskSourceCallback); err != nil { + return optr, err + } else { + risks = append(risks, source) + } + if source, err := overrides.New("ClusterVersionOverrides", optr.name, cvInformer, riskSourceCallback); err != nil { + return optr, err + } else { + risks = append(risks, source) + } + risks = append(risks, deletionrisk.New("ResourceDeletionInProgress", optr.currentVersion)) + if source, err := adminack.New("AdminAck", optr.currentVersion, cmConfigManagedInformer, cmConfigInformer, riskSourceCallback); err != nil { + return optr, err + } else { + risks = append(risks, source) + } + if source, err := upgradeablerisk.New("ClusterOperatorUpgradeable", optr.currentVersion, coInformer, riskSourceCallback); err != nil { + return optr, err + } else { + risks = append(risks, source) + } + + optr.upgradeable = aggregate.New(risks...) optr.risks = aggregate.New( alert.New("Alert", promqlTarget), diff --git a/pkg/risk/adminack/adminack.go b/pkg/risk/adminack/adminack.go index 7dd496391..ddea729ad 100644 --- a/pkg/risk/adminack/adminack.go +++ b/pkg/risk/adminack/adminack.go @@ -4,6 +4,7 @@ package adminack import ( "context" + "errors" "fmt" "regexp" "slices" @@ -41,21 +42,21 @@ type adminAck struct { } // New returns a new update-risk source, tracking administrator acknowledgements. 
-func New(name string, currentVersion func() configv1.Release, adminGatesInformer, adminAcksInformer informerscorev1.ConfigMapInformer, changeCallback func()) risk.Source { +func New(name string, currentVersion func() configv1.Release, adminGatesInformer, adminAcksInformer informerscorev1.ConfigMapInformer, changeCallback func()) (risk.Source, error) { adminGatesLister := adminGatesInformer.Lister().ConfigMaps(internal.ConfigManagedNamespace) adminAcksLister := adminAcksInformer.Lister().ConfigMaps(internal.ConfigNamespace) source := &adminAck{name: name, currentVersion: currentVersion, adminGatesLister: adminGatesLister, adminAcksLister: adminAcksLister} - adminGatesInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + _, err1 := adminGatesInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, UpdateFunc: func(_, _ interface{}) { source.eventHandler(changeCallback) }, DeleteFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, }) - adminAcksInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + _, err2 := adminAcksInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, UpdateFunc: func(_, _ interface{}) { source.eventHandler(changeCallback) }, DeleteFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, }) - return source + return source, errors.Join(err1, err2) } // Name returns the source's name. 
diff --git a/pkg/risk/adminack/adminack_test.go b/pkg/risk/adminack/adminack_test.go index 04ae3fea0..e52fd0380 100644 --- a/pkg/risk/adminack/adminack_test.go +++ b/pkg/risk/adminack/adminack_test.go @@ -285,7 +285,10 @@ func TestOperator_upgradeableSync(t *testing.T) { for _, tt := range tests { { t.Run(tt.name, func(t *testing.T) { - source := New("AdminAck", func() configv1.Release { return configv1.Release{Version: "4.21.0"} }, cmInformer, cmInformer, nil) + source, err := New("AdminAck", func() configv1.Release { return configv1.Release{Version: "4.21.0"} }, cmInformer, cmInformer, nil) + if err != nil { + t.Fatal(err) + } if tt.gateCm != nil { if tt.gateCm.Name == "delete" { err := f.CoreV1().ConfigMaps(internal.ConfigManagedNamespace).Delete(ctx, internal.AdminGatesConfigMap, metav1.DeleteOptions{}) diff --git a/pkg/risk/overrides/overrides.go b/pkg/risk/overrides/overrides.go index b256946e8..6c85c364f 100644 --- a/pkg/risk/overrides/overrides.go +++ b/pkg/risk/overrides/overrides.go @@ -29,14 +29,14 @@ type overrides struct { } // New returns a new update-risk source, tracking the use of ClusterVersion overrides. -func New(name string, cvName string, cvInformer configinformersv1.ClusterVersionInformer, changeCallback func()) risk.Source { +func New(name string, cvName string, cvInformer configinformersv1.ClusterVersionInformer, changeCallback func()) (risk.Source, error) { cvLister := cvInformer.Lister() source := &overrides{name: name, cvName: cvName, cvLister: cvLister} - cvInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + _, err := cvInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, UpdateFunc: func(_, _ interface{}) { source.eventHandler(changeCallback) }, }) - return source + return source, err } // Name returns the source's name. 
diff --git a/pkg/risk/overrides/overrides_test.go b/pkg/risk/overrides/overrides_test.go index 046ce040e..c9ba055e4 100644 --- a/pkg/risk/overrides/overrides_test.go +++ b/pkg/risk/overrides/overrides_test.go @@ -42,7 +42,10 @@ func Test_New(t *testing.T) { versions := []string{"4.21.1", "4.22.0", "5.0.0"} expectedName := "ClusterVersionOverrides" - source := overrides.New(expectedName, cv.Name, cvInformer, changeCallback) + source, err := overrides.New(expectedName, cv.Name, cvInformer, changeCallback) + if err != nil { + t.Fatal(err) + } t.Run("name", func(t *testing.T) { if name := source.Name(); name != expectedName { diff --git a/pkg/risk/updating/updating.go b/pkg/risk/updating/updating.go index c1c8f4c1c..1bd33fb96 100644 --- a/pkg/risk/updating/updating.go +++ b/pkg/risk/updating/updating.go @@ -30,14 +30,14 @@ type updating struct { } // New returns a new updating-risk source, tracking whether an update is in progress. -func New(name string, cvName string, cvInformer configinformersv1.ClusterVersionInformer, changeCallback func()) risk.Source { +func New(name string, cvName string, cvInformer configinformersv1.ClusterVersionInformer, changeCallback func()) (risk.Source, error) { cvLister := cvInformer.Lister() source := &updating{name: name, cvName: cvName, cvLister: cvLister} - cvInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + _, err := cvInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, UpdateFunc: func(_, _ interface{}) { source.eventHandler(changeCallback) }, }) - return source + return source, err } // Name returns the source's name. 
diff --git a/pkg/risk/updating/updating_test.go b/pkg/risk/updating/updating_test.go index f9a37bacb..24e628026 100644 --- a/pkg/risk/updating/updating_test.go +++ b/pkg/risk/updating/updating_test.go @@ -43,7 +43,10 @@ func Test_New(t *testing.T) { versions := []string{"4.21.1", "4.22.0", "5.0.0"} expectedName := "ClusterVersionUpdating" - source := updating.New(expectedName, cv.Name, cvInformer, changeCallback) + source, err := updating.New(expectedName, cv.Name, cvInformer, changeCallback) + if err != nil { + t.Fatal(err) + } t.Run("name", func(t *testing.T) { if name := source.Name(); name != expectedName { diff --git a/pkg/risk/upgradeable/upgradeable.go b/pkg/risk/upgradeable/upgradeable.go index 20d7a4bcf..0701b1832 100644 --- a/pkg/risk/upgradeable/upgradeable.go +++ b/pkg/risk/upgradeable/upgradeable.go @@ -32,15 +32,15 @@ type upgradeable struct { } // New returns a new update-risk source, tracking ClusterOperator Upgradeable conditions. -func New(name string, currentVersion func() configv1.Release, coInformer configinformersv1.ClusterOperatorInformer, changeCallback func()) risk.Source { +func New(name string, currentVersion func() configv1.Release, coInformer configinformersv1.ClusterOperatorInformer, changeCallback func()) (risk.Source, error) { coLister := coInformer.Lister() source := &upgradeable{name: name, currentVersion: currentVersion, coLister: coLister} - coInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + _, err := coInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, UpdateFunc: func(_, _ interface{}) { source.eventHandler(changeCallback) }, DeleteFunc: func(_ interface{}) { source.eventHandler(changeCallback) }, }) - return source + return source, err } // Name returns the source's name. 
diff --git a/pkg/risk/upgradeable/upgradeable_test.go b/pkg/risk/upgradeable/upgradeable_test.go index 4250a8bb6..96d799c91 100644 --- a/pkg/risk/upgradeable/upgradeable_test.go +++ b/pkg/risk/upgradeable/upgradeable_test.go @@ -51,7 +51,10 @@ func Test_New(t *testing.T) { versions := []string{"4.21.1", "4.22.0", "5.0.0"} expectedName := "ClusterOperatorUpgradeable" - source := upgradeable.New(expectedName, currentVersion, coInformer, changeCallback) + source, err := upgradeable.New(expectedName, currentVersion, coInformer, changeCallback) + if err != nil { + t.Fatal(err) + } t.Run("name", func(t *testing.T) { if name := source.Name(); name != expectedName {