diff --git a/cmd/operator-controller/main.go b/cmd/operator-controller/main.go index 2d78edda4f..eef9dfc03d 100644 --- a/cmd/operator-controller/main.go +++ b/cmd/operator-controller/main.go @@ -623,7 +623,7 @@ func (c *boxcutterReconcilerConfigurator) Configure(ceReconciler *controllers.Cl controllers.HandleFinalizers(c.finalizers), controllers.MigrateStorage(storageMigrator), controllers.RetrieveRevisionStates(revisionStatesGetter), - controllers.ResolveBundle(c.resolver), + controllers.ResolveBundle(c.resolver, c.mgr.GetClient()), controllers.UnpackBundle(c.imagePuller, c.imageCache), controllers.ApplyBundleWithBoxcutter(appl.Apply), } @@ -742,7 +742,7 @@ func (c *helmReconcilerConfigurator) Configure(ceReconciler *controllers.Cluster ceReconciler.ReconcileSteps = []controllers.ReconcileStepFunc{ controllers.HandleFinalizers(c.finalizers), controllers.RetrieveRevisionStates(revisionStatesGetter), - controllers.ResolveBundle(c.resolver), + controllers.ResolveBundle(c.resolver, c.mgr.GetClient()), controllers.UnpackBundle(c.imagePuller, c.imageCache), controllers.ApplyBundle(appl), } diff --git a/internal/operator-controller/applier/boxcutter.go b/internal/operator-controller/applier/boxcutter.go index 63578e9cb8..8a0264c843 100644 --- a/internal/operator-controller/applier/boxcutter.go +++ b/internal/operator-controller/applier/boxcutter.go @@ -303,21 +303,37 @@ func (bc *Boxcutter) createOrUpdate(ctx context.Context, obj client.Object) erro return bc.Client.Patch(ctx, obj, client.Apply, client.FieldOwner(bc.FieldOwner), client.ForceOwnership) } -func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) error { - // Generate desired revision - desiredRevision, err := bc.RevisionGenerator.GenerateRevision(ctx, contentFS, ext, objectLabels, revisionAnnotations) +func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) (bool, string, error) { + // Note: We list revisions first (before checking contentFS) because we need the revision list + // to determine if we can fall back when contentFS is nil. If the API call fails here, + // it indicates a serious cluster connectivity issue, and we should not proceed even in fallback mode + // since the ClusterExtensionRevision controller also requires API access to maintain resources. + existingRevisions, err := bc.getExistingRevisions(ctx, ext.GetName()) if err != nil { - return err + return false, "", err } - if err := controllerutil.SetControllerReference(ext, desiredRevision, bc.Scheme); err != nil { - return fmt.Errorf("set ownerref: %w", err) + // If contentFS is nil, we're maintaining the current state without catalog access. + // In this case, we should use the existing installed revision without generating a new one. + if contentFS == nil { + if len(existingRevisions) == 0 { + return false, "", fmt.Errorf("catalog content unavailable and no revision installed") + } + // Returning true here signals that the rollout has succeeded using the current revision. The + // ClusterExtensionRevision controller will continue to reconcile, apply, and maintain the + // resources defined in that revision via Server-Side Apply, ensuring the workload keeps running + // even when catalog access (and thus new revision content) is unavailable. 
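+		//
+		// Illustrative contract note (names from this change): with contentFS == nil and at
+		// least one existing revision, Apply is a no-op that reports (true, "", nil); it never
+		// generates, patches, or garbage-collects revisions on this path.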
+ return true, "", nil } - // List all existing revisions - existingRevisions, err := bc.getExistingRevisions(ctx, ext.GetName()) + // Generate desired revision + desiredRevision, err := bc.RevisionGenerator.GenerateRevision(ctx, contentFS, ext, objectLabels, revisionAnnotations) if err != nil { - return err + return false, "", err + } + + if err := controllerutil.SetControllerReference(ext, desiredRevision, bc.Scheme); err != nil { + return false, "", fmt.Errorf("set ownerref: %w", err) } currentRevision := &ocv1.ClusterExtensionRevision{} @@ -339,7 +355,7 @@ func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust // inplace patch was successful, no changes in phases state = StateUnchanged default: - return fmt.Errorf("patching %s Revision: %w", desiredRevision.Name, err) + return false, "", fmt.Errorf("patching %s Revision: %w", desiredRevision.Name, err) } } @@ -353,7 +369,7 @@ func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust case StateNeedsInstall: err := preflight.Install(ctx, plainObjs) if err != nil { - return err + return false, "", err } // TODO: jlanford's IDE says that "StateNeedsUpgrade" condition is always true, but // it isn't immediately obvious why that is. Perhaps len(existingRevisions) is @@ -362,7 +378,7 @@ func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust case StateNeedsUpgrade: err := preflight.Upgrade(ctx, plainObjs) if err != nil { - return err + return false, "", err } } } @@ -376,15 +392,15 @@ func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust desiredRevision.Spec.Revision = revisionNumber if err = bc.garbageCollectOldRevisions(ctx, prevRevisions); err != nil { - return fmt.Errorf("garbage collecting old revisions: %w", err) + return false, "", fmt.Errorf("garbage collecting old revisions: %w", err) } if err := bc.createOrUpdate(ctx, desiredRevision); err != nil { - return fmt.Errorf("creating new Revision: %w", err) + return false, "", fmt.Errorf("creating new Revision: %w", err) } } - return nil + return true, "", nil } // garbageCollectOldRevisions deletes archived revisions beyond ClusterExtensionRevisionRetentionLimit. 
diff --git a/internal/operator-controller/applier/boxcutter_test.go b/internal/operator-controller/applier/boxcutter_test.go index 081747285c..b61ae16a5a 100644 --- a/internal/operator-controller/applier/boxcutter_test.go +++ b/internal/operator-controller/applier/boxcutter_test.go @@ -926,14 +926,18 @@ func TestBoxcutter_Apply(t *testing.T) { labels.PackageNameKey: "test-package", } } - err := boxcutter.Apply(t.Context(), testFS, ext, nil, revisionAnnotations) + completed, status, err := boxcutter.Apply(t.Context(), testFS, ext, nil, revisionAnnotations) // Assert if tc.expectedErr != "" { require.Error(t, err) assert.Contains(t, err.Error(), tc.expectedErr) + assert.False(t, completed) + assert.Empty(t, status) } else { require.NoError(t, err) + assert.True(t, completed) + assert.Empty(t, status) } if tc.validate != nil { diff --git a/internal/operator-controller/applier/helm.go b/internal/operator-controller/applier/helm.go index 22ed096fe1..7450872bdd 100644 --- a/internal/operator-controller/applier/helm.go +++ b/internal/operator-controller/applier/helm.go @@ -103,6 +103,16 @@ func (h *Helm) runPreAuthorizationChecks(ctx context.Context, ext *ocv1.ClusterE } func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels map[string]string, storageLabels map[string]string) (bool, string, error) { + // If contentFS is nil, we're maintaining the current state without catalog access. + // In this case, reconcile the existing Helm release if it exists. + if contentFS == nil { + ac, err := h.ActionClientGetter.ActionClientFor(ctx, ext) + if err != nil { + return false, "", err + } + return h.reconcileExistingRelease(ctx, ac, ext) + } + chrt, err := h.buildHelmChart(contentFS, ext) if err != nil { return false, "", err @@ -197,6 +207,62 @@ func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExte return true, "", nil } +// reconcileExistingRelease reconciles an existing Helm release without catalog access. +// This is used when the catalog is unavailable but we need to maintain the current installation. +// It reconciles the release to actively maintain resources, and sets up watchers for monitoring/observability. +func (h *Helm) reconcileExistingRelease(ctx context.Context, ac helmclient.ActionInterface, ext *ocv1.ClusterExtension) (bool, string, error) { + rel, err := ac.Get(ext.GetName()) + if errors.Is(err, driver.ErrReleaseNotFound) { + return false, "", fmt.Errorf("catalog content unavailable and no release installed") + } + if err != nil { + return false, "", fmt.Errorf("failed to get current release: %w", err) + } + + // Reconcile the existing release to ensure resources are maintained + if err := ac.Reconcile(rel); err != nil { + // Reconcile failed - resources NOT maintained + // Return false (rollout failed) with error + return false, "", err + } + + // At this point: Reconcile succeeded - resources ARE maintained (applied to cluster via Server-Side Apply) + // The operations below are for setting up watches to detect drift (i.e., if someone manually modifies the + // resources). If watch setup fails, the resources are still successfully maintained, but we won't detect + // and auto-correct manual modifications. We return true (rollout succeeded) and log watch errors. 
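+	//
+	// Put differently (illustrative): every early `return true, "", nil` below means
+	// "release applied, drift detection degraded", never "rollout failed".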
+ logger := klog.FromContext(ctx) + + relObjects, err := util.ManifestObjects(strings.NewReader(rel.Manifest), fmt.Sprintf("%s-release-manifest", rel.Name)) + if err != nil { + logger.Error(err, "failed to parse manifest objects, cannot set up drift detection watches (resources are applied but drift detection disabled)") + return true, "", nil + } + + logger.V(1).Info("setting up drift detection watches on managed objects") + + // Defensive nil checks to prevent panics if Manager or Watcher not properly initialized + if h.Manager == nil { + logger.Error(fmt.Errorf("manager is nil"), "Manager not initialized, cannot set up drift detection watches (resources are applied but drift detection disabled)") + return true, "", nil + } + cache, err := h.Manager.Get(ctx, ext) + if err != nil { + logger.Error(err, "failed to get managed content cache, cannot set up drift detection watches (resources are applied but drift detection disabled)") + return true, "", nil + } + + if h.Watcher == nil { + logger.Error(fmt.Errorf("watcher is nil"), "Watcher not initialized, cannot set up drift detection watches (resources are applied but drift detection disabled)") + return true, "", nil + } + if err := cache.Watch(ctx, h.Watcher, relObjects...); err != nil { + logger.Error(err, "failed to set up drift detection watches (resources are applied but drift detection disabled)") + return true, "", nil + } + + return true, "", nil +} + func (h *Helm) buildHelmChart(bundleFS fs.FS, ext *ocv1.ClusterExtension) (*chart.Chart, error) { if h.HelmChartProvider == nil { return nil, errors.New("HelmChartProvider is nil") diff --git a/internal/operator-controller/controllers/boxcutter_reconcile_steps.go b/internal/operator-controller/controllers/boxcutter_reconcile_steps.go index 4c9eec36ce..5c746c7b0f 100644 --- a/internal/operator-controller/controllers/boxcutter_reconcile_steps.go +++ b/internal/operator-controller/controllers/boxcutter_reconcile_steps.go @@ -94,7 +94,7 @@ func MigrateStorage(m StorageMigrator) ReconcileStepFunc { } } -func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) error) ReconcileStepFunc { +func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) (bool, string, error)) ReconcileStepFunc { return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) revisionAnnotations := map[string]string{ @@ -109,7 +109,8 @@ func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, e } l.Info("applying bundle contents") - if err := apply(ctx, state.imageFS, ext, objLbls, revisionAnnotations); err != nil { + _, _, err := apply(ctx, state.imageFS, ext, objLbls, revisionAnnotations) + if err != nil { // If there was an error applying the resolved bundle, // report the error via the Progressing condition. 
setStatusProgressing(ext, wrapErrorWithResolutionInfo(state.resolvedRevisionMetadata.BundleMetadata, err)) diff --git a/internal/operator-controller/controllers/boxcutter_reconcile_steps_apply_test.go b/internal/operator-controller/controllers/boxcutter_reconcile_steps_apply_test.go index 63aff2b0ad..78adb70090 100644 --- a/internal/operator-controller/controllers/boxcutter_reconcile_steps_apply_test.go +++ b/internal/operator-controller/controllers/boxcutter_reconcile_steps_apply_test.go @@ -133,8 +133,8 @@ func TestApplyBundleWithBoxcutter(t *testing.T) { imageFS: fstest.MapFS{}, } - stepFunc := ApplyBundleWithBoxcutter(func(_ context.Context, _ fs.FS, _ *ocv1.ClusterExtension, _, _ map[string]string) error { - return nil + stepFunc := ApplyBundleWithBoxcutter(func(_ context.Context, _ fs.FS, _ *ocv1.ClusterExtension, _, _ map[string]string) (bool, string, error) { + return true, "", nil }) result, err := stepFunc(ctx, state, ext) require.NoError(t, err) diff --git a/internal/operator-controller/controllers/clusterextension_admission_test.go b/internal/operator-controller/controllers/clusterextension_admission_test.go index 2f8791999f..6ce9fc3c76 100644 --- a/internal/operator-controller/controllers/clusterextension_admission_test.go +++ b/internal/operator-controller/controllers/clusterextension_admission_test.go @@ -13,7 +13,9 @@ import ( ) func TestClusterExtensionSourceConfig(t *testing.T) { - sourceTypeEmptyError := "Invalid value: null" + // NOTE: Kubernetes validation error format for JSON null values varies across K8s versions. + // We check for the common part "Invalid value:" which appears in all versions. + sourceTypeEmptyError := "Invalid value:" sourceTypeMismatchError := "spec.source.sourceType: Unsupported value" sourceConfigInvalidError := "spec.source: Invalid value" // unionField represents the required Catalog or (future) Bundle field required by SourceConfig diff --git a/internal/operator-controller/controllers/clusterextension_controller.go b/internal/operator-controller/controllers/clusterextension_controller.go index 7f3192b0fd..f381152e6c 100644 --- a/internal/operator-controller/controllers/clusterextension_controller.go +++ b/internal/operator-controller/controllers/clusterextension_controller.go @@ -168,6 +168,8 @@ func (r *ClusterExtensionReconciler) Reconcile(ctx context.Context, req ctrl.Req // ensureAllConditionsWithReason checks that all defined condition types exist in the given ClusterExtension, // and assigns a specified reason and custom message to any missing condition. 
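+// Example (illustrative): ensureAllConditionsWithReason(ext, ocv1.ReasonRetrying, "failed to resolve bundle")
+// stamps that reason and message onto every condition type still missing from ext.Status.Conditions.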
+// +//nolint:unparam // reason parameter is designed to be flexible, even if current callers use the same value func ensureAllConditionsWithReason(ext *ocv1.ClusterExtension, reason v1alpha1.ConditionReason, message string) { for _, condType := range conditionsets.ConditionTypes { cond := apimeta.FindStatusCondition(ext.Status.Conditions, condType) diff --git a/internal/operator-controller/controllers/clusterextension_controller_test.go b/internal/operator-controller/controllers/clusterextension_controller_test.go index 1b09a43adf..eabdba4f94 100644 --- a/internal/operator-controller/controllers/clusterextension_controller_test.go +++ b/internal/operator-controller/controllers/clusterextension_controller_test.go @@ -1611,3 +1611,443 @@ func TestGetInstalledBundleHistory(t *testing.T) { } } } + +// TestResolutionFallbackToInstalledBundle tests the catalog deletion resilience fallback logic +func TestResolutionFallbackToInstalledBundle(t *testing.T) { + t.Run("falls back when catalog unavailable and no version change", func(t *testing.T) { + resolveAttempt := 0 + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + // First reconcile: catalog available, second reconcile: catalog unavailable + d.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolveAttempt++ + if resolveAttempt == 1 { + // First reconcile: catalog available, resolve to version 1.0.0 + v := bundle.VersionRelease{Version: bsemver.MustParse("1.0.0")} + return &declcfg.Bundle{ + Name: "test.1.0.0", + Package: "test-pkg", + Image: "test-image:1.0.0", + }, &v, &declcfg.Deprecation{}, nil + } + // Second reconcile: catalog unavailable + return nil, nil, nil, fmt.Errorf("catalog unavailable") + }) + // Applier succeeds (resources maintained) + d.Applier = &MockApplier{ + installCompleted: true, + installStatus: "", + err: nil, + } + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + Package: "test-pkg", + BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"}, + Image: "test-image:1.0.0", + }, + }, + } + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("test-%s", rand.String(8))} + + // Create ClusterExtension with no version specified + ext := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "test-pkg", + // No version - should fall back + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{Name: "default"}, + }, + } + require.NoError(t, cl.Create(ctx, ext)) + + // First reconcile: catalog available, install version 1.0.0 + res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + require.Equal(t, ctrl.Result{}, res) + + require.NoError(t, cl.Get(ctx, extKey, ext)) + require.Equal(t, "1.0.0", ext.Status.Install.Bundle.Version) + + // Verify status after first install + instCond := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled) + require.NotNil(t, instCond) + require.Equal(t, metav1.ConditionTrue, instCond.Status) + require.Equal(t, ocv1.ReasonSucceeded, instCond.Reason) + + progCond := 
apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeProgressing) + require.NotNil(t, progCond) + require.Equal(t, metav1.ConditionTrue, progCond.Status) + require.Equal(t, ocv1.ReasonSucceeded, progCond.Reason) + + // Verify all conditions are present and valid after first reconcile + verifyInvariants(ctx, t, cl, ext) + + // Second reconcile: catalog unavailable, should fallback to installed version + // Catalog watch will trigger reconciliation when catalog becomes available again + res, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + require.Equal(t, ctrl.Result{}, res) + + // Verify status shows successful reconciliation after fallback + require.NoError(t, cl.Get(ctx, extKey, ext)) + + // Version should remain 1.0.0 (maintained from fallback) + require.Equal(t, "1.0.0", ext.Status.Install.Bundle.Version) + + // Progressing should be Succeeded (apply completed successfully) + progCond = apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeProgressing) + require.NotNil(t, progCond) + require.Equal(t, metav1.ConditionTrue, progCond.Status) + require.Equal(t, ocv1.ReasonSucceeded, progCond.Reason) + + // Installed should be True (maintaining current version) + instCond = apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled) + require.NotNil(t, instCond) + require.Equal(t, metav1.ConditionTrue, instCond.Status) + require.Equal(t, ocv1.ReasonSucceeded, instCond.Reason) + + // Verify all conditions remain valid after fallback + verifyInvariants(ctx, t, cl, ext) + }) + + t.Run("fails when version upgrade requested without catalog", func(t *testing.T) { + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + return nil, nil, nil, fmt.Errorf("catalog unavailable") + }) + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"}, + }, + }, + } + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("test-%s", rand.String(8))} + + // Create ClusterExtension requesting version upgrade + ext := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "test-pkg", + Version: "1.0.1", // Requesting upgrade + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{Name: "default"}, + }, + } + require.NoError(t, cl.Create(ctx, ext)) + + // Reconcile should fail (can't upgrade without catalog) + res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.Error(t, err) + require.Equal(t, ctrl.Result{}, res) + + // Verify status shows Retrying + require.NoError(t, cl.Get(ctx, extKey, ext)) + + // Progressing should be Retrying (can't resolve without catalog) + progCond := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeProgressing) + require.NotNil(t, progCond) + require.Equal(t, metav1.ConditionTrue, progCond.Status) + require.Equal(t, ocv1.ReasonRetrying, progCond.Reason) + + // Installed should be True (v1.0.0 is already installed per RevisionStatesGetter) + // but we can't upgrade to v1.0.1 without catalog + instCond := 
apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled) + require.NotNil(t, instCond) + require.Equal(t, metav1.ConditionTrue, instCond.Status) + + // Verify all conditions are present and valid + verifyInvariants(ctx, t, cl, ext) + }) + + t.Run("auto-updates when catalog becomes available after fallback", func(t *testing.T) { + resolveAttempt := 0 + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + // First attempt: catalog unavailable, then becomes available + d.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolveAttempt++ + if resolveAttempt == 1 { + // First reconcile: catalog unavailable + return nil, nil, nil, fmt.Errorf("catalog temporarily unavailable") + } + // Second reconcile (triggered by catalog watch): catalog available with new version + v := bundle.VersionRelease{Version: bsemver.MustParse("2.0.0")} + return &declcfg.Bundle{ + Name: "test.2.0.0", + Package: "test-pkg", + Image: "test-image:2.0.0", + }, &v, &declcfg.Deprecation{}, nil + }) + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + Package: "test-pkg", + BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"}, + Image: "test-image:1.0.0", + }, + }, + } + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("test-%s", rand.String(8))} + + ext := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "test-pkg", + // No version - auto-update to latest + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{Name: "default"}, + }, + } + require.NoError(t, cl.Create(ctx, ext)) + + // First reconcile: catalog unavailable, falls back to v1.0.0 + res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + require.Equal(t, ctrl.Result{}, res) + + require.NoError(t, cl.Get(ctx, extKey, ext)) + require.Equal(t, "1.0.0", ext.Status.Install.Bundle.Version) + + // Verify core status after fallback to installed version + instCond := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled) + require.NotNil(t, instCond) + require.Equal(t, metav1.ConditionTrue, instCond.Status) + require.Equal(t, ocv1.ReasonSucceeded, instCond.Reason) + + progCond := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeProgressing) + require.NotNil(t, progCond) + require.Equal(t, metav1.ConditionTrue, progCond.Status) + require.Equal(t, ocv1.ReasonSucceeded, progCond.Reason) + + // Note: When falling back without catalog access initially, deprecation conditions + // may not be set yet. Full validation happens after catalog is available. 
+ + // Second reconcile: simulating catalog watch trigger, catalog now available with v2.0.0 + res, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + require.Equal(t, ctrl.Result{}, res) + + // Should have upgraded to v2.0.0 + require.NoError(t, cl.Get(ctx, extKey, ext)) + require.Equal(t, "2.0.0", ext.Status.Install.Bundle.Version) + + // Verify status after upgrade + instCond = apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled) + require.NotNil(t, instCond) + require.Equal(t, metav1.ConditionTrue, instCond.Status) + require.Equal(t, ocv1.ReasonSucceeded, instCond.Reason) + + progCond = apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeProgressing) + require.NotNil(t, progCond) + require.Equal(t, metav1.ConditionTrue, progCond.Status) + require.Equal(t, ocv1.ReasonSucceeded, progCond.Reason) + + // Verify all conditions remain valid after upgrade + verifyInvariants(ctx, t, cl, ext) + + // Verify resolution was attempted twice (fallback, then success) + require.Equal(t, 2, resolveAttempt) + }) + + t.Run("retries when catalogs exist but resolution fails", func(t *testing.T) { + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + // Resolver fails (transient issue) + d.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + return nil, nil, nil, fmt.Errorf("transient catalog issue: cache stale") + }) + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + Package: "test-pkg", + BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"}, + Image: "test-image:1.0.0", + }, + }, + } + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("test-%s", rand.String(8))} + + // Create a ClusterCatalog matching the extension's selector + catalog := &ocv1.ClusterCatalog{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-catalog", + }, + Spec: ocv1.ClusterCatalogSpec{ + Source: ocv1.CatalogSource{ + Type: ocv1.SourceTypeImage, + Image: &ocv1.ImageSource{ + Ref: "test-registry/catalog:latest", + }, + }, + }, + } + require.NoError(t, cl.Create(ctx, catalog)) + + // Create ClusterExtension with no version specified + ext := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "test-pkg", + // No version specified + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{Name: "default"}, + }, + } + require.NoError(t, cl.Create(ctx, ext)) + + // Reconcile should fail and RETRY (not fallback) + // Catalogs exist, so this is likely a transient issue (catalog updating, cache stale, etc.) 
+ res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.Error(t, err) + require.Equal(t, ctrl.Result{}, res) + + // Verify status shows Retrying (not falling back to installed bundle) + require.NoError(t, cl.Get(ctx, extKey, ext)) + + progCond := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeProgressing) + require.NotNil(t, progCond) + require.Equal(t, metav1.ConditionTrue, progCond.Status) + require.Equal(t, ocv1.ReasonRetrying, progCond.Reason) + require.Contains(t, progCond.Message, "transient catalog issue") + + // Installed should remain True (existing installation is maintained) + instCond := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled) + require.NotNil(t, instCond) + require.Equal(t, metav1.ConditionTrue, instCond.Status) + + // Verify we did NOT fall back - status should show we're retrying + verifyInvariants(ctx, t, cl, ext) + + // Clean up the catalog so it doesn't affect other tests + require.NoError(t, cl.Delete(ctx, catalog)) + }) +} + +func TestCheckCatalogsExist(t *testing.T) { + t.Run("returns false when no catalogs exist", func(t *testing.T) { + cl := newClient(t) + ctx := context.Background() + + ext := &ocv1.ClusterExtension{ + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "test-pkg", + }, + }, + }, + } + + exists, err := controllers.CheckCatalogsExist(ctx, cl, ext) + require.NoError(t, err) + require.False(t, exists, "should return false when no catalogs exist") + }) + + t.Run("returns false when no selector provided", func(t *testing.T) { + cl := newClient(t) + ctx := context.Background() + + ext := &ocv1.ClusterExtension{ + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "test-pkg", + Selector: nil, // No selector + }, + }, + }, + } + + exists, err := controllers.CheckCatalogsExist(ctx, cl, ext) + require.NoError(t, err) + require.False(t, exists, "should return false when no catalogs exist (no selector)") + }) + + t.Run("returns false when empty selector provided", func(t *testing.T) { + cl := newClient(t) + ctx := context.Background() + + ext := &ocv1.ClusterExtension{ + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "test-pkg", + Selector: &metav1.LabelSelector{}, // Empty selector (matches everything) + }, + }, + }, + } + + exists, err := controllers.CheckCatalogsExist(ctx, cl, ext) + require.NoError(t, err, "empty selector should not cause error") + require.False(t, exists, "should return false when no catalogs exist (empty selector)") + }) + + t.Run("returns error for invalid selector", func(t *testing.T) { + cl := newClient(t) + ctx := context.Background() + + ext := &ocv1.ClusterExtension{ + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "test-pkg", + Selector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "invalid", + Operator: "InvalidOperator", // Invalid operator + Values: []string{"value"}, + }, + }, + }, + }, + }, + }, + } + + exists, err := controllers.CheckCatalogsExist(ctx, cl, ext) + require.Error(t, err, "should return error for invalid selector") + require.Contains(t, err.Error(), "invalid catalog selector") + require.False(t, exists) + }) +} diff --git 
a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go index 09dd2ce7d2..03cecfe1ce 100644 --- a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go +++ b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go @@ -19,10 +19,12 @@ package controllers import ( "context" "errors" + "fmt" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/finalizer" "sigs.k8s.io/controller-runtime/pkg/log" @@ -83,7 +85,14 @@ func RetrieveRevisionStates(r RevisionStatesGetter) ReconcileStepFunc { } } -func ResolveBundle(r resolve.Resolver) ReconcileStepFunc { +// ResolveBundle resolves the bundle to install or roll out for a ClusterExtension. +// It requires a controller-runtime client (in addition to the resolve.Resolver) to enable +// intelligent error handling when resolution fails. The client is used to check if ClusterCatalogs +// matching the extension's selector still exist in the cluster, allowing the controller to +// distinguish between "ClusterCatalog deleted" (fall back to installed bundle) and "transient failure" +// (retry resolution). This ensures workload resilience during ClusterCatalog outages while maintaining +// responsiveness during ClusterCatalog updates. +func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc { return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) var resolvedRevisionMetadata *RevisionMetadata @@ -95,11 +104,7 @@ func ResolveBundle(r resolve.Resolver) ReconcileStepFunc { } resolvedBundle, resolvedBundleVersion, resolvedDeprecation, err := r.Resolve(ctx, ext, bm) if err != nil { - // Note: We don't distinguish between resolution-specific errors and generic errors - setStatusProgressing(ext, err) - setInstalledStatusFromRevisionStates(ext, state.revisionStates) - ensureAllConditionsWithReason(ext, ocv1.ReasonFailed, err.Error()) - return nil, err + return handleResolutionError(ctx, c, state, ext, err) } // set deprecation status after _successful_ resolution @@ -134,19 +139,190 @@ func ResolveBundle(r resolve.Resolver) ReconcileStepFunc { } } +// handleResolutionError handles the case when bundle resolution fails. +// If a bundle is already installed and the spec isn't requesting a version change, +// we check if the ClusterCatalogs have been deleted. If so, we fall back to using the +// installed bundle to maintain the current state (ClusterCatalog deletion resilience). +// However, if ClusterCatalogs still exist, we retry to allow for transient failures or ClusterCatalog updates. +// If the spec explicitly requests a different version, we must fail and retry regardless. 
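+//
+// Decision matrix implemented below (summary; columns are illustrative shorthand):
+//
+//	installed bundle | version change requested | matching catalogs exist | outcome
+//	no               | n/a                      | n/a                     | set Retrying, return error
+//	yes              | yes                      | n/a                     | set Retrying, return error
+//	yes              | no                       | yes (or check failed)   | set Retrying, return error
+//	yes              | no                       | no                      | fall back to installed bundle, return nil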
+func handleResolutionError(ctx context.Context, c client.Client, state *reconcileState, ext *ocv1.ClusterExtension, err error) (*ctrl.Result, error) { + l := log.FromContext(ctx) + + // If we have an installed bundle, check if we can fall back to it + if state.revisionStates.Installed != nil { + // Check if the spec is requesting a specific version that differs from installed + specVersion := "" + if ext.Spec.Source.Catalog != nil { + specVersion = ext.Spec.Source.Catalog.Version + } + installedVersion := state.revisionStates.Installed.Version + + // If spec requests a different version, we cannot fall back - must fail and retry + if specVersion != "" && specVersion != installedVersion { + msg := fmt.Sprintf("unable to upgrade to version %s: %v (currently installed: %s)", specVersion, err, installedVersion) + l.Error(err, "resolution failed and spec requests version change - cannot fall back", + "requestedVersion", specVersion, + "installedVersion", installedVersion) + setStatusProgressing(ext, err) + setInstalledStatusFromRevisionStates(ext, state.revisionStates) + ensureAllConditionsWithReason(ext, ocv1.ReasonRetrying, msg) + return nil, err + } + + // No version change requested - check if ClusterCatalogs exist + // Only fall back if ClusterCatalogs have been deleted + catalogsExist, catalogCheckErr := CheckCatalogsExist(ctx, c, ext) + if catalogCheckErr != nil { + msg := fmt.Sprintf("failed to resolve bundle: %v", err) + catalogName := getCatalogNameFromSelector(ext.Spec.Source.Catalog.Selector) + l.Error(catalogCheckErr, "error checking if ClusterCatalogs exist, will retry resolution", + "resolutionError", err, + "packageName", getPackageName(ext), + "catalogName", catalogName) + setStatusProgressing(ext, err) + setInstalledStatusFromRevisionStates(ext, state.revisionStates) + ensureAllConditionsWithReason(ext, ocv1.ReasonRetrying, msg) + return nil, err + } + + if catalogsExist { + // ClusterCatalogs exist but resolution failed - likely a transient issue (ClusterCatalog updating, cache stale, etc.) + // Retry resolution instead of falling back + msg := fmt.Sprintf("failed to resolve bundle, retrying: %v", err) + catalogName := getCatalogNameFromSelector(ext.Spec.Source.Catalog.Selector) + l.Error(err, "resolution failed but matching ClusterCatalogs exist - retrying instead of falling back", + "packageName", getPackageName(ext), + "catalogName", catalogName) + setStatusProgressing(ext, err) + setInstalledStatusFromRevisionStates(ext, state.revisionStates) + ensureAllConditionsWithReason(ext, ocv1.ReasonRetrying, msg) + return nil, err + } + + // ClusterCatalogs don't exist (deleted) - fall back to installed bundle to maintain current state. + // The controller watches ClusterCatalog resources, so when ClusterCatalogs become available again, + // a reconcile will be triggered automatically, allowing the extension to upgrade. 
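+		// (Illustrative, assuming the usual controller-runtime wiring elsewhere: such a watch is
+		// typically registered in SetupWithManager, e.g.
+		// Watches(&ocv1.ClusterCatalog{}, handler.EnqueueRequestsFromMapFunc(...)). The exact
+		// registration is outside this function.)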
+ msg := fmt.Sprintf("continuing to maintain current installation at version %s due to catalog unavailability", state.revisionStates.Installed.Version) + catalogName := getCatalogNameFromSelector(ext.Spec.Source.Catalog.Selector) + l.Error(err, "matching ClusterCatalogs unavailable or deleted - falling back to installed bundle to maintain workload", + "packageName", getPackageName(ext), + "catalogName", catalogName, + "installedBundle", state.revisionStates.Installed.Name, + "installedVersion", state.revisionStates.Installed.Version) + setStatusProgressing(ext, err) + setInstalledStatusFromRevisionStates(ext, state.revisionStates) + ensureAllConditionsWithReason(ext, ocv1.ReasonRetrying, msg) + state.resolvedRevisionMetadata = state.revisionStates.Installed + // Return no error to allow Apply step to run and maintain resources. + return nil, nil + } + + // No installed bundle and resolution failed - cannot proceed + msg := fmt.Sprintf("failed to resolve bundle: %v", err) + setStatusProgressing(ext, err) + setInstalledStatusFromRevisionStates(ext, state.revisionStates) + ensureAllConditionsWithReason(ext, ocv1.ReasonRetrying, msg) + return nil, err +} + +// getCatalogNameFromSelector extracts the catalog name from the selector if available. +// Returns empty string if selector is nil or doesn't contain the metadata.name label. +func getCatalogNameFromSelector(selector *metav1.LabelSelector) string { + if selector == nil || selector.MatchLabels == nil { + return "" + } + return selector.MatchLabels["olm.operatorframework.io/metadata.name"] +} + +// getPackageName safely extracts the package name from the extension spec. +// Returns empty string if Catalog source is nil. +func getPackageName(ext *ocv1.ClusterExtension) string { + if ext.Spec.Source.Catalog == nil { + return "" + } + return ext.Spec.Source.Catalog.PackageName +} + +// CheckCatalogsExist checks if any ClusterCatalogs matching the extension's selector exist. +// Returns true if at least one matching ClusterCatalog exists, false if none exist. +// Treats "CRD doesn't exist" errors as "no ClusterCatalogs exist" (returns false, nil). +// Returns an error only if the check itself fails unexpectedly. 
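+//
+// Example (illustrative):
+//
+//	exists, err := CheckCatalogsExist(ctx, c, ext)
+//	switch {
+//	case err != nil: // selector invalid or List failed: keep retrying resolution
+//	case exists: // catalogs present: treat the resolution failure as transient
+//	default: // no matching catalogs: safe to fall back to the installed bundle
+//	}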
+func CheckCatalogsExist(ctx context.Context, c client.Client, ext *ocv1.ClusterExtension) (bool, error) { + var catalogList *ocv1.ClusterCatalogList + var listErr error + + if ext.Spec.Source.Catalog == nil || ext.Spec.Source.Catalog.Selector == nil { + // No selector means all ClusterCatalogs match - check if any ClusterCatalogs exist at all + catalogList = &ocv1.ClusterCatalogList{} + listErr = c.List(ctx, catalogList, client.Limit(1)) + } else { + // Convert label selector to k8slabels.Selector + // Note: An empty LabelSelector matches everything by default + selector, err := metav1.LabelSelectorAsSelector(ext.Spec.Source.Catalog.Selector) + if err != nil { + return false, fmt.Errorf("invalid catalog selector: %w", err) + } + + // List ClusterCatalogs matching the selector (limit to 1 since we only care if any exist) + catalogList = &ocv1.ClusterCatalogList{} + listErr = c.List(ctx, catalogList, client.MatchingLabelsSelector{Selector: selector}, client.Limit(1)) + } + + if listErr != nil { + // Check if the error is because the ClusterCatalog CRD doesn't exist + // This can happen if catalogd is not installed, which means no ClusterCatalogs exist + if apimeta.IsNoMatchError(listErr) { + return false, nil + } + return false, fmt.Errorf("failed to list ClusterCatalogs: %w", listErr) + } + + return len(catalogList.Items) > 0, nil +} + func UnpackBundle(i imageutil.Puller, cache imageutil.Cache) ReconcileStepFunc { return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) - l.Info("unpacking resolved bundle") + + // Defensive check: resolvedRevisionMetadata should be set by ResolveBundle step + if state.resolvedRevisionMetadata == nil { + return nil, fmt.Errorf("unable to retrieve bundle information") + } + + // Always try to pull the bundle content (Pull uses cache-first strategy, so this is efficient) + l.V(1).Info("pulling bundle content") imageFS, _, _, err := i.Pull(ctx, ext.GetName(), state.resolvedRevisionMetadata.Image, cache) + + // Check if resolved bundle matches installed bundle (no version change) + bundleUnchanged := state.revisionStates != nil && + state.revisionStates.Installed != nil && + state.resolvedRevisionMetadata.Name == state.revisionStates.Installed.Name && + state.resolvedRevisionMetadata.Version == state.revisionStates.Installed.Version + if err != nil { - // Wrap the error passed to this with the resolution information until we have successfully - // installed since we intend for the progressing condition to replace the resolved condition - // and will be removing the .status.resolution field from the ClusterExtension status API + if bundleUnchanged { + // Bundle hasn't changed and Pull failed (likely cache miss + catalog unavailable). + // This happens in fallback mode after catalog deletion. Set imageFS to nil so the + // applier can maintain the workload using existing Helm release or ClusterExtensionRevision. 
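+			// (Both appliers honor this contract: with a nil contentFS, Helm reconciles the
+			// existing release and Boxcutter keeps maintaining the latest ClusterExtensionRevision.)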
+ l.V(1).Info("bundle unchanged and content unavailable, continuing with nil contentFS (applier will use existing release/revision)", + "bundle", state.resolvedRevisionMetadata.Name, + "version", state.resolvedRevisionMetadata.Version, + "error", err.Error()) + state.imageFS = nil + return nil, nil + } + // New bundle version but Pull failed - this is an error condition setStatusProgressing(ext, wrapErrorWithResolutionInfo(state.resolvedRevisionMetadata.BundleMetadata, err)) setInstalledStatusFromRevisionStates(ext, state.revisionStates) return nil, err } + + if bundleUnchanged { + l.V(1).Info("bundle unchanged, using cached content for resource reconciliation", + "bundle", state.resolvedRevisionMetadata.Name, + "version", state.resolvedRevisionMetadata.Version) + } + state.imageFS = imageFS return nil, nil } diff --git a/internal/operator-controller/controllers/suite_test.go b/internal/operator-controller/controllers/suite_test.go index 6258a5d307..57305a75fd 100644 --- a/internal/operator-controller/controllers/suite_test.go +++ b/internal/operator-controller/controllers/suite_test.go @@ -106,7 +106,7 @@ func newClientAndReconciler(t *testing.T, opts ...reconcilerOption) (client.Clie } reconciler.ReconcileSteps = []controllers.ReconcileStepFunc{controllers.HandleFinalizers(d.Finalizers), controllers.RetrieveRevisionStates(d.RevisionStatesGetter)} if r := d.Resolver; r != nil { - reconciler.ReconcileSteps = append(reconciler.ReconcileSteps, controllers.ResolveBundle(r)) + reconciler.ReconcileSteps = append(reconciler.ReconcileSteps, controllers.ResolveBundle(r, cl)) } if i := d.ImagePuller; i != nil { reconciler.ReconcileSteps = append(reconciler.ReconcileSteps, controllers.UnpackBundle(i, d.ImageCache)) diff --git a/test/e2e/features/recover.feature b/test/e2e/features/recover.feature index 0438f2d1a1..b03252eca5 100644 --- a/test/e2e/features/recover.feature +++ b/test/e2e/features/recover.feature @@ -115,3 +115,227 @@ Feature: Recover cluster extension from errors that might occur during its lifet Then ClusterExtension is available And ClusterExtension reports Progressing as True with Reason Succeeded And ClusterExtension reports Installed as True + + # CATALOG DELETION RESILIENCE SCENARIOS + + Scenario: Extension continues running after catalog deletion + Given ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE} + And ClusterExtension is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + serviceAccount: + name: olm-sa + source: + sourceType: Catalog + catalog: + packageName: test + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + """ + And ClusterExtension is rolled out + And ClusterExtension is available + And resource "deployment/test-operator" is available + And resource "configmap/test-configmap" is available + And ClusterCatalog "test" is deleted + # Catalog deletion triggers automatic reconciliation via ClusterCatalog watch + # Wait for reconciliation to complete by checking the status condition + When ClusterExtension reports Installed as True + Then resource "deployment/test-operator" is available + And resource "configmap/test-configmap" is available + + Scenario: Resources are restored after catalog deletion + Given ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE} + And ClusterExtension is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: 
+ name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + serviceAccount: + name: olm-sa + source: + sourceType: Catalog + catalog: + packageName: test + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + """ + And ClusterExtension is rolled out + And ClusterExtension is available + And resource "configmap/test-configmap" exists + And ClusterCatalog "test" is deleted + When resource "configmap/test-configmap" is removed + Then resource "configmap/test-configmap" is eventually restored + + Scenario: Config changes are allowed even when the catalog does not exist anymore + Given ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE} + And ClusterExtension is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + serviceAccount: + name: olm-sa + source: + sourceType: Catalog + catalog: + packageName: test + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + """ + And ClusterExtension is rolled out + And ClusterExtension is available + And ClusterCatalog "test" is deleted + When ClusterExtension is updated to add preflight config + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + serviceAccount: + name: olm-sa + install: + preflight: + crdUpgradeSafety: + enforcement: None + source: + sourceType: Catalog + catalog: + packageName: test + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + """ + # Wait for reconciliation of the updated spec (config change should succeed without catalog) + And ClusterExtension is available + Then ClusterExtension reports Installed as True + + Scenario: Version upgrade does not proceed when catalog does not exist + Given ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE} + And ClusterExtension is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + serviceAccount: + name: olm-sa + source: + sourceType: Catalog + catalog: + packageName: test + version: "1.0.0" + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + """ + And ClusterExtension is rolled out + And ClusterExtension is available + And bundle "test-operator.1.0.0" is installed in version "1.0.0" + And ClusterCatalog "test" is deleted + When ClusterExtension is updated to change version + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + serviceAccount: + name: olm-sa + source: + sourceType: Catalog + catalog: + packageName: test + version: "1.0.1" + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + """ + # Wait for reconciliation after the version change request + And ClusterExtension reports Progressing as True with Reason Retrying + # Verify upgrade did not proceed: version remains at 1.0.0 (not 1.0.1) + Then bundle "test-operator.1.0.0" is installed in version "1.0.0" + And ClusterExtension reports Installed as True + # Note: Retrying status means controller will auto-upgrade when catalog becomes available + + @BoxcutterRuntime + Scenario: Extension with revisions continues running after catalog deletion + Given ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE} + And ClusterExtension is 
applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + serviceAccount: + name: olm-sa + source: + sourceType: Catalog + catalog: + packageName: test + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + """ + And ClusterExtension is rolled out + And ClusterExtension is available + And ClusterExtensionRevision "${NAME}-1" reports Available as True with Reason ProbesSucceeded + And resource "deployment/test-operator" is available + And ClusterCatalog "test" is deleted + # Wait for reconciliation to complete after catalog deletion + When ClusterExtension reports Installed as True + Then resource "deployment/test-operator" is available + And ClusterExtensionRevision "${NAME}-1" reports Available as True with Reason ProbesSucceeded + + @BoxcutterRuntime + Scenario: Revision remains available after catalog deletion + Given ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE} + And ClusterExtension is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + serviceAccount: + name: olm-sa + source: + sourceType: Catalog + catalog: + packageName: test + version: "1.2.0" + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + """ + And ClusterExtension is rolled out + And ClusterExtension is available + And ClusterExtensionRevision "${NAME}-1" reports Available as True with Reason ProbesSucceeded + And ClusterCatalog "test" is deleted + # Catalog deletion should NOT affect workload availability + # Wait for reconciliation to complete after catalog deletion + When ClusterExtension reports Installed as True + Then ClusterExtension is available + And ClusterExtensionRevision "${NAME}-1" reports Available as True with Reason ProbesSucceeded + And resource "deployment/test-operator" is available diff --git a/test/e2e/steps/steps.go b/test/e2e/steps/steps.go index e1a55934dd..ea988bd0de 100644 --- a/test/e2e/steps/steps.go +++ b/test/e2e/steps/steps.go @@ -85,6 +85,7 @@ func RegisterSteps(sc *godog.ScenarioContext) { sc.Step(`^(?i)ClusterCatalog "([^"]+)" serves bundles$`, CatalogServesBundles) sc.Step(`^"([^"]+)" catalog image version "([^"]+)" is also tagged as "([^"]+)"$`, TagCatalogImage) sc.Step(`^(?i)ClusterCatalog "([^"]+)" image version "([^"]+)" is also tagged as "([^"]+)"$`, TagCatalogImage) + sc.Step(`^(?i)ClusterCatalog "([^"]+)" is deleted$`, CatalogIsDeleted) sc.Step(`^(?i)operator "([^"]+)" target namespace is "([^"]+)"$`, OperatorTargetNamespace) sc.Step(`^(?i)Prometheus metrics are returned in the response$`, PrometheusMetricsAreReturned) @@ -664,6 +665,17 @@ func TagCatalogImage(name, oldTag, newTag string) error { return crane.Tag(imageRef, newTag, crane.Insecure) } +func CatalogIsDeleted(ctx context.Context, catalogName string) error { + catalogFullName := fmt.Sprintf("%s-catalog", catalogName) + // Using --wait=true makes kubectl wait for the resource to be fully deleted, + // eliminating the need for manual polling + _, err := k8sClient("delete", "clustercatalog", catalogFullName, "--ignore-not-found=true", "--wait=true") + if err != nil { + return fmt.Errorf("failed to delete catalog: %v", err) + } + return nil +} + func PrometheusMetricsAreReturned(ctx context.Context) error { sc := scenarioCtx(ctx) for podName, mr := range sc.metricsResponse { diff --git a/test/utils.go b/test/utils.go index 
22a50b2b8d..60cfaa38a0 100644 --- a/test/utils.go +++ b/test/utils.go @@ -14,6 +14,7 @@ func NewEnv() *envtest.Environment { testEnv := &envtest.Environment{ CRDDirectoryPaths: []string{ pathFromProjectRoot("helm/olmv1/base/operator-controller/crd/experimental"), + pathFromProjectRoot("helm/olmv1/base/catalogd/crd/experimental"), }, ErrorIfCRDPathMissing: true, }
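For context, the envtest change above lets the test environment serve catalogd's CRDs so the new fallback tests can create ClusterCatalog objects. A minimal equivalent setup, with the repo's pathFromProjectRoot helper elided in favor of literal paths (illustrative), might look like:

package test

import "sigs.k8s.io/controller-runtime/pkg/envtest"

// newEnv builds an envtest Environment that serves both operator-controller and
// catalogd CRDs; literal paths stand in for pathFromProjectRoot(...) here.
func newEnv() *envtest.Environment {
	return &envtest.Environment{
		CRDDirectoryPaths: []string{
			"helm/olmv1/base/operator-controller/crd/experimental",
			"helm/olmv1/base/catalogd/crd/experimental",
		},
		ErrorIfCRDPathMissing: true,
	}
}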