From a8de32f2995f3d4995f9b72ea9e507431c6c5f0f Mon Sep 17 00:00:00 2001 From: Maxim Alter Date: Tue, 23 Jun 2026 19:44:48 +0300 Subject: [PATCH] feat: add SBR HyperShift e2e job using persistent management cluster (hypershift-aws) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds e2e-sbr-hypershift-persistent-aws-odf — an optional presubmit that runs the SBR smoke + acceptance suite on a HyperShift hosted cluster backed by ODF storage, using the shared persistent management cluster (cluster_profile: hypershift-aws) instead of provisioning a new management cluster per run. New workflow medik8s-sbr-hypershift-persistent (in step-registry/medik8s/sbr/hypershift-persistent/): pre: ipi-install-rbac hypershift-setup-root-management-cluster (~2s vs ~17m for nested) hypershift-aws-create medik8s-sbr-hypershift-apply-image-sources test: (identical to e2e-sbr-hypershift-aws-odf) medik8s-sbr-hypershift-switch-kubeconfig medik8s-sbr-hypershift-wait-nodes medik8s-catalogsource / medik8s-operator-subscribe odf-prepare-cluster / operatorhub-subscribe-odf-operator odf-apply-storage-cluster e2e-test (make run-tests, ECO_TEST_FEATURES=sbr-operator) post: medik8s-sbr-hypershift-restore-kubeconfig hypershift-dump / hypershift-debug / hypershift-k8sgpt hypershift-aws-destroy (no destroy-management-cluster) Job config: cluster_profile=hypershift-aws, m5.4xlarge x3, ODF stable-4.21, optional=true, trigger= /test 4.22-konflux-e2e-sbr-hypershift-persistent-aws-odf. Expected savings vs nested variant: ~23 min/run (management cluster create + hypershift-install + management cluster destroy eliminated). 
Related: #80372 (nested variant; introduces shared step-registry steps) --- ...dik8s-system-tests-main__4.22-konflux.yaml | 76 ++++++++ .../medik8s-system-tests-main-presubmits.yaml | 176 ++++++++++++++++++ .../medik8s-catalogsource-commands.sh | 7 +- .../medik8s-catalogsource-ref.yaml | 7 + ci-operator/step-registry/medik8s/sbr/OWNERS | 16 ++ .../medik8s/sbr/hypershift-persistent/OWNERS | 16 ++ ...pershift-persistent-workflow.metadata.json | 23 +++ ...8s-sbr-hypershift-persistent-workflow.yaml | 17 ++ .../medik8s/sbr/hypershift/OWNERS | 16 ++ .../sbr/hypershift/apply-image-sources/OWNERS | 16 ++ ...hypershift-apply-image-sources-commands.sh | 62 ++++++ ...hift-apply-image-sources-ref.metadata.json | 23 +++ ...br-hypershift-apply-image-sources-ref.yaml | 24 +++ ...ik8s-sbr-hypershift-workflow.metadata.json | 23 +++ .../medik8s-sbr-hypershift-workflow.yaml | 20 ++ .../sbr/hypershift/restore-kubeconfig/OWNERS | 16 ++ ...-hypershift-restore-kubeconfig-commands.sh | 13 ++ ...shift-restore-kubeconfig-ref.metadata.json | 23 +++ ...sbr-hypershift-restore-kubeconfig-ref.yaml | 12 ++ .../sbr/hypershift/switch-kubeconfig/OWNERS | 16 ++ ...r-hypershift-switch-kubeconfig-commands.sh | 10 + ...rshift-switch-kubeconfig-ref.metadata.json | 23 +++ ...-sbr-hypershift-switch-kubeconfig-ref.yaml | 12 ++ .../medik8s/sbr/hypershift/wait-nodes/OWNERS | 16 ++ ...ik8s-sbr-hypershift-wait-nodes-commands.sh | 55 ++++++ ...br-hypershift-wait-nodes-ref.metadata.json | 23 +++ ...medik8s-sbr-hypershift-wait-nodes-ref.yaml | 30 +++ .../odf-apply-storage-cluster-commands.sh | 20 +- .../odf-apply-storage-cluster-ref.yaml | 3 + 29 files changed, 791 insertions(+), 3 deletions(-) create mode 100644 ci-operator/step-registry/medik8s/sbr/OWNERS create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift-persistent/OWNERS create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift-persistent/medik8s-sbr-hypershift-persistent-workflow.metadata.json create mode 100644 
ci-operator/step-registry/medik8s/sbr/hypershift-persistent/medik8s-sbr-hypershift-persistent-workflow.yaml create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/OWNERS create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/OWNERS create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-commands.sh create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-ref.metadata.json create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-ref.yaml create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/medik8s-sbr-hypershift-workflow.metadata.json create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/medik8s-sbr-hypershift-workflow.yaml create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/OWNERS create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-commands.sh create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-ref.metadata.json create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-ref.yaml create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/OWNERS create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-commands.sh create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-ref.metadata.json create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-ref.yaml create mode 100644 
ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/OWNERS create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-commands.sh create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-ref.metadata.json create mode 100644 ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-ref.yaml diff --git a/ci-operator/config/medik8s/system-tests/medik8s-system-tests-main__4.22-konflux.yaml b/ci-operator/config/medik8s/system-tests/medik8s-system-tests-main__4.22-konflux.yaml index 5589757708116..1e146b7d5edf1 100644 --- a/ci-operator/config/medik8s/system-tests/medik8s-system-tests-main__4.22-konflux.yaml +++ b/ci-operator/config/medik8s/system-tests/medik8s-system-tests-main__4.22-konflux.yaml @@ -3,6 +3,10 @@ base_images: name: "4.22" namespace: ocp tag: aws-efs-csi-operator-create-efs + hypershift-operator: + name: hypershift-operator + namespace: hypershift + tag: latest upi-installer: name: "4.22" namespace: ocp @@ -199,6 +203,78 @@ tests: cpu: 100m memory: 200Mi workflow: ipi-aws +- always_run: false + as: e2e-sbr-hypershift-aws-odf + capabilities: + - intranet + optional: true + steps: + cluster_profile: medik8s-aws + env: + ECO_TEST_FEATURES: sbr-operator + HYPERSHIFT_INSTANCE_TYPE: m5.4xlarge + HYPERSHIFT_NODE_COUNT: "3" + OCP_VERSION: "422" + ODF_OPERATOR_SUB_CHANNEL: stable-4.21 + OO_CHANNEL: stable + OPERATORS: storage-based-remediation + SC_WAIT_TIMEOUT: 10m + SKIP_IDMS: "true" + test: + - ref: medik8s-sbr-hypershift-switch-kubeconfig + - ref: medik8s-sbr-hypershift-wait-nodes + - ref: medik8s-catalogsource + - ref: medik8s-operator-subscribe + - ref: odf-prepare-cluster + - ref: operatorhub-subscribe-odf-operator + - ref: odf-apply-storage-cluster + - as: e2e-test + cli: latest + commands: make run-tests + env: + - name: ECO_TEST_FEATURES + from: src + resources: + requests: + cpu: 100m + memory: 200Mi + 
workflow: medik8s-sbr-hypershift +- always_run: false + as: e2e-sbr-hypershift-persistent-aws-odf + capabilities: + - intranet + optional: true + steps: + cluster_profile: hypershift-aws + env: + ECO_TEST_FEATURES: sbr-operator + HYPERSHIFT_INSTANCE_TYPE: m5.4xlarge + HYPERSHIFT_NODE_COUNT: "3" + OCP_VERSION: "422" + ODF_OPERATOR_SUB_CHANNEL: stable-4.21 + OO_CHANNEL: stable + OPERATORS: storage-based-remediation + SC_WAIT_TIMEOUT: 10m + SKIP_IDMS: "true" + test: + - ref: medik8s-sbr-hypershift-switch-kubeconfig + - ref: medik8s-sbr-hypershift-wait-nodes + - ref: medik8s-catalogsource + - ref: medik8s-operator-subscribe + - ref: odf-prepare-cluster + - ref: operatorhub-subscribe-odf-operator + - ref: odf-apply-storage-cluster + - as: e2e-test + cli: latest + commands: make run-tests + env: + - name: ECO_TEST_FEATURES + from: src + resources: + requests: + cpu: 100m + memory: 200Mi + workflow: medik8s-sbr-hypershift-persistent - always_run: false as: e2e-snr-aws capabilities: diff --git a/ci-operator/jobs/medik8s/system-tests/medik8s-system-tests-main-presubmits.yaml b/ci-operator/jobs/medik8s/system-tests/medik8s-system-tests-main-presubmits.yaml index 50b661ace140a..af7eca991383c 100644 --- a/ci-operator/jobs/medik8s/system-tests/medik8s-system-tests-main-presubmits.yaml +++ b/ci-operator/jobs/medik8s/system-tests/medik8s-system-tests-main-presubmits.yaml @@ -528,6 +528,182 @@ presubmits: secret: secretName: result-aggregator trigger: (?m)^/test( | .* )4.22-konflux-e2e-sbr-aws-odf,?($|\s.*) + - agent: kubernetes + always_run: false + branches: + - ^main$ + - ^main- + cluster: build05 + context: ci/prow/4.22-konflux-e2e-sbr-hypershift-aws-odf + decorate: true + decoration_config: + sparse_checkout_files: + - .ci-operator.yaml + labels: + capability/intranet: intranet + ci-operator.openshift.io/cloud: aws + ci-operator.openshift.io/cloud-cluster-profile: medik8s-aws + ci-operator.openshift.io/variant: 4.22-konflux + ci.openshift.io/generator: prowgen + job-release: 
"4.22" + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: pull-ci-medik8s-system-tests-main-4.22-konflux-e2e-sbr-hypershift-aws-odf + optional: true + rerun_command: /test 4.22-konflux-e2e-sbr-hypershift-aws-odf + spec: + containers: + - args: + - --gcs-upload-secret=/secrets/gcs/service-account.json + - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson + - --lease-server-credentials-file=/etc/boskos/credentials + - --report-credentials-file=/etc/report/credentials + - --secret-dir=/secrets/ci-pull-credentials + - --target=e2e-sbr-hypershift-aws-odf + - --variant=4.22-konflux + command: + - ci-operator + env: + - name: HTTP_SERVER_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest + imagePullPolicy: Always + name: "" + ports: + - containerPort: 8080 + name: http + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /etc/boskos + name: boskos + readOnly: true + - mountPath: /secrets/ci-pull-credentials + name: ci-pull-credentials + readOnly: true + - mountPath: /secrets/gcs + name: gcs-credentials + readOnly: true + - mountPath: /secrets/manifest-tool + name: manifest-tool-local-pusher + readOnly: true + - mountPath: /etc/pull-secret + name: pull-secret + readOnly: true + - mountPath: /etc/report + name: result-aggregator + readOnly: true + serviceAccountName: ci-operator + volumes: + - name: boskos + secret: + items: + - key: credentials + path: credentials + secretName: boskos-credentials + - name: ci-pull-credentials + secret: + secretName: ci-pull-credentials + - name: manifest-tool-local-pusher + secret: + secretName: manifest-tool-local-pusher + - name: pull-secret + secret: + secretName: registry-pull-credentials + - name: result-aggregator + secret: + secretName: result-aggregator + trigger: (?m)^/test( | .* )4.22-konflux-e2e-sbr-hypershift-aws-odf,?($|\s.*) + - agent: kubernetes + always_run: false + branches: + - ^main$ + - ^main- + cluster: build05 
+ context: ci/prow/4.22-konflux-e2e-sbr-hypershift-persistent-aws-odf + decorate: true + decoration_config: + sparse_checkout_files: + - .ci-operator.yaml + labels: + capability/intranet: intranet + ci-operator.openshift.io/cloud: hypershift-aws + ci-operator.openshift.io/cloud-cluster-profile: hypershift-aws + ci-operator.openshift.io/variant: 4.22-konflux + ci.openshift.io/generator: prowgen + job-release: "4.22" + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: pull-ci-medik8s-system-tests-main-4.22-konflux-e2e-sbr-hypershift-persistent-aws-odf + optional: true + rerun_command: /test 4.22-konflux-e2e-sbr-hypershift-persistent-aws-odf + spec: + containers: + - args: + - --gcs-upload-secret=/secrets/gcs/service-account.json + - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson + - --lease-server-credentials-file=/etc/boskos/credentials + - --report-credentials-file=/etc/report/credentials + - --secret-dir=/secrets/ci-pull-credentials + - --target=e2e-sbr-hypershift-persistent-aws-odf + - --variant=4.22-konflux + command: + - ci-operator + env: + - name: HTTP_SERVER_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest + imagePullPolicy: Always + name: "" + ports: + - containerPort: 8080 + name: http + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /etc/boskos + name: boskos + readOnly: true + - mountPath: /secrets/ci-pull-credentials + name: ci-pull-credentials + readOnly: true + - mountPath: /secrets/gcs + name: gcs-credentials + readOnly: true + - mountPath: /secrets/manifest-tool + name: manifest-tool-local-pusher + readOnly: true + - mountPath: /etc/pull-secret + name: pull-secret + readOnly: true + - mountPath: /etc/report + name: result-aggregator + readOnly: true + serviceAccountName: ci-operator + volumes: + - name: boskos + secret: + items: + - key: credentials + path: credentials + secretName: boskos-credentials + - name: ci-pull-credentials + 
secret: + secretName: ci-pull-credentials + - name: manifest-tool-local-pusher + secret: + secretName: manifest-tool-local-pusher + - name: pull-secret + secret: + secretName: registry-pull-credentials + - name: result-aggregator + secret: + secretName: result-aggregator + trigger: (?m)^/test( | .* )4.22-konflux-e2e-sbr-hypershift-persistent-aws-odf,?($|\s.*) - agent: kubernetes always_run: false branches: diff --git a/ci-operator/step-registry/medik8s/catalogsource/medik8s-catalogsource-commands.sh b/ci-operator/step-registry/medik8s/catalogsource/medik8s-catalogsource-commands.sh index 58d8ba96bc9a1..95aff933824d6 100644 --- a/ci-operator/step-registry/medik8s/catalogsource/medik8s-catalogsource-commands.sh +++ b/ci-operator/step-registry/medik8s/catalogsource/medik8s-catalogsource-commands.sh @@ -14,6 +14,7 @@ declare CATALOG_MODE="${CATALOG_MODE:-konflux}" declare CATALOG_SOURCE_NAME="${CATALOG_SOURCE_NAME:-medik8s-catalog}" declare CATALOG_IMAGE="" declare IDMS_NAME="${IDMS_NAME:-medik8s-konflux}" +declare SKIP_IDMS="${SKIP_IDMS:-false}" declare OCP_VERSION="${OCP_VERSION:-}" declare FBC_COMMIT_SHA="${FBC_COMMIT_SHA:-}" declare FBC_SHA_PINNED="${FBC_COMMIT_SHA:+true}" @@ -267,7 +268,11 @@ main() { fi resolve_commit_sha verify_fbc_image - apply_idms + if [[ "${SKIP_IDMS}" == "true" ]]; then + log "Skipping IDMS (SKIP_IDMS=true) — cluster can pull from quay.io directly" + else + apply_idms + fi CATALOG_IMAGE="${FBC_IMAGE_REPO}/${FBC_IMAGE_PREFIX}-${OCP_VERSION}:${FBC_COMMIT_SHA}" fi diff --git a/ci-operator/step-registry/medik8s/catalogsource/medik8s-catalogsource-ref.yaml b/ci-operator/step-registry/medik8s/catalogsource/medik8s-catalogsource-ref.yaml index 13281772bef9e..96b964f1369f8 100644 --- a/ci-operator/step-registry/medik8s/catalogsource/medik8s-catalogsource-ref.yaml +++ b/ci-operator/step-registry/medik8s/catalogsource/medik8s-catalogsource-ref.yaml @@ -43,6 +43,13 @@ ref: - name: IDMS_NAME documentation: The name of the ImageDigestMirrorSet to 
create. Only used in konflux mode. default: "medik8s-konflux" + - name: SKIP_IDMS + documentation: |- + When "true", skip applying the ImageDigestMirrorSet in konflux mode. + Required on HyperShift hosted clusters where IDMS must go through the HostedCluster + object and the ValidatingAdmissionPolicy blocks direct application. + Safe to use on connected clusters where the FBC image is reachable from quay.io directly. + default: "false" documentation: |- Creates a CatalogSource for medik8s/RHWA operator testing. Two modes: "konflux" (default) resolves Konflux FBC artifacts from Quay with IDMS diff --git a/ci-operator/step-registry/medik8s/sbr/OWNERS b/ci-operator/step-registry/medik8s/sbr/OWNERS new file mode 100644 index 0000000000000..820604a7bc5e3 --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/OWNERS @@ -0,0 +1,16 @@ +approvers: + - beekhof + - clobrano + - maximunited + - mshitrit + - razo7 + - slintes + - ugreener +reviewers: + - beekhof + - clobrano + - maximunited + - mshitrit + - razo7 + - slintes + - ugreener diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift-persistent/OWNERS b/ci-operator/step-registry/medik8s/sbr/hypershift-persistent/OWNERS new file mode 100644 index 0000000000000..820604a7bc5e3 --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/hypershift-persistent/OWNERS @@ -0,0 +1,16 @@ +approvers: + - beekhof + - clobrano + - maximunited + - mshitrit + - razo7 + - slintes + - ugreener +reviewers: + - beekhof + - clobrano + - maximunited + - mshitrit + - razo7 + - slintes + - ugreener diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift-persistent/medik8s-sbr-hypershift-persistent-workflow.metadata.json b/ci-operator/step-registry/medik8s/sbr/hypershift-persistent/medik8s-sbr-hypershift-persistent-workflow.metadata.json new file mode 100644 index 0000000000000..5ca32ce9f859c --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/hypershift-persistent/medik8s-sbr-hypershift-persistent-workflow.metadata.json @@ 
-0,0 +1,23 @@ +{ + "path": "medik8s/sbr/hypershift-persistent/medik8s-sbr-hypershift-persistent-workflow.yaml", + "owners": { + "approvers": [ + "beekhof", + "clobrano", + "maximunited", + "mshitrit", + "razo7", + "slintes", + "ugreener" + ], + "reviewers": [ + "beekhof", + "clobrano", + "maximunited", + "mshitrit", + "razo7", + "slintes", + "ugreener" + ] + } +} \ No newline at end of file diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift-persistent/medik8s-sbr-hypershift-persistent-workflow.yaml b/ci-operator/step-registry/medik8s/sbr/hypershift-persistent/medik8s-sbr-hypershift-persistent-workflow.yaml new file mode 100644 index 0000000000000..179efdf5d84f2 --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/hypershift-persistent/medik8s-sbr-hypershift-persistent-workflow.yaml @@ -0,0 +1,17 @@ +workflow: + as: medik8s-sbr-hypershift-persistent + documentation: |- + HyperShift AWS cluster lifecycle for medik8s SBR e2e tests using the + shared persistent management cluster (hypershift-aws cluster profile). + Skips management cluster provisioning and teardown — only the HostedCluster + is created and destroyed per run. Requires cluster_profile: hypershift-aws. 
+ steps: + pre: + - ref: ipi-install-rbac + - chain: hypershift-setup-root-management-cluster + - chain: hypershift-aws-create + - ref: medik8s-sbr-hypershift-apply-image-sources + post: + - ref: medik8s-sbr-hypershift-restore-kubeconfig + - chain: hypershift-dump + - chain: hypershift-aws-destroy diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/OWNERS b/ci-operator/step-registry/medik8s/sbr/hypershift/OWNERS new file mode 100644 index 0000000000000..820604a7bc5e3 --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/hypershift/OWNERS @@ -0,0 +1,16 @@ +approvers: + - beekhof + - clobrano + - maximunited + - mshitrit + - razo7 + - slintes + - ugreener +reviewers: + - beekhof + - clobrano + - maximunited + - mshitrit + - razo7 + - slintes + - ugreener diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/OWNERS b/ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/OWNERS new file mode 100644 index 0000000000000..820604a7bc5e3 --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/OWNERS @@ -0,0 +1,16 @@ +approvers: + - beekhof + - clobrano + - maximunited + - mshitrit + - razo7 + - slintes + - ugreener +reviewers: + - beekhof + - clobrano + - maximunited + - mshitrit + - razo7 + - slintes + - ugreener diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-commands.sh b/ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-commands.sh new file mode 100644 index 0000000000000..100ab53488bbc --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-commands.sh @@ -0,0 +1,62 @@ +#!/bin/bash +set -eu -o pipefail + +declare GIT_REF="${GIT_REF:-main}" +declare FBC_COMMIT_SHA="${FBC_COMMIT_SHA:-}" +declare GITLAB_PROJECT="dragonfly%2Frhwa-fbc" +declare 
GITLAB_API="https://gitlab.cee.redhat.com/api/v4" +declare GITLAB_RAW="https://gitlab.cee.redhat.com/dragonfly/rhwa-fbc/-/raw" + +log() { echo "[$(date --utc +%FT%T.%3NZ)] $*"; } + +# Resolve FBC commit SHA if not provided +if [[ -z "$FBC_COMMIT_SHA" ]]; then + encoded_ref=$(jq -rn --arg ref "$GIT_REF" '$ref | @uri') + FBC_COMMIT_SHA=$(curl --insecure -sSf --retry 3 --retry-delay 2 --connect-timeout 10 --max-time 30 \ + "${GITLAB_API}/projects/${GITLAB_PROJECT}/repository/commits/${encoded_ref}" | jq -r .id) + if [[ -z "$FBC_COMMIT_SHA" || "$FBC_COMMIT_SHA" == "null" ]]; then + echo "ERROR: failed to resolve FBC commit SHA for ref '${GIT_REF}' (got: '${FBC_COMMIT_SHA}')" + exit 1 + fi + log "Resolved FBC_COMMIT_SHA: ${FBC_COMMIT_SHA}" +else + log "Using provided FBC_COMMIT_SHA: ${FBC_COMMIT_SHA}" +fi + +# Fetch IDMS yaml from rhwa-fbc +idms_file=$(mktemp) +# --insecure: gitlab.cee uses internal RH CA not trusted by CI pods +curl --insecure -sSf --retry 3 --retry-delay 2 --connect-timeout 10 --max-time 60 \ + "${GITLAB_RAW}/${FBC_COMMIT_SHA}/.tekton/images-mirror-set.yaml" -o "$idms_file" +log "Fetched IDMS from rhwa-fbc commit ${FBC_COMMIT_SHA}" + +# Convert imageDigestMirrors → imageContentSources (same structure, drop mirrorSourcePolicy). +# Use yq to convert YAML→JSON then jq to reshape, avoiding yq version compatibility issues. +image_content_sources=$(yq-v4 -o=json '.' "$idms_file" | \ + jq '[.spec.imageDigestMirrors[] | {source: .source, mirrors: .mirrors}]') +log "Extracted $(echo "$image_content_sources" | jq 'length') image mirror entries" + +# HostedCluster name written by hypershift-aws-create; namespace is always "clusters" +HC_NAME="$(cat "${SHARED_DIR}/cluster-name")" +HC_NAMESPACE="clusters" + +log "Patching HostedCluster ${HC_NAMESPACE}/${HC_NAME} with imageContentSources..." 
+oc patch hostedcluster "${HC_NAME}" -n "${HC_NAMESPACE}" \ + --type=merge \ + --patch "{\"spec\":{\"imageContentSources\":${image_content_sources}}}" +log "Patch applied — waiting for HyperShift NodePool to finish rolling out new nodes (up to 20m)..." + +# HyperShift hosted clusters have no MachineConfigPool objects; node rotation is driven +# by the NodePool controller on the management cluster. Wait for all NodePools belonging +# to this HostedCluster to report AllNodesHealthy. +# Use the label selector instead of the name: when --zones is passed to +# "hypershift create cluster aws", NodePools are named "${cluster}-${zone}", not "${cluster}". +oc wait nodepool -n "${HC_NAMESPACE}" \ + -l "hypershift.openshift.io/auto-created-for-infra=${HC_NAME}" \ + --for=condition=AllNodesHealthy --timeout=20m || { + log "ERROR: NodePool(s) did not reach AllNodesHealthy in 20m" + oc get nodepool -n "${HC_NAMESPACE}" -l "hypershift.openshift.io/auto-created-for-infra=${HC_NAME}" -o wide || true + exit 1 +} + +log "Image content sources active on hosted cluster workers" diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-ref.metadata.json b/ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-ref.metadata.json new file mode 100644 index 0000000000000..6d54fd20f9c6d --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-ref.metadata.json @@ -0,0 +1,23 @@ +{ + "path": "medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-ref.yaml", + "owners": { + "approvers": [ + "beekhof", + "clobrano", + "maximunited", + "mshitrit", + "razo7", + "slintes", + "ugreener" + ], + "reviewers": [ + "beekhof", + "clobrano", + "maximunited", + "mshitrit", + "razo7", + "slintes", + "ugreener" + ] + } +} \ No newline at end of file diff --git 
a/ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-ref.yaml b/ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-ref.yaml new file mode 100644 index 0000000000000..df2b602b49a3f --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/hypershift/apply-image-sources/medik8s-sbr-hypershift-apply-image-sources-ref.yaml @@ -0,0 +1,24 @@ +ref: + as: medik8s-sbr-hypershift-apply-image-sources + from: upi-installer + cli: latest + commands: medik8s-sbr-hypershift-apply-image-sources-commands.sh + resources: + requests: + cpu: 100m + memory: 100Mi + timeout: 25m0s + env: + - name: GIT_REF + default: "main" + documentation: Branch or git ref to resolve the rhwa-fbc FBC commit from. + - name: FBC_COMMIT_SHA + default: "" + documentation: Pin to a specific rhwa-fbc commit SHA. If empty, resolves latest from GIT_REF. + documentation: |- + Patches the HyperShift HostedCluster with imageContentSources derived from the + rhwa-fbc ImageDigestMirrorSet, so hosted cluster workers can pull Konflux operator + bundle images via the internal registry mirror instead of quay.io directly. + Runs against the management cluster kubeconfig (before the kubeconfig switch step). + Waits up to 20m for the HyperShift NodePool(s) to reach AllNodesHealthy. Node rotation may + still be in progress at that point; medik8s-sbr-hypershift-wait-nodes waits for it to finish. 
diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/medik8s-sbr-hypershift-workflow.metadata.json b/ci-operator/step-registry/medik8s/sbr/hypershift/medik8s-sbr-hypershift-workflow.metadata.json new file mode 100644 index 0000000000000..2b6e8c23df3c6 --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/hypershift/medik8s-sbr-hypershift-workflow.metadata.json @@ -0,0 +1,23 @@ +{ + "path": "medik8s/sbr/hypershift/medik8s-sbr-hypershift-workflow.yaml", + "owners": { + "approvers": [ + "beekhof", + "clobrano", + "maximunited", + "mshitrit", + "razo7", + "slintes", + "ugreener" + ], + "reviewers": [ + "beekhof", + "clobrano", + "maximunited", + "mshitrit", + "razo7", + "slintes", + "ugreener" + ] + } +} \ No newline at end of file diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/medik8s-sbr-hypershift-workflow.yaml b/ci-operator/step-registry/medik8s/sbr/hypershift/medik8s-sbr-hypershift-workflow.yaml new file mode 100644 index 0000000000000..e289808e7e5f8 --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/hypershift/medik8s-sbr-hypershift-workflow.yaml @@ -0,0 +1,20 @@ +workflow: + as: medik8s-sbr-hypershift + documentation: |- + HyperShift AWS cluster lifecycle for medik8s SBR e2e tests. + Provisions a nested management cluster + hosted cluster (3x m5.4xlarge workers) + and tears them down after tests. Equivalent to hypershift-optional-operators + minus the optional-operators-operator-sdk pre step; medik8s uses its own + operator installation refs in the test phase. 
+ steps: + pre: + - ref: ipi-install-rbac + - chain: hypershift-setup-nested-management-cluster + - ref: hypershift-install + - chain: hypershift-aws-create + - ref: medik8s-sbr-hypershift-apply-image-sources + post: + - ref: medik8s-sbr-hypershift-restore-kubeconfig + - chain: hypershift-dump + - chain: hypershift-aws-destroy + - chain: hypershift-destroy-nested-management-cluster diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/OWNERS b/ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/OWNERS new file mode 100644 index 0000000000000..820604a7bc5e3 --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/OWNERS @@ -0,0 +1,16 @@ +approvers: + - beekhof + - clobrano + - maximunited + - mshitrit + - razo7 + - slintes + - ugreener +reviewers: + - beekhof + - clobrano + - maximunited + - mshitrit + - razo7 + - slintes + - ugreener diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-commands.sh b/ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-commands.sh new file mode 100644 index 0000000000000..eb9265f8aadfc --- /dev/null +++ b/ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-commands.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -eu -o pipefail + +# Restore the management cluster kubeconfig saved by the switch-kubeconfig step. +# Must run as the first post step so hypershift-dump/destroy target the management cluster. +# Guard: management_kubeconfig is only written by switch-kubeconfig (test phase). +# If the test phase was never reached (pre-phase failure), skip silently so that +# downstream post steps (hypershift-dump, hypershift-aws-destroy) can still run. 
+if [[ -f "${SHARED_DIR}/management_kubeconfig" ]]; then
+  cp "${SHARED_DIR}/management_kubeconfig" "${SHARED_DIR}/kubeconfig"
+else
+  echo "management_kubeconfig not found — kubeconfig switch was never made, no restore needed"
+fi
diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-ref.metadata.json b/ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-ref.metadata.json
new file mode 100644
index 0000000000000..a7410252e6eac
--- /dev/null
+++ b/ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-ref.metadata.json
@@ -0,0 +1,23 @@
+{
+	"path": "medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-ref.yaml",
+	"owners": {
+		"approvers": [
+			"beekhof",
+			"clobrano",
+			"maximunited",
+			"mshitrit",
+			"razo7",
+			"slintes",
+			"ugreener"
+		],
+		"reviewers": [
+			"beekhof",
+			"clobrano",
+			"maximunited",
+			"mshitrit",
+			"razo7",
+			"slintes",
+			"ugreener"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-ref.yaml b/ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-ref.yaml
new file mode 100644
index 0000000000000..46b2626a1de3d
--- /dev/null
+++ b/ci-operator/step-registry/medik8s/sbr/hypershift/restore-kubeconfig/medik8s-sbr-hypershift-restore-kubeconfig-ref.yaml
@@ -0,0 +1,12 @@
+ref:
+  as: medik8s-sbr-hypershift-restore-kubeconfig
+  from: cli
+  commands: medik8s-sbr-hypershift-restore-kubeconfig-commands.sh
+  resources:
+    requests:
+      cpu: 10m
+      memory: 50Mi
+  documentation: |-
+    Restores ${SHARED_DIR}/kubeconfig to the management cluster kubeconfig saved by
+    medik8s-sbr-hypershift-switch-kubeconfig. Run as the first post step so that
+    hypershift-dump and hypershift-aws-destroy target the management cluster.
diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/OWNERS b/ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/OWNERS
new file mode 100644
index 0000000000000..820604a7bc5e3
--- /dev/null
+++ b/ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/OWNERS
@@ -0,0 +1,16 @@
+approvers:
+  - beekhof
+  - clobrano
+  - maximunited
+  - mshitrit
+  - razo7
+  - slintes
+  - ugreener
+reviewers:
+  - beekhof
+  - clobrano
+  - maximunited
+  - mshitrit
+  - razo7
+  - slintes
+  - ugreener
diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-commands.sh b/ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-commands.sh
new file mode 100644
index 0000000000000..62c18759c6f25
--- /dev/null
+++ b/ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-commands.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+set -eu -o pipefail
+
+# ci-operator points KUBECONFIG at ${SHARED_DIR}/kubeconfig for every step, so
+# overwriting that file makes all subsequent test steps target the hosted cluster.
+# The management cluster kubeconfig is saved first so the restore step can put it
+# back before the post phase runs (hypershift-dump/destroy need the management cluster).
+
+# Validate both inputs before touching anything, so a missing hosted kubeconfig
+# fails with a clear message instead of a bare cp error.
+for name in kubeconfig nested_kubeconfig; do
+  if [[ ! -s "${SHARED_DIR}/${name}" ]]; then
+    echo "ERROR: ${SHARED_DIR}/${name} is missing or empty; cannot switch to the hosted cluster" >&2
+    exit 1
+  fi
+done
+
+# Keep an existing backup: if this step runs twice, kubeconfig already holds the
+# hosted cluster credentials and copying it again would destroy the only copy of
+# the management cluster kubeconfig.
+if [[ -f "${SHARED_DIR}/management_kubeconfig" ]]; then
+  echo "management_kubeconfig already saved; keeping the existing copy"
+else
+  cp "${SHARED_DIR}/kubeconfig" "${SHARED_DIR}/management_kubeconfig"
+fi
+
+cp "${SHARED_DIR}/nested_kubeconfig" "${SHARED_DIR}/kubeconfig"
diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-ref.metadata.json b/ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-ref.metadata.json
new file mode 100644
index 0000000000000..64eca57103e64
--- /dev/null
+++ b/ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-ref.metadata.json
@@ -0,0 +1,23 @@
+{
+	"path": "medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-ref.yaml",
+	"owners": {
+		"approvers": [
+			"beekhof",
+			"clobrano",
+			"maximunited",
+			"mshitrit",
+			"razo7",
+			"slintes",
+			"ugreener"
+		],
+		"reviewers": [
+			"beekhof",
+			"clobrano",
+			"maximunited",
+			"mshitrit",
+			"razo7",
+			"slintes",
+			"ugreener"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-ref.yaml b/ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-ref.yaml
new file mode 100644
index 0000000000000..d2d25fb8b6874
--- /dev/null
+++ b/ci-operator/step-registry/medik8s/sbr/hypershift/switch-kubeconfig/medik8s-sbr-hypershift-switch-kubeconfig-ref.yaml
@@ -0,0 +1,14 @@
+ref:
+  as: medik8s-sbr-hypershift-switch-kubeconfig
+  from: cli
+  commands: medik8s-sbr-hypershift-switch-kubeconfig-commands.sh
+  resources:
+    requests:
+      cpu: 10m
+      memory: 50Mi
+  documentation: |-
+    Switches KUBECONFIG from the HyperShift management cluster to the hosted
+    cluster by overwriting ${SHARED_DIR}/kubeconfig with nested_kubeconfig.
+    The previous kubeconfig is saved as ${SHARED_DIR}/management_kubeconfig so that
+    medik8s-sbr-hypershift-restore-kubeconfig can restore it in the post phase.
+    Run first in the test phase so all subsequent refs target the hosted cluster.
diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/OWNERS b/ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/OWNERS
new file mode 100644
index 0000000000000..820604a7bc5e3
--- /dev/null
+++ b/ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/OWNERS
@@ -0,0 +1,16 @@
+approvers:
+  - beekhof
+  - clobrano
+  - maximunited
+  - mshitrit
+  - razo7
+  - slintes
+  - ugreener
+reviewers:
+  - beekhof
+  - clobrano
+  - maximunited
+  - mshitrit
+  - razo7
+  - slintes
+  - ugreener
diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-commands.sh b/ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-commands.sh
new file mode 100644
index 0000000000000..4ae3e63ddb592
--- /dev/null
+++ b/ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-commands.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+set -eu -o pipefail
+
+# apply-image-sources patches the HostedCluster imageContentSources, which triggers
+# the Hypershift NodePool controller to rotate all worker nodes. The MCP "Updated"
+# wait in that step can complete while rotation is still in progress, because MCO and
+# the NodePool controller operate independently. Rotating nodes lose their ODF labels,
+# so odf-prepare-cluster must not run until all nodes have finished their revision update.
+#
+# This step polls, under one overall deadline, until both of the following hold:
+#   1. At least HYPERSHIFT_NODE_COUNT nodes are Ready, schedulable and at the current revision.
+#   2. No node carries the node.cluster.x-k8s.io/outdated-revision taint (rotation done).
+#
+# A single polling loop is used instead of "oc wait nodes --all" because oc wait only
+# considers the nodes that exist when it starts and fails when none are registered yet,
+# while rotation adds and removes nodes for the whole duration of the wait.
+
+declare EXPECTED_NODES="${HYPERSHIFT_NODE_COUNT:-3}"
+# Budget for the whole step; keep it below the ref's 25m timeout so the diagnostics
+# in the timeout branch are printed before the step is terminated.
+declare TIMEOUT=1200 # 20 minutes
+declare INTERVAL=15
+declare OUTDATED_TAINT="node.cluster.x-k8s.io/outdated-revision"
+
+log() { echo "[$(date --utc +%FT%T.%3NZ)] $*"; }
+
+# Prints "<usable> <outdated>": the number of Ready, schedulable nodes without the
+# outdated-revision taint, followed by the number of nodes that still carry it.
+# Returns non-zero when the node list cannot be fetched or parsed.
+node_counts() {
+  oc get nodes --request-timeout=30s -o json | jq -r --arg taint "${OUTDATED_TAINT}" '
+    [.items[] | {
+      ready: any(.status.conditions[]?; .type == "Ready" and .status == "True"),
+      cordoned: (.spec.unschedulable // false),
+      outdated: any(.spec.taints[]?; .key == $taint)
+    }]
+    | "\(map(select(.ready and (.cordoned | not) and (.outdated | not))) | length) \(map(select(.outdated)) | length)"'
+}
+
+if ! [[ "${EXPECTED_NODES}" =~ ^[0-9]+$ ]] || [[ "${EXPECTED_NODES}" -lt 1 ]]; then
+  log "ERROR: HYPERSHIFT_NODE_COUNT must be a positive integer (got: ${EXPECTED_NODES})"
+  exit 1
+fi
+
+log "Waiting for ${EXPECTED_NODES} node(s) to be Ready at the current NodePool revision (timeout ${TIMEOUT}s)..."
+SECONDS=0 # bash increments SECONDS automatically, so time spent inside oc is counted too
+while true; do
+  if counts=$(node_counts); then
+    read -r usable outdated <<<"${counts}"
+    if [[ "${outdated}" -eq 0 && "${usable}" -ge "${EXPECTED_NODES}" ]]; then
+      log "${usable} node(s) Ready and at current revision — NodePool rotation complete"
+      break
+    fi
+    log "${usable}/${EXPECTED_NODES} node(s) usable, ${outdated} still have outdated-revision taint (${SECONDS}s elapsed)..."
+  else
+    # A failed listing is retried until the deadline instead of aborting the step.
+    log "WARNING: could not list nodes; retrying"
+  fi
+  if [[ "${SECONDS}" -ge "${TIMEOUT}" ]]; then
+    log "ERROR: Timed out after ${TIMEOUT}s waiting for ${EXPECTED_NODES} Ready node(s) at the current revision"
+    oc get nodes -o wide || true
+    oc get nodes -o json | jq '.items[] | {name: .metadata.name, taints: .spec.taints}' || true
+    exit 1
+  fi
+  sleep "${INTERVAL}"
+done
diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-ref.metadata.json b/ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-ref.metadata.json
new file mode 100644
index 0000000000000..22d06240691e8
--- /dev/null
+++ b/ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-ref.metadata.json
@@ -0,0 +1,23 @@
+{
+	"path": "medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-ref.yaml",
+	"owners": {
+		"approvers": [
+			"beekhof",
+			"clobrano",
+			"maximunited",
+			"mshitrit",
+			"razo7",
+			"slintes",
+			"ugreener"
+		],
+		"reviewers": [
+			"beekhof",
+			"clobrano",
+			"maximunited",
+			"mshitrit",
+			"razo7",
+			"slintes",
+			"ugreener"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-ref.yaml b/ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-ref.yaml
new file mode 100644
index 0000000000000..dfb93285e95d7
--- /dev/null
+++ b/ci-operator/step-registry/medik8s/sbr/hypershift/wait-nodes/medik8s-sbr-hypershift-wait-nodes-ref.yaml
@@ -0,0 +1,33 @@
+ref:
+  as: medik8s-sbr-hypershift-wait-nodes
+  from_image:
+    namespace: ocp
+    name: cli-jq
+    tag: latest
+  commands: medik8s-sbr-hypershift-wait-nodes-commands.sh
+  resources:
+    requests:
+      cpu: 10m
+      memory: 50Mi
+  timeout: 25m0s
+  env:
+    - name: HYPERSHIFT_NODE_COUNT
+      default: "3"
+      documentation: Expected number of hosted cluster worker nodes; inherited from the job env.
+  documentation: |-
+    Waits for the HyperShift hosted cluster worker nodes to finish their NodePool
+    revision rotation before ODF node labeling begins.
+
+    The medik8s-sbr-hypershift-apply-image-sources step patches the HostedCluster
+    imageContentSources, triggering the NodePool controller to replace all worker
+    nodes. The MCP "Updated" wait in that step can complete while rotation is still
+    in progress (MCO and the NodePool controller operate independently). Running
+    odf-prepare-cluster while nodes are being replaced causes the ODF labels to be
+    applied to nodes that are immediately deleted, leaving the replacement nodes
+    unlabeled and making the Ceph mon pods unschedulable.
+
+    The script polls for up to 20 minutes, which is kept below this step's 25m
+    timeout so that node diagnostics are printed before the step is terminated.
+
+    This step must run (against the hosted cluster kubeconfig) immediately after
+    medik8s-sbr-hypershift-switch-kubeconfig and before odf-prepare-cluster.
diff --git a/ci-operator/step-registry/odf/apply-storage-cluster/odf-apply-storage-cluster-commands.sh b/ci-operator/step-registry/odf/apply-storage-cluster/odf-apply-storage-cluster-commands.sh
index b41bc20c2560a..9dc31b76fa89a 100644
--- a/ci-operator/step-registry/odf/apply-storage-cluster/odf-apply-storage-cluster-commands.sh
+++ b/ci-operator/step-registry/odf/apply-storage-cluster/odf-apply-storage-cluster-commands.sh
@@ -41,8 +41,35 @@ EOF
 sleep 60
 
 echo "⏳ Wait for StorageCluster to be deployed"
-oc wait "storagecluster.ocs.openshift.io/ocs-storagecluster" \
-    -n $ODF_INSTALL_NAMESPACE --for=condition='Available' --timeout='10m'
+sc_wait_timeout="${SC_WAIT_TIMEOUT:-10m}"
+if ! oc wait "storagecluster.ocs.openshift.io/ocs-storagecluster" \
+    -n "$ODF_INSTALL_NAMESPACE" --for=condition='Available' --timeout="${sc_wait_timeout}"; then
+    # On HyperShift, OCSInitialization owner-ref resolution fails in the API server, which
+    # prevents the Available condition from ever being set even when Ceph is healthy.
+    # Only hosted clusters (controlPlaneTopology=External) get the OSD-based fallback; on
+    # every other topology a missing Available condition is a genuine failure and must
+    # keep failing this step, as it did before the fallback was introduced.
+    control_plane_topology=$(oc get infrastructure cluster \
+        -o jsonpath='{.status.controlPlaneTopology}' 2>/dev/null || true)
+    if [[ "$control_plane_topology" != "External" ]]; then
+        echo "StorageCluster Available condition not met within ${sc_wait_timeout}" >&2
+        oc get "storagecluster.ocs.openshift.io/ocs-storagecluster" -n "$ODF_INSTALL_NAMESPACE" -o yaml || true
+        exit 1
+    fi
+    echo "StorageCluster Available condition not met within ${sc_wait_timeout}; falling back to OSD readiness check"
+    # Wait for all 3 OSD deployments to be Available as a proxy for storage readiness.
+    # StorageCluster spec: count=1, replica=3 → 3 OSD deployments expected.
+    expected_osd=3
+    actual_osd=$(oc get deploy -n "$ODF_INSTALL_NAMESPACE" -l app=rook-ceph-osd \
+        --no-headers 2>/dev/null | wc -l | tr -d ' ')
+    if [[ "$actual_osd" -lt "$expected_osd" ]]; then
+        echo "Expected ${expected_osd} OSD deployments, found ${actual_osd} — storage not fully provisioned" >&2
+        exit 1
+    fi
+    oc wait deploy -l app=rook-ceph-osd -n "$ODF_INSTALL_NAMESPACE" \
+        --for=condition=Available --timeout="${sc_wait_timeout}"
+    echo "OSD deployments are Available; storage is ready"
+fi
 
 echo "Remove is-default-class annotation from all the storage classes"
 oc get sc -o name | xargs -I{} oc annotate {} storageclass.kubernetes.io/is-default-class-
diff --git a/ci-operator/step-registry/odf/apply-storage-cluster/odf-apply-storage-cluster-ref.yaml b/ci-operator/step-registry/odf/apply-storage-cluster/odf-apply-storage-cluster-ref.yaml
index 925f4dbcfa378..8f0e22be5a38a 100644
--- a/ci-operator/step-registry/odf/apply-storage-cluster/odf-apply-storage-cluster-ref.yaml
+++ b/ci-operator/step-registry/odf/apply-storage-cluster/odf-apply-storage-cluster-ref.yaml
@@ -18,5 +18,8 @@ ref:
       default: 100Gi
     - name: TEST_PLATFORM
       default: aws
+    - name: SC_WAIT_TIMEOUT
+      default: "10m"
+      documentation: Timeout for waiting for the StorageCluster to become Available; on HyperShift hosted clusters the same timeout is reused for the fallback wait on the OSD deployments. Increase for slower environments (e.g. HyperShift).
   documentation: |-
     Apply Storage Cluster in order to complete ODF operator installation
\ No newline at end of file