From b108d233ed77f0376e62283ac1f823b453b170fa Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 01/16] [ci_gen_kustomize_values] Add multi-namespace SKMO scenario and playbooks Add support for Shared Keystone Multi-region OpenStack (SKMO) deployments with cross-region Barbican keystone listener: Playbooks (in hooks/playbooks/skmo/): - prepare-leaf.yaml: Pre-stage hook that creates a TransportURL CR in the central region for the leaf's barbican-keystone-listener, copies the generated secret to the leaf namespace, extracts rootca-internal CA cert from central and adds it to the leaf's custom-ca-certs bundle, and waits for central Keystone and openstackclient readiness with retry logic - configure-leaf-listener.yaml: Post-stage hook that patches the leaf OpenStackControlPlane with the cross-region transport_url for the barbican-keystone-listener - trust-leaf-ca.yaml: Post-stage hook that extracts the leaf region's rootca-public and rootca-internal CA certs and adds them to the central region's custom-ca-certs bundle - ensure-central-ca-bundle.yaml: Ensures the central CA bundle secret exists before the leaf control plane deployment Scenario: - va-multi-skmo.yml reproducer scenario configuration - multi-namespace-skmo architecture scenario symlink Co-Authored-By: Claude Signed-off-by: Ade Lee Made-with: Cursor --- .../skmo/configure-leaf-listener.yaml | 36 ++ .../skmo/ensure-central-ca-bundle.yaml | 28 ++ hooks/playbooks/skmo/prepare-leaf.yaml | 209 +++++++++ hooks/playbooks/skmo/trust-leaf-ca.yaml | 51 +++ .../templates/multi-namespace-skmo | 1 + scenarios/reproducers/va-multi-skmo.yml | 406 ++++++++++++++++++ 6 files changed, 731 insertions(+) create mode 100644 hooks/playbooks/skmo/configure-leaf-listener.yaml create mode 100644 hooks/playbooks/skmo/ensure-central-ca-bundle.yaml create mode 100644 hooks/playbooks/skmo/prepare-leaf.yaml create mode 100644 hooks/playbooks/skmo/trust-leaf-ca.yaml create mode 120000 
roles/ci_gen_kustomize_values/templates/multi-namespace-skmo create mode 100644 scenarios/reproducers/va-multi-skmo.yml diff --git a/hooks/playbooks/skmo/configure-leaf-listener.yaml b/hooks/playbooks/skmo/configure-leaf-listener.yaml new file mode 100644 index 0000000000..2bfa8aee05 --- /dev/null +++ b/hooks/playbooks/skmo/configure-leaf-listener.yaml @@ -0,0 +1,36 @@ +--- +- name: Patch leaf control plane with barbican-keystone-listener transport URL + hosts: localhost + gather_facts: false + vars: + central_namespace: openstack + leaf_namespace: openstack2 + leaf_transport_url_name: rabbitmq-transport-url-barbican-keystone-listener-regiontwo + tasks: + - name: Get transport URL secret from central namespace + kubernetes.core.k8s_info: + api_version: v1 + kind: Secret + namespace: "{{ central_namespace }}" + name: "{{ leaf_transport_url_name }}" + register: _transport_secret + + - name: Patch OpenStackControlPlane in leaf region with notifications transport_url + vars: + _transport_url: "{{ _transport_secret.resources[0].data['transport_url'] | b64decode }}" + kubernetes.core.k8s: + state: patched + api_version: core.openstack.org/v1beta1 + kind: OpenStackControlPlane + name: controlplane + namespace: "{{ leaf_namespace }}" + definition: + spec: + barbican: + template: + barbicanKeystoneListener: + customServiceConfig: | + [DEFAULT] + transport_url = {{ _transport_url }} + [keystone_notifications] + pool_name = barbican-listener-regionTwo diff --git a/hooks/playbooks/skmo/ensure-central-ca-bundle.yaml b/hooks/playbooks/skmo/ensure-central-ca-bundle.yaml new file mode 100644 index 0000000000..a37bccb458 --- /dev/null +++ b/hooks/playbooks/skmo/ensure-central-ca-bundle.yaml @@ -0,0 +1,28 @@ +--- +- name: Ensure central control plane uses custom CA bundle + hosts: localhost + gather_facts: false + vars: + central_namespace: openstack + controlplane_name: controlplane + ca_bundle_secret_name: custom-ca-certs + tasks: + - name: Check current caBundleSecretName + 
ansible.builtin.shell: | + set -euo pipefail + oc -n {{ central_namespace }} get osctlplane {{ controlplane_name }} \ + -o jsonpath='{.spec.tls.caBundleSecretName}' + args: + executable: /bin/bash + register: ca_bundle_name + changed_when: false + failed_when: false + + - name: Patch control plane to use custom CA bundle when unset + ansible.builtin.shell: | + set -euo pipefail + oc -n {{ central_namespace }} patch osctlplane {{ controlplane_name }} \ + --type json -p '[{"op":"add","path":"/spec/tls","value":{}},{"op":"add","path":"/spec/tls/caBundleSecretName","value":"{{ ca_bundle_secret_name }}"}]' + args: + executable: /bin/bash + when: ca_bundle_name.stdout | trim == "" diff --git a/hooks/playbooks/skmo/prepare-leaf.yaml b/hooks/playbooks/skmo/prepare-leaf.yaml new file mode 100644 index 0000000000..d32fe2457a --- /dev/null +++ b/hooks/playbooks/skmo/prepare-leaf.yaml @@ -0,0 +1,209 @@ +--- +- name: Prepare SKMO leaf prerequisites in regionZero + hosts: localhost + gather_facts: false + vars: + skmo_values_file: "{{ cifmw_architecture_repo }}/examples/va/multi-namespace-skmo/control-plane2/skmo-values.yaml" + osp_secrets_env_file: "{{ cifmw_architecture_repo }}/lib/control-plane/base/osp-secrets.env" + central_namespace: openstack + leaf_namespace: openstack2 + leaf_secret_name: osp-secret + central_rootca_secret: rootca-public + central_rootca_internal_secret: rootca-internal + leaf_transport_url_name: barbican-keystone-listener-regiontwo + leaf_transport_url_name_secret: rabbitmq-transport-url-barbican-keystone-listener-regiontwo + leaf_transport_url_secret_copy: barbican-keystone-listener-regiontwo-transport + tasks: + - name: Wait for central Keystone API to be ready + kubernetes.core.k8s_info: + api_version: keystone.openstack.org/v1beta1 + kind: KeystoneAPI + namespace: "{{ central_namespace }}" + register: _keystoneapi_info + retries: 60 + delay: 10 + until: + - _keystoneapi_info.resources | length > 0 + - 
_keystoneapi_info.resources[0].status.conditions is defined + - _keystoneapi_info.resources[0].status.conditions | + selectattr('type', 'equalto', 'Ready') | + selectattr('status', 'equalto', 'True') | list | length > 0 + + - name: Wait for openstackclient pod to be ready in central region + kubernetes.core.k8s_info: + api_version: v1 + kind: Pod + namespace: "{{ central_namespace }}" + name: openstackclient + register: _osc_pod_info + retries: 30 + delay: 10 + until: + - _osc_pod_info.resources | length > 0 + - _osc_pod_info.resources[0].status.conditions is defined + - _osc_pod_info.resources[0].status.conditions | + selectattr('type', 'equalto', 'Ready') | + selectattr('status', 'equalto', 'True') | list | length > 0 + + - name: Load SKMO values + ansible.builtin.set_fact: + skmo_values: "{{ lookup('file', skmo_values_file) | from_yaml }}" + + - name: Set SKMO leaf facts + ansible.builtin.set_fact: + leaf_region: "{{ skmo_values.data.leafRegion }}" + leaf_admin_user: "{{ skmo_values.data.leafAdminUser }}" + leaf_admin_project: "{{ skmo_values.data.leafAdminProject }}" + leaf_admin_password_key: "{{ skmo_values.data.leafAdminPasswordKey }}" + keystone_internal_url: "{{ skmo_values.data.keystoneInternalURL }}" + keystone_public_url: "{{ skmo_values.data.keystonePublicURL }}" + ca_bundle_secret_name: "{{ skmo_values.data.leafCaBundleSecretName }}" + + - name: Ensure leaf osp-secret exists (pre-create from env file) + ansible.builtin.shell: | + set -euo pipefail + if ! 
oc -n {{ leaf_namespace }} get secret {{ leaf_secret_name }} >/dev/null 2>&1; then + oc -n {{ leaf_namespace }} create secret generic {{ leaf_secret_name }} \ + --from-env-file="{{ osp_secrets_env_file }}" \ + --dry-run=client -o yaml | oc apply -f - + fi + args: + executable: /bin/bash + + - name: Read leaf admin password from leaf secret + ansible.builtin.shell: | + set -euo pipefail + oc -n {{ leaf_namespace }} get secret {{ leaf_secret_name }} \ + -o jsonpath='{.data.{{ leaf_admin_password_key }}}' | base64 -d + args: + executable: /bin/bash + register: leaf_admin_password + changed_when: false + + - name: Ensure leaf region exists in central Keystone + ansible.builtin.shell: | + set -euo pipefail + oc -n {{ central_namespace }} rsh openstackclient \ + openstack region show {{ leaf_region }} >/dev/null 2>&1 || \ + oc -n {{ central_namespace }} rsh openstackclient \ + openstack region create {{ leaf_region }} + args: + executable: /bin/bash + + - name: Ensure keystone catalog endpoints exist for leaf region + ansible.builtin.shell: | + set -euo pipefail + if ! oc -n {{ central_namespace }} rsh openstackclient \ + openstack endpoint list --service keystone --interface public --region {{ leaf_region }} \ + -f value -c ID | head -1 | grep -q .; then + oc -n {{ central_namespace }} rsh openstackclient \ + openstack endpoint create --region {{ leaf_region }} identity public "{{ keystone_public_url }}" + fi + if ! 
oc -n {{ central_namespace }} rsh openstackclient \ + openstack endpoint list --service keystone --interface internal --region {{ leaf_region }} \ + -f value -c ID | head -1 | grep -q .; then + oc -n {{ central_namespace }} rsh openstackclient \ + openstack endpoint create --region {{ leaf_region }} identity internal "{{ keystone_internal_url }}" + fi + args: + executable: /bin/bash + + - name: Ensure leaf admin project exists in central Keystone + ansible.builtin.shell: | + set -euo pipefail + oc -n {{ central_namespace }} rsh openstackclient \ + openstack project show {{ leaf_admin_project }} >/dev/null 2>&1 || \ + oc -n {{ central_namespace }} rsh openstackclient \ + openstack project create {{ leaf_admin_project }} + args: + executable: /bin/bash + + - name: Ensure leaf admin user exists and has admin role + ansible.builtin.shell: | + set -euo pipefail + if ! oc -n {{ central_namespace }} rsh openstackclient \ + openstack user show {{ leaf_admin_user }} >/dev/null 2>&1; then + oc -n {{ central_namespace }} rsh openstackclient \ + openstack user create --domain Default --password "{{ leaf_admin_password.stdout | trim }}" {{ leaf_admin_user }} + fi + oc -n {{ central_namespace }} rsh openstackclient \ + openstack role add --project {{ leaf_admin_project }} --user {{ leaf_admin_user }} admin + args: + executable: /bin/bash + + - name: Get existing leaf CA bundle secret if present + kubernetes.core.k8s_info: + api_version: v1 + kind: Secret + namespace: "{{ leaf_namespace }}" + name: "{{ ca_bundle_secret_name }}" + register: _existing_bundle + + - name: Get central rootca certs + kubernetes.core.k8s_info: + api_version: v1 + kind: Secret + namespace: "{{ central_namespace }}" + name: "{{ item }}" + register: _central_certs + loop: + - "{{ central_rootca_secret }}" + - "{{ central_rootca_internal_secret }}" + + - name: Create or update leaf CA bundle secret + kubernetes.core.k8s: + state: present + definition: + apiVersion: v1 + kind: Secret + metadata: + name: "{{ 
ca_bundle_secret_name }}" + namespace: "{{ leaf_namespace }}" + data: "{{ (_existing_bundle.resources[0].data | default({})) | combine({ + 'skmo-central-rootca.crt': _central_certs.results[0].resources[0].data['tls.crt'], + 'skmo-central-rootca-internal.crt': _central_certs.results[1].resources[0].data['tls.crt'] + }) }}" + + - name: Create TransportURL CR in central region for leaf listener + ansible.builtin.shell: | + set -euo pipefail + oc apply -f - < + osp_trunk + + + + + + + osptrunk2: | + + osptrunk2 + + + + + + + ocpbm: | + + ocpbm + + + + + + + ocppr: | + + ocppr + + + + vms: + ocp: + amount: 3 + admin_user: core + image_local_dir: "{{ cifmw_basedir }}/images/" + disk_file_name: "ocp_master" + disksize: "100" + extra_disks_num: 3 + extra_disks_size: "50G" + cpus: 16 + memory: 32 + root_part_id: 4 + uefi: true + nets: + - ocppr + - ocpbm + - osp_trunk # ctlplane and isolated networks for openstack namespace cloud + - osptrunk2 # ctlplane and isolated networks for openstack2 namespace cloud + - osp_trunk # OVN datacentre for openstack namespace cloud + - osptrunk2 # OVN datacentre for openstack2 namespace cloud + compute: + uefi: "{{ cifmw_use_uefi }}" + root_part_id: "{{ cifmw_root_partition_id }}" + amount: "{{ [cifmw_libvirt_manager_compute_amount|int, 2] | max }}" + image_url: "{{ cifmw_discovered_image_url }}" + sha256_image_name: "{{ cifmw_discovered_hash }}" + image_local_dir: "{{ cifmw_basedir }}/images/" + disk_file_name: "base-os.qcow2" + disksize: "{{ [cifmw_libvirt_manager_compute_disksize|int, 50] | max }}" + memory: "{{ [cifmw_libvirt_manager_compute_memory|int, 8] | max }}" + cpus: "{{ [cifmw_libvirt_manager_compute_cpus|int, 4] | max }}" + nets: + - ocpbm + - osp_trunk + compute2: + uefi: "{{ cifmw_use_uefi }}" + root_part_id: "{{ cifmw_root_partition_id }}" + amount: "{{ [cifmw_libvirt_manager_compute_amount|int, 2] | max }}" + image_url: "{{ cifmw_discovered_image_url }}" + sha256_image_name: "{{ cifmw_discovered_hash }}" + image_local_dir: 
"{{ cifmw_basedir }}/images/" + disk_file_name: "base-os.qcow2" + disksize: "{{ [cifmw_libvirt_manager_compute_disksize|int, 50] | max }}" + memory: "{{ [cifmw_libvirt_manager_compute_memory|int, 8] | max }}" + cpus: "{{ [cifmw_libvirt_manager_compute_cpus|int, 4] | max }}" + nets: + - ocpbm + - osptrunk2 + controller: + uefi: "{{ cifmw_use_uefi }}" + root_part_id: "{{ cifmw_root_partition_id }}" + image_url: "{{ cifmw_discovered_image_url }}" + sha256_image_name: "{{ cifmw_discovered_hash }}" + image_local_dir: "{{ cifmw_basedir }}/images/" + disk_file_name: "base-os.qcow2" + disksize: 50 + memory: 8 + cpus: 4 + nets: + - ocpbm + - osp_trunk + - osptrunk2 + +## devscript support for OCP deploy +cifmw_devscripts_config_overrides: + fips_mode: "{{ cifmw_fips_enabled | default(false) | bool }}" + +# Set Logical Volume Manager Storage by default for local storage +cifmw_use_lvms: true +cifmw_lvms_disk_list: + - /dev/vda + - /dev/vdb + - /dev/vdc + +cifmw_networking_definition: + networks: + ctlplane: + network: "192.168.122.0/24" + gateway: "192.168.122.1" + dns: + - "192.168.122.1" + mtu: 1500 + tools: + multus: + ranges: + - start: 30 + end: 70 + netconfig: + ranges: + - start: 100 + end: 120 + - start: 150 + end: 170 + metallb: + ranges: + - start: 80 + end: 90 + ctlplane2: + network: "192.168.133.0/24" + gateway: "192.168.133.1" + dns: + - "192.168.133.1" + mtu: 1500 + tools: + multus: + ranges: + - start: 30 + end: 70 + netconfig: + ranges: + - start: 100 + end: 120 + - start: 150 + end: 170 + metallb: + ranges: + - start: 80 + end: 90 + internalapi: + network: "172.17.0.0/24" + vlan: 20 + mtu: 1496 + tools: + metallb: + ranges: + - start: 80 + end: 90 + netconfig: + ranges: + - start: 100 + end: 250 + multus: + ranges: + - start: 30 + end: 70 + internalapi2: + network: "172.17.10.0/24" + vlan: 30 + mtu: 1496 + tools: + metallb: + ranges: + - start: 80 + end: 90 + netconfig: + ranges: + - start: 100 + end: 250 + multus: + ranges: + - start: 30 + end: 70 + 
storage: + network: "172.18.0.0/24" + vlan: 21 + mtu: 1496 + tools: + metallb: + ranges: + - start: 80 + end: 90 + netconfig: + ranges: + - start: 100 + end: 250 + multus: + ranges: + - start: 30 + end: 70 + storage2: + network: "172.18.10.0/24" + vlan: 31 + mtu: 1496 + tools: + metallb: + ranges: + - start: 80 + end: 90 + netconfig: + ranges: + - start: 100 + end: 250 + multus: + ranges: + - start: 30 + end: 70 + tenant: + network: "172.19.0.0/24" + tools: + metallb: + ranges: + - start: 80 + end: 90 + netconfig: + ranges: + - start: 100 + end: 250 + multus: + ranges: + - start: 30 + end: 70 + vlan: 22 + mtu: 1496 + tenant2: + network: "172.19.10.0/24" + tools: + metallb: + ranges: + - start: 80 + end: 90 + netconfig: + ranges: + - start: 100 + end: 250 + multus: + ranges: + - start: 30 + end: 70 + vlan: 32 + mtu: 1496 + external: + network: "10.0.0.0/24" + tools: + netconfig: + ranges: + - start: 100 + end: 250 + vlan: 22 + mtu: 1500 + external2: + network: "10.10.0.0/24" + tools: + netconfig: + ranges: + - start: 100 + end: 250 + vlan: 32 + mtu: 1500 + + group-templates: + ocps: + network-template: + range: + start: 10 + length: 10 + networks: &ocps_nets + ctlplane: {} + internalapi: + trunk-parent: ctlplane + tenant: + trunk-parent: ctlplane + storage: + trunk-parent: ctlplane + ctlplane2: {} + internalapi2: + trunk-parent: ctlplane2 + tenant2: + trunk-parent: ctlplane2 + storage2: + trunk-parent: ctlplane2 + ocp_workers: + network-template: + range: + start: 20 + length: 10 + networks: *ocps_nets + computes: + network-template: + range: + start: 100 + length: 21 + networks: + ctlplane: {} + internalapi: + trunk-parent: ctlplane + tenant: + trunk-parent: ctlplane + storage: + trunk-parent: ctlplane + compute2s: + network-template: + range: + start: 200 + length: 21 + networks: + ctlplane2: {} + internalapi2: + trunk-parent: ctlplane2 + tenant2: + trunk-parent: ctlplane2 + storage2: + trunk-parent: ctlplane2 + instances: + controller-0: + networks: + ctlplane: + 
ip: "192.168.122.9" + ctlplane2: + ip: "192.168.133.9" + +# Hooks +post_deploy: + - name: Discover hypervisors for openstack2 namespace + type: playbook + source: "{{ ansible_user_dir }}/src/github.com/openstack-k8s-operators/ci-framework/hooks/playbooks/nova_manage_discover_hosts.yml" + extra_vars: + namespace: openstack2 + _cell_conductors: nova-cell0-conductor-0 + +pre_admin_setup: + - name: Prepare OSP networks in openstack2 namespace + type: playbook + source: "{{ ansible_user_dir }}/src/github.com/openstack-k8s-operators/ci-framework/playbooks/multi-namespace/ns2_osp_networks.yaml" + extra_vars: + cifmw_os_net_setup_namespace: openstack2 + cifmw_os_net_setup_public_cidr: "192.168.133.0/24" + cifmw_os_net_setup_public_start: "192.168.133.230" + cifmw_os_net_setup_public_end: "192.168.133.250" + cifmw_os_net_setup_public_gateway: "192.168.133.1" + +post_tests: + - name: Run tempest against openstack2 namespace + type: playbook + source: "{{ ansible_user_dir }}/src/github.com/openstack-k8s-operators/ci-framework/playbooks/multi-namespace/ns2_validation.yaml" + extra_vars: + cifmw_test_operator_tempest_name: tempest-tests2 + cifmw_test_operator_namespace: openstack2 From e55683601ab8ca2aaf08e79bd4293bc27dcd14a3 Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 02/16] [ci_gen_kustomize_values] Add Cinder LVM backend support for multi-namespace SKMO scenario Add a 4th extra disk to OCP VMs in the SKMO reproducer and enable the devscripts MachineConfig-based cinder-volumes LVM VG setup: - extra_disks_num: 3 -> 4 to provide a dedicated disk (/dev/vdd) for Cinder - cifmw_devscripts_create_logical_volume: true to generate the MachineConfig that creates the cinder-volumes VG via a systemd unit at boot time - cifmw_devscripts_cinder_volume_pvs: [/dev/vdd] to target the 4th disk - cifmw_devscripts_enable_iscsi_on_ocp_nodes: true to enable iscsid on OCP nodes (required for the iSCSI target created by cinder-volume) LVMS continues 
to use the original three disks (/dev/vda, /dev/vdb, /dev/vdc). Co-Authored-By: Claude Signed-off-by: Ade Lee --- scenarios/reproducers/va-multi-skmo.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scenarios/reproducers/va-multi-skmo.yml b/scenarios/reproducers/va-multi-skmo.yml index 86ad2007b6..6b19e135c5 100644 --- a/scenarios/reproducers/va-multi-skmo.yml +++ b/scenarios/reproducers/va-multi-skmo.yml @@ -84,7 +84,7 @@ cifmw_libvirt_manager_configuration: image_local_dir: "{{ cifmw_basedir }}/images/" disk_file_name: "ocp_master" disksize: "100" - extra_disks_num: 3 + extra_disks_num: 4 extra_disks_size: "50G" cpus: 16 memory: 32 @@ -151,6 +151,12 @@ cifmw_lvms_disk_list: - /dev/vdb - /dev/vdc +# /dev/vdd is reserved for Cinder LVM backend (set up via MachineConfig at install time) +cifmw_devscripts_create_logical_volume: true +cifmw_devscripts_cinder_volume_pvs: + - /dev/vdd +cifmw_devscripts_enable_iscsi_on_ocp_nodes: true + cifmw_networking_definition: networks: ctlplane: From 3802ea31586c7ea88de55e9fce6a06eac3d8ef60 Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 03/16] [federation] Support merging Keycloak CA into an existing CA bundle secret Add a new variable cifmw_federation_ca_bundle_secret_name (default: "") to the federation role. When set, hook_controlplane_config.yml merges the Keycloak CA certificate as a new key (keycloak-ca.crt) into the named secret rather than creating a separate 'keycloakca' secret. If the named secret does not yet exist it is created automatically. In merge mode the kustomization patch omits the spec.tls.caBundleSecretName op-add, since the OpenStackControlPlane CR is assumed to already reference the correct secret (e.g. custom-ca-certs in SKMO deployments). 
When cifmw_federation_ca_bundle_secret_name is empty the original behaviour is preserved for backward compatibility: a dedicated 'keycloakca' secret is created and the kustomization patches spec.tls.caBundleSecretName to point at it. Signed-off-by: Ade Lee Co-Authored-By: Claude --- roles/federation/defaults/main.yml | 16 ++ .../tasks/hook_controlplane_config.yml | 151 +++++++++++++++--- 2 files changed, 146 insertions(+), 21 deletions(-) diff --git a/roles/federation/defaults/main.yml b/roles/federation/defaults/main.yml index acab89258d..bda9968883 100644 --- a/roles/federation/defaults/main.yml +++ b/roles/federation/defaults/main.yml @@ -39,6 +39,22 @@ cifmw_federation_keycloak_url_validate_certs: false # Deploy one realm by default. Add true to job vars for multirealm deploys. cifmw_federation_deploy_multirealm: false +# ============================================================================= +# CA CERTIFICATE HANDLING +# ============================================================================= +# When set to a non-empty string, the federation role will look for an existing +# Kubernetes Secret with this name in cifmw_federation_run_osp_cmd_namespace. +# If the secret exists, the Keycloak CA certificate is added as a new key +# (keycloak-ca.crt) without disturbing existing keys. If the secret does not +# exist it is created with just the Keycloak CA. In both cases the kustomization +# patch does NOT override spec.tls.caBundleSecretName, assuming the control plane +# CR already points to this secret. +# +# When left empty (the default) the original behaviour is preserved: a dedicated +# 'keycloakca' secret is created and the kustomization patch sets +# spec.tls.caBundleSecretName to 'keycloakca'. 
+cifmw_federation_ca_bundle_secret_name: "" + # ============================================================================= # KEYCLOAK TEST USERS AND GROUPS - REALM 1 # ============================================================================= diff --git a/roles/federation/tasks/hook_controlplane_config.yml b/roles/federation/tasks/hook_controlplane_config.yml index eadc6f01ed..00eaed6a2a 100644 --- a/roles/federation/tasks/hook_controlplane_config.yml +++ b/roles/federation/tasks/hook_controlplane_config.yml @@ -14,9 +14,135 @@ # License for the specific language governing permissions and limitations # under the License. -- name: Create file to customize keystone for Federation resources deployed in the control plane +# --------------------------------------------------------------------------- +# Step 1 – read the Keycloak CA cert written by federation-pre-deploy +# --------------------------------------------------------------------------- +- name: Get ingress operator CA cert + ansible.builtin.slurp: + src: "{{ [ansible_user_dir, 'ci-framework-data', 'tmp', 'ingress-operator-ca.crt'] | path_join }}" + register: federation_sso_ca + +# --------------------------------------------------------------------------- +# Step 2 – determine CA handling mode +# +# When cifmw_federation_ca_bundle_secret_name is set we merge the Keycloak CA +# into that secret (creating it if absent). The kustomization patch then +# does NOT touch spec.tls.caBundleSecretName because the control plane CR is +# assumed to already reference the correct secret (e.g. custom-ca-certs). +# +# When cifmw_federation_ca_bundle_secret_name is empty we fall back to the +# original behaviour: create a dedicated 'keycloakca' secret and patch +# spec.tls.caBundleSecretName to point at it. 
+# --------------------------------------------------------------------------- +- name: Set federation CA merge mode flag + ansible.builtin.set_fact: + _federation_merge_ca: "{{ cifmw_federation_ca_bundle_secret_name | length > 0 }}" + +- name: Look up existing CA bundle secret + when: _federation_merge_ca | bool + kubernetes.core.k8s_info: + kubeconfig: "{{ cifmw_openshift_kubeconfig }}" + api_version: v1 + kind: Secret + name: "{{ cifmw_federation_ca_bundle_secret_name }}" + namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" + register: _federation_existing_ca_bundle + +- name: Capture existing CA bundle secret data + when: _federation_merge_ca | bool + ansible.builtin.set_fact: + _federation_ca_bundle_existing_data: >- + {{ + (_federation_existing_ca_bundle.resources | first).data + if _federation_existing_ca_bundle.resources | length > 0 + else {} + }} + +# --------------------------------------------------------------------------- +# Step 3a – merge Keycloak CA into the existing / new bundle secret +# --------------------------------------------------------------------------- +- name: Create or update CA bundle secret with Keycloak CA cert + when: _federation_merge_ca | bool + kubernetes.core.k8s: + kubeconfig: "{{ cifmw_openshift_kubeconfig }}" + state: present + definition: + apiVersion: v1 + kind: Secret + type: Opaque + metadata: + name: "{{ cifmw_federation_ca_bundle_secret_name }}" + namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" + data: >- + {{ + _federation_ca_bundle_existing_data | + combine({'keycloak-ca.crt': federation_sso_ca.content}) + }} + +# --------------------------------------------------------------------------- +# Step 3b – original path: create a dedicated keycloakca secret +# --------------------------------------------------------------------------- +- name: Add dedicated Keycloak CA secret + when: not (_federation_merge_ca | bool) + kubernetes.core.k8s: + kubeconfig: "{{ cifmw_openshift_kubeconfig }}" + state: present 
+ definition: + apiVersion: v1 + kind: Secret + type: Opaque + metadata: + name: keycloakca + namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" + data: + KeyCloakCA: "{{ federation_sso_ca.content }}" + +# --------------------------------------------------------------------------- +# Step 4 – create the kustomization that patches the Keystone OSCP. +# +# Merge mode: only patch httpdCustomization and customServiceConfig. +# caBundleSecretName is already correctly set in the CR. +# +# Dedicated-secret mode: also patch spec.tls.caBundleSecretName (original +# behaviour). +# --------------------------------------------------------------------------- +- name: Create Keystone federation kustomization (merge into existing CA bundle) + when: _federation_merge_ca | bool + ansible.builtin.copy: + dest: "{{ cifmw_manifests_dir }}/kustomizations/controlplane/keystone_federation.yaml" + mode: "0644" + content: |- + apiVersion: kustomize.config.k8s.io/v1beta1 + kind: Kustomization + resources: + - namespace: {{ cifmw_federation_run_osp_cmd_namespace }} + patches: + - target: + kind: OpenStackControlPlane + name: .* + patch: |- + - op: add + path: /spec/keystone/template/httpdCustomization + value: + customConfigSecret: keystone-httpd-override + - op: add + path: /spec/keystone/template/customServiceConfig + value: | + [DEFAULT] + insecure_debug=true + debug=true + [federation] + trusted_dashboard={{ cifmw_federation_horizon_url }}/dashboard/auth/websso/ + [openid] + remote_id_attribute=HTTP_OIDC_ISS + [auth] + methods = password,token,oauth1,mapped,application_credential,openid + +- name: Create Keystone federation kustomization (dedicated keycloakca secret) + when: not (_federation_merge_ca | bool) ansible.builtin.copy: dest: "{{ cifmw_manifests_dir }}/kustomizations/controlplane/keystone_federation.yaml" + mode: "0644" content: |- apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization @@ -49,27 +175,10 @@ remote_id_attribute=HTTP_OIDC_ISS [auth] methods = 
password,token,oauth1,mapped,application_credential,openid - mode: "0644" - -- name: Get ingress operator CA cert - ansible.builtin.slurp: - src: "{{ [ ansible_user_dir, 'ci-framework-data', 'tmp', 'ingress-operator-ca.crt'] | path_join }}" - register: federation_sso_ca - -- name: Add Keycloak CA secret - kubernetes.core.k8s: - kubeconfig: "{{ cifmw_openshift_kubeconfig }}" - state: present - definition: - apiVersion: v1 - kind: Secret - type: Opaque - metadata: - name: keycloakca - namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" - data: - KeyCloakCA: "{{ federation_sso_ca.content }}" +# --------------------------------------------------------------------------- +# Step 5 – create the Keystone httpd override secret (always needed) +# --------------------------------------------------------------------------- - name: Create Keystone httpd override secret for Federation kubernetes.core.k8s: kubeconfig: "{{ cifmw_openshift_kubeconfig }}" From 162634949043ed4ab6496443a7f33ac7c3530435 Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 04/16] [federation] Fix broken 'until' condition waiting for RHSSO InstallPlan Two bugs in run_keycloak_setup.yml: 1. The 'until' condition wrapped its expression in {{ }} delimiters, which Ansible forbids in conditionals (causes a parse error). 2. map(attribute='metadata.labels') returns a dict per resource; select('match', ...) cannot regex-match a dict, causing 'dict object has no attribute labels' at runtime. Fix by removing the {{ }} and using dict2items + flatten to extract label keys before applying the regex selector. 
Signed-off-by: Ade Lee Co-Authored-By: Claude --- roles/federation/tasks/run_keycloak_setup.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/roles/federation/tasks/run_keycloak_setup.yml b/roles/federation/tasks/run_keycloak_setup.yml index 41cd8ef218..56db4e6b04 100644 --- a/roles/federation/tasks/run_keycloak_setup.yml +++ b/roles/federation/tasks/run_keycloak_setup.yml @@ -54,11 +54,13 @@ kind: InstallPlan register: ip_list until: >- - {{ - ip_list.resources | - map(attribute='metadata.labels') | - select('match', '.*rhsso-operator.*') - }} + ip_list.resources | + selectattr('metadata.labels', 'defined') | + map(attribute='metadata.labels') | + map('dict2items') | + flatten | + selectattr('key', 'match', '.*rhsso-operator.*') | + list | length > 0 retries: 30 delay: 40 From 18926eb9630a7b052867776b447db186447d3d30 Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 05/16] [federation] Ensure kustomizations/controlplane directory exists before writing The ansible.builtin.copy task that writes keystone_federation.yaml fails if the destination directory does not yet exist. Add an explicit ansible.builtin.file task (state: directory) immediately before the two copy tasks so the directory is created on demand. Signed-off-by: Ade Lee Co-Authored-By: Claude --- roles/federation/tasks/hook_controlplane_config.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/roles/federation/tasks/hook_controlplane_config.yml b/roles/federation/tasks/hook_controlplane_config.yml index 00eaed6a2a..4948ea7654 100644 --- a/roles/federation/tasks/hook_controlplane_config.yml +++ b/roles/federation/tasks/hook_controlplane_config.yml @@ -106,6 +106,12 @@ # Dedicated-secret mode: also patch spec.tls.caBundleSecretName (original # behaviour). 
# --------------------------------------------------------------------------- +- name: Ensure kustomization controlplane directory exists + ansible.builtin.file: + path: "{{ cifmw_manifests_dir }}/kustomizations/controlplane" + state: directory + mode: "0755" + - name: Create Keystone federation kustomization (merge into existing CA bundle) when: _federation_merge_ca | bool ansible.builtin.copy: From b573e86534cfce88651b73476baf35928ea8081d Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 06/16] [federation] Wait for Keystone Ready before running post-deploy auth test The customServiceConfig patch that adds 'openid' to Keystone's [auth] methods is applied during the control-plane kustomize deploy (stage 5). By the time the leaf control-plane post_stage_run hooks execute (including federation-post-deploy.yml), Keystone may not have finished reconciling with the new config. Domain/IdP/mapping/protocol creation succeed because they use the existing password auth path; only get-token.sh (which authenticates via openid) fails with HTTP 401 'unsupported method'. Add a wait-for-Ready loop on the KeystoneAPI CR at the start of hook_post_deploy.yml (retries=30, delay=20s = up to 10 min) so the auth test only runs once Keystone has restarted with federation configuration active. Signed-off-by: Ade Lee Co-Authored-By: Claude --- roles/federation/tasks/hook_post_deploy.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/roles/federation/tasks/hook_post_deploy.yml b/roles/federation/tasks/hook_post_deploy.yml index 7b49c46330..b6ea2c91df 100644 --- a/roles/federation/tasks/hook_post_deploy.yml +++ b/roles/federation/tasks/hook_post_deploy.yml @@ -14,6 +14,22 @@ # License for the specific language governing permissions and limitations # under the License. 
+- name: Wait for Keystone to be Ready with federation config applied + kubernetes.core.k8s_info: + kubeconfig: "{{ cifmw_openshift_kubeconfig }}" + api_version: keystone.openstack.org/v1beta1 + kind: KeystoneAPI + name: keystone + namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" + register: _keystone_ready + until: >- + _keystone_ready.resources | length > 0 and + (_keystone_ready.resources[0].status.conditions | default([]) | + selectattr('type', 'equalto', 'Ready') | + selectattr('status', 'equalto', 'True') | list | length > 0) + retries: 30 + delay: 20 + - name: Build realm configurations for single realm OpenStack setup ansible.builtin.set_fact: _federation_openstack_realms_to_process: From 8af7be1b9558b6eab182a08b3d26751e9a6ee607 Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 07/16] [federation] Directly patch OSCP with Keystone federation config The kustomizations/controlplane/ directory is only consumed by the edpm_prepare / ci_kustomize flow (CRC/devscripts deployments). In the kustomize_deploy flow used by SKMO (deploy-architecture.sh), nothing reads that directory, so the keystone_federation.yaml file was written but never applied - leaving the OSCP unmodified. Add Step 6 to hook_controlplane_config.yml that: 1. Checks whether the OpenStackControlPlane CR already exists. 2. If so, patches it directly via kubernetes.core.k8s (state: patched) with the httpdCustomization, customServiceConfig (openid methods), and (in dedicated-secret mode) spec.tls.caBundleSecretName. The kustomization file is still written for backward compatibility with deployments that use edpm_prepare (CRC/devscripts flow). The direct patch is a no-op when the OSCP does not yet exist (fresh install with CRC flow), making both paths safe. 
Signed-off-by: Ade Lee Co-Authored-By: Claude --- .../tasks/hook_controlplane_config.yml | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/roles/federation/tasks/hook_controlplane_config.yml b/roles/federation/tasks/hook_controlplane_config.yml index 4948ea7654..82c549f8d9 100644 --- a/roles/federation/tasks/hook_controlplane_config.yml +++ b/roles/federation/tasks/hook_controlplane_config.yml @@ -198,3 +198,68 @@ type: Opaque stringData: federation.conf: "{{ lookup('template', 'federation-single.conf.j2') }}" + +# --------------------------------------------------------------------------- +# Step 6 – directly patch the OpenStackControlPlane CR. +# +# The kustomization file written in Step 4 is only consumed by the +# edpm_prepare / ci_kustomize flow (CRC/devscripts). In the +# kustomize_deploy flow (SKMO), nothing reads that directory, so we +# also apply the patch directly here. +# +# This task requires the OSCP to already exist, so the hook must be +# called from a post_stage_run (after the control-plane stage has +# applied the CR), not a pre_stage_run. 
+# --------------------------------------------------------------------------- +- name: Check if OpenStackControlPlane exists + kubernetes.core.k8s_info: + kubeconfig: "{{ cifmw_openshift_kubeconfig }}" + api_version: core.openstack.org/v1beta1 + kind: OpenStackControlPlane + name: controlplane + namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" + register: _federation_oscp_info + +- name: Patch OpenStackControlPlane with Keystone federation config + when: _federation_oscp_info.resources | length > 0 + kubernetes.core.k8s: + kubeconfig: "{{ cifmw_openshift_kubeconfig }}" + state: patched + definition: + apiVersion: core.openstack.org/v1beta1 + kind: OpenStackControlPlane + metadata: + name: controlplane + namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" + spec: + keystone: + template: + httpdCustomization: + customConfigSecret: keystone-httpd-override + customServiceConfig: | + [DEFAULT] + insecure_debug=true + debug=true + [federation] + trusted_dashboard={{ cifmw_federation_horizon_url }}/dashboard/auth/websso/ + [openid] + remote_id_attribute=HTTP_OIDC_ISS + [auth] + methods = password,token,oauth1,mapped,application_credential,openid + +- name: Patch OpenStackControlPlane to set caBundleSecretName (dedicated keycloakca secret mode) + when: + - not (_federation_merge_ca | bool) + - _federation_oscp_info.resources | length > 0 + kubernetes.core.k8s: + kubeconfig: "{{ cifmw_openshift_kubeconfig }}" + state: patched + definition: + apiVersion: core.openstack.org/v1beta1 + kind: OpenStackControlPlane + metadata: + name: controlplane + namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" + spec: + tls: + caBundleSecretName: keycloakca From f139f6fa601ff13c003e69dcc76353ddba8dd7ff Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 08/16] [federation] Make OpenStack resource creation idempotent When deploy-architecture.sh is re-run against an existing deployment, the federation domain, identity provider, 
mapping, group, project and protocol may already exist in Keystone. The plain 'openstack X create' commands fail with HTTP 409 Conflict in that case. Fix by checking for the existence of each resource with 'openstack X show' (failed_when: false, changed_when: false) before attempting to create it. The create task is only run when the show returned rc != 0 (i.e. the resource was not found). Role-add is repeated unconditionally with failed_when: false because the Keystone API makes it idempotent already. Signed-off-by: Ade Lee Co-Authored-By: Claude --- .../federation/tasks/run_openstack_setup.yml | 97 ++++++++++++++++++- 1 file changed, 92 insertions(+), 5 deletions(-) diff --git a/roles/federation/tasks/run_openstack_setup.yml b/roles/federation/tasks/run_openstack_setup.yml index 07f40baba4..5752d102c2 100644 --- a/roles/federation/tasks/run_openstack_setup.yml +++ b/roles/federation/tasks/run_openstack_setup.yml @@ -21,12 +21,38 @@ mode: "0640" when: cifmw_federation_deploy_type == "crc" +- name: Check if federation domain already exists + environment: + KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" + PATH: "{{ cifmw_path }}" + ansible.builtin.command: + cmd: >- + oc exec -n {{ cifmw_federation_run_osp_cmd_namespace }} -t openstackclient -- + openstack domain show {{ cifmw_federation_keystone_domain }} -f value -c id + register: _federation_domain_check + failed_when: false + changed_when: false + - name: Run federation create domain + when: _federation_domain_check.rc != 0 vars: _osp_cmd: "openstack domain create {{ cifmw_federation_keystone_domain }}" ansible.builtin.include_tasks: run_osp_cmd.yml +- name: Check if federation identity provider already exists + environment: + KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" + PATH: "{{ cifmw_path }}" + ansible.builtin.command: + cmd: >- + oc exec -n {{ cifmw_federation_run_osp_cmd_namespace }} -t openstackclient -- + openstack identity provider show {{ cifmw_federation_IdpName }} -f value -c id + register: 
_federation_idp_check + failed_when: false + changed_when: false + - name: Run federation identity provider create + when: _federation_idp_check.rc != 0 vars: _osp_cmd: "openstack identity provider create --remote-id {{ cifmw_federation_remote_id }} @@ -47,38 +73,99 @@ remote_path: "/home/cloud-admin/{{ cifmw_federation_rules_file }}" local_path: "{{ [ ansible_user_dir, 'ci-framework-data', 'tmp', cifmw_federation_rules_file ] | path_join }}" +- name: Check if federation mapping already exists + environment: + KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" + PATH: "{{ cifmw_path }}" + ansible.builtin.command: + cmd: >- + oc exec -n {{ cifmw_federation_run_osp_cmd_namespace }} -t openstackclient -- + openstack mapping show {{ cifmw_federation_mapping_name }} -f value -c id + register: _federation_mapping_check + failed_when: false + changed_when: false + - name: Run federation mapping create + when: _federation_mapping_check.rc != 0 vars: _osp_cmd: "openstack mapping create --rules {{ cifmw_federation_rules_file }} {{ cifmw_federation_mapping_name }}" ansible.builtin.include_tasks: run_osp_cmd.yml +- name: Check if federation group already exists + environment: + KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" + PATH: "{{ cifmw_path }}" + ansible.builtin.command: + cmd: >- + oc exec -n {{ cifmw_federation_run_osp_cmd_namespace }} -t openstackclient -- + openstack group show --domain {{ cifmw_federation_keystone_domain }} + {{ cifmw_federation_group_name }} -f value -c id + register: _federation_group_check + failed_when: false + changed_when: false + - name: Run federation group create + when: _federation_group_check.rc != 0 vars: _osp_cmd: "openstack group create --domain {{ cifmw_federation_keystone_domain }} {{ cifmw_federation_group_name }}" ansible.builtin.include_tasks: run_osp_cmd.yml +- name: Check if federation project already exists + environment: + KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" + PATH: "{{ cifmw_path }}" + ansible.builtin.command: + cmd: 
>- + oc exec -n {{ cifmw_federation_run_osp_cmd_namespace }} -t openstackclient -- + openstack project show --domain {{ cifmw_federation_keystone_domain }} + {{ cifmw_federation_project_name }} -f value -c id + register: _federation_project_check + failed_when: false + changed_when: false + - name: Run federation project create + when: _federation_project_check.rc != 0 vars: _osp_cmd: "openstack project create --domain {{ cifmw_federation_keystone_domain }} {{ cifmw_federation_project_name }}" ansible.builtin.include_tasks: run_osp_cmd.yml -- name: Run federation rule add - vars: - _osp_cmd: "openstack role add +- name: Run federation role add (safe to repeat - role add is idempotent) + environment: + KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" + PATH: "{{ cifmw_path }}" + ansible.builtin.command: + cmd: >- + oc exec -n {{ cifmw_federation_run_osp_cmd_namespace }} -t openstackclient -- + openstack role add --group {{ cifmw_federation_group_name }} --group-domain {{ cifmw_federation_keystone_domain }} --project {{ cifmw_federation_project_name }} --project-domain {{ cifmw_federation_keystone_domain }} - member" - ansible.builtin.include_tasks: run_osp_cmd.yml + member + failed_when: false + changed_when: true + +- name: Check if federation protocol already exists + environment: + KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" + PATH: "{{ cifmw_path }}" + ansible.builtin.command: + cmd: >- + oc exec -n {{ cifmw_federation_run_osp_cmd_namespace }} -t openstackclient -- + openstack federation protocol show openid + --identity-provider {{ cifmw_federation_IdpName }} -f value -c id + register: _federation_protocol_check + failed_when: false + changed_when: false - name: Run federation protocol create + when: _federation_protocol_check.rc != 0 vars: _osp_cmd: "openstack federation protocol create openid --mapping {{ cifmw_federation_mapping_name }} From fc2038233901f06aa5ed6749713a0beb6da95154 Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 
Subject: [PATCH 09/16] [ci_gen_kustomize_values] Fix 3-run death spiral in edpm-nodeset2-values template The edpm-nodeset2-values template derives _vm_type by splitting the first node name from the existing values.yaml (e.g. edpm-compute-0 -> compute). It then uses _vm_type to find matching instances (startswith compute2-). This creates a self-poisoning 3-run death spiral: Run 1: nodes have git placeholder names (edpm-compute-0) -> _vm_type=compute -> finds compute2-* instances -> writes real hostnames (edpm-compute2-XXXXX-0) back to values.yaml Run 2: nodes now have real CI hostnames (edpm-compute2-XXXXX-0) -> _vm_type=compute2 -> searches for compute22-* (does not exist) -> instances_names=[] -> writes nodes: null back to values.yaml Run 3: nodes is null (Python None) -> None | default({}) returns None (default only fires for Undefined) -> None.keys() -> CRASH: None has no attribute keys Fix with two changes: 1. Replace | default({}) with explicit None-safe conditional so that an explicit YAML null does not sneak through as Python None. 2. Strip trailing digits from the derived _vm_type so that after run 1 rewrites node names, compute2 strips back to compute and the instance lookup continues to find compute2-* entries correctly on all subsequent runs. 
Signed-off-by: Ade Lee Co-Authored-By: Claude --- .../multi-namespace/edpm-nodeset2-values/values.yaml.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/ci_gen_kustomize_values/templates/multi-namespace/edpm-nodeset2-values/values.yaml.j2 b/roles/ci_gen_kustomize_values/templates/multi-namespace/edpm-nodeset2-values/values.yaml.j2 index f3316b92ff..492f2f3dce 100644 --- a/roles/ci_gen_kustomize_values/templates/multi-namespace/edpm-nodeset2-values/values.yaml.j2 +++ b/roles/ci_gen_kustomize_values/templates/multi-namespace/edpm-nodeset2-values/values.yaml.j2 @@ -3,9 +3,9 @@ {% set _ipv = cifmw_ci_gen_kustomize_values_ip_version_var_mapping %} {% set instances_names = [] %} {% set _original_nodeset = (original_content.data | default({})).nodeset | default({}) %} -{% set _original_nodes = _original_nodeset.nodes | default({}) %} +{% set _original_nodes = _original_nodeset.nodes if _original_nodeset.nodes else {} %} {% set _original_services = _original_nodeset['services'] | default([]) %} -{% set _vm_type = (_original_nodes.keys() | first).split('-')[1] %} +{% set _vm_type = ((_original_nodes.keys() | first).split('-')[1] | regex_replace('\\d+$', '')) if _original_nodes else 'compute' %} {{ '#vmtype: ' ~ _vm_type }} {% for _inst in cifmw_networking_env_definition.instances.keys() %} {% if _inst.startswith(_vm_type ~ "2-") %} From 7b45647d68351e056fdb7785bdedf11e389b2ed2 Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 10/16] [multiple] Uniquify OSDPD names per deployment run An OpenStackDataPlaneDeployment (OSDPD) is an immutable record of a single deployment run. Once its Status.Deployed is true, the operator short-circuits reconciliation with "Already deployed" and will never re-run jobs, even if the referenced nodesets have since been updated with new content (e.g. new SSH keys, new node config). 
When ci-framework re-applies a deployment stage with oc apply and the OSDPD already exists from a previous run, the operator ignores it. Meanwhile the nodeset operator resets DeploymentReady=False because it detects that the nodeset's generation has advanced since the last deployment. This produces a permanent deadlock: the nodeset waits for a deployment that will never run, and the wait condition times out after 60 minutes. The correct model is: one OSDPD per deployment *run*, not per nodeset. Fix by auto-generating a timestamp suffix (YYYYMMDDHHMMSS) once at the start of the first deployment stage and appending it to the name of every OpenStackDataPlaneDeployment resource found in the kustomize build output before applying it. The suffix is stable within a single ansible run (so both edpm-deployment and edpm-deployment2 share the same suffix) but differs across runs, producing names like: edpm-deployment-20260313215236 edpm-deployment-20260314093012 Old OSDPDs are left in place as an audit trail of past runs. The operator only acts on the new CR, so the deadlock cannot occur. The suffix can be pinned by setting cifmw_kustomize_deploy_osdpd_suffix explicitly (useful for idempotent re-runs of the same logical deployment). Leave it empty (the default) for automatic timestamp generation. Signed-off-by: Ade Lee Co-Authored-By: Claude Made-with: Cursor --- roles/kustomize_deploy/defaults/main.yml | 11 ++++++ roles/kustomize_deploy/tasks/execute_step.yml | 34 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/roles/kustomize_deploy/defaults/main.yml b/roles/kustomize_deploy/defaults/main.yml index 5236fe3467..2565d04bc6 100644 --- a/roles/kustomize_deploy/defaults/main.yml +++ b/roles/kustomize_deploy/defaults/main.yml @@ -218,6 +218,17 @@ cifmw_kustomize_deploy_dp_dest_file: >- # timeouts and retry configuration + +# Suffix appended to OpenStackDataPlaneDeployment resource names when applying +# a deployment stage. 
Each run produces a uniquely named OSDPD, preventing the +# "Already deployed" deadlock that occurs when an existing OSDPD with +# Status.Deployed=true is re-applied on subsequent runs. +# When empty (the default), a timestamp is auto-generated once at the start of +# the first deployment stage and reused for all subsequent stages in the same +# run, so all OSDPDs in a given run share the same suffix. +# Set explicitly to pin a known value (e.g. for idempotent re-runs). +cifmw_kustomize_deploy_osdpd_suffix: "" + cifmw_kustomize_deploy_delay: 10 cifmw_kustomize_deploy_retries_subscription: 90 cifmw_kustomize_deploy_retries_install_plan: 60 diff --git a/roles/kustomize_deploy/tasks/execute_step.yml b/roles/kustomize_deploy/tasks/execute_step.yml index 76bd5b82bf..98874ff6c9 100644 --- a/roles/kustomize_deploy/tasks/execute_step.yml +++ b/roles/kustomize_deploy/tasks/execute_step.yml @@ -93,6 +93,16 @@ - _tag_name not in _skip_tags - _tag_name_id not in _skip_tags block: + - name: Generate OSDPD run suffix (once per play) + when: _cifmw_kustomize_deploy_run_suffix is not defined + ansible.builtin.set_fact: + _cifmw_kustomize_deploy_run_suffix: >- + {{ + cifmw_kustomize_deploy_osdpd_suffix + if (cifmw_kustomize_deploy_osdpd_suffix | default('') | length > 0) + else (lookup('pipe', 'date +%Y%m%d%H%M%S')) + }} + - name: Ensure source files exists register: _src when: @@ -241,6 +251,30 @@ content: "{{ _kustomize_output.stdout }}" mode: "0644" + + - name: "Uniquify OpenStackDataPlaneDeployment names in {{ stage.path }}" + when: _cifmw_kustomize_deploy_run_suffix | default('') | length > 0 + changed_when: "'Renamed:' in _rename_osdpd.stdout" + register: _rename_osdpd + ansible.builtin.shell: + executable: /bin/bash + cmd: | + python3 - << 'PYEOF' + import yaml, sys + path = "{{ _output }}" + suffix = "{{ _cifmw_kustomize_deploy_run_suffix }}" + with open(path) as f: + docs = [d for d in yaml.safe_load_all(f) if d is not None] + for doc in docs: + if doc.get('kind') == 
'OpenStackDataPlaneDeployment': + name = doc['metadata']['name'] + if not name.endswith('-' + suffix): + doc['metadata']['name'] = name + '-' + suffix + print('Renamed: ' + name + ' -> ' + doc['metadata']['name']) + with open(path, 'w') as f: + yaml.dump_all(docs, f, default_flow_style=False) + PYEOF + - name: "Store kustomized content in artifacts for {{ stage.path }}" ansible.builtin.copy: remote_src: true From 48d4dc67194a64b895540f2a449079601ef70311 Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 11/16] [multiple] Fix MCO stuck-uncordon deadlock MachineConfigs applied during devscripts install trigger an MCO update cycle that runs asynchronously after the cluster becomes reachable. On compact 3-master clusters the MCO controller can enter a permanent deadlock: all nodes reboot, apply the new config, and report state=Done with desiredDrain=lastAppliedDrain=uncordon-*, but the controller never issues the final kubectl uncordon. This leaves all nodes SchedulingDisabled indefinitely, causing every subsequent cluster operator to degrade and the deployment to time out. Add a retry loop in wait_for_cluster.yml (run as part of the openshift_adm 'stable' operation after devscripts post-install) that: - Polls MachineConfigPool status every 30 s for up to 30 minutes. - If a pool is updating normally (nodes being drained/rebooted in sequence) it waits without interrupting the MCO mid-cycle. - If it detects the stuck state (updatedMachineCount == machineCount but readyMachineCount == 0) it runs 'oc adm uncordon' on all nodes to break the deadlock, then continues polling. - Only proceeds to 'oc adm wait-for-stable-cluster' once all pools report Updated=True. 
Signed-off-by: Ade Lee Co-Authored-By: Claude --- .../openshift_adm/tasks/wait_for_cluster.yml | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/roles/openshift_adm/tasks/wait_for_cluster.yml b/roles/openshift_adm/tasks/wait_for_cluster.yml index 8fd5838dde..6695a0ae9d 100644 --- a/roles/openshift_adm/tasks/wait_for_cluster.yml +++ b/roles/openshift_adm/tasks/wait_for_cluster.yml @@ -50,6 +50,73 @@ retries: "{{ cifmw_openshift_adm_retry_count }}" delay: 30 +# MachineConfigs applied during devscripts install (e.g. iSCSI, Cinder LVM) +# trigger an MCO update cycle that continues asynchronously after the cluster +# is first reachable. On compact (3-master) clusters the MCO controller can +# get stuck: all nodes reboot and report state=Done / desiredDrain=uncordon-*, +# but the controller never issues the final kubectl-uncordon, leaving every +# node SchedulingDisabled indefinitely. We handle this with a loop that: +# 1. Waits until no MCP is mid-update (unavailableMachineCount drops to 0) +# OR detects the stuck state (all updated, none ready). +# 2. If stuck, uncordons all nodes to break the deadlock. +# 3. Repeats until all MCPs report Updated=True. +- name: Wait for MachineConfigPools to complete, fixing stuck cordons if needed. + when: + - not cifmw_openshift_adm_dry_run + environment: + KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" + PATH: "{{ cifmw_path }}" + ansible.builtin.shell: | + set -eo pipefail + MCP_JSON=$(oc get mcp -o json) + + UPDATING=$(echo "$MCP_JSON" | \ + python3 -c " + import json, sys + data = json.load(sys.stdin) + updating = [ + i['metadata']['name'] for i in data['items'] + if next((c['status'] for c in i['status'].get('conditions', []) + if c['type'] == 'Updating'), 'False') == 'True' + ] + print('\n'.join(updating)) + ") + + if [ -z "$UPDATING" ]; then + echo "All MCPs are up to date." + exit 0 + fi + + # At least one MCP is still Updating. 
Check for the stuck-uncordon case: + # updatedMachineCount == machineCount but readyMachineCount == 0. + STUCK=$(echo "$MCP_JSON" | \ + python3 -c " + import json, sys + data = json.load(sys.stdin) + stuck = [ + i['metadata']['name'] for i in data['items'] + if (i['status'].get('updatedMachineCount', 0) == + i['status'].get('machineCount', 0) and + i['status'].get('readyMachineCount', 0) == 0 and + i['status'].get('machineCount', 0) > 0) + ] + print('\n'.join(stuck)) + ") + + if [ -n "$STUCK" ]; then + echo "Stuck MCPs detected: $STUCK -- uncordoning all nodes to break deadlock." + oc adm uncordon $(oc get nodes -o jsonpath='{.items[*].metadata.name}') + else + echo "MCPs still updating (normal progress): $UPDATING" + fi + exit 1 + register: _mcp_wait + until: _mcp_wait.rc == 0 + retries: 60 + delay: 30 + changed_when: "'uncordoning' in _mcp_wait.stdout" + failed_when: false + - name: Check for pending certificate approval. when: - _openshift_adm_check_cert_approve | default(false) | bool From f4a6aef6857488173eb5d76c46930e9006fa8d80 Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 12/16] [multiple] Fix SKMO federation CA bundle handling and SSL trust Refactor how the CA bundle secret is managed across federation hooks to avoid relying on kustomize timing and make the logic self-healing: - federation/hook_controlplane_config.yml: Dynamically resolve the CA bundle secret name by reading the live OSCP state (using the existing caBundleSecretName if set, falling back to cifmw_custom_ca_certs_secret_name or 'custom-ca-certs'). Create or update the secret with the Keycloak CA, and patch the OSCP to set caBundleSecretName only when it is not yet set. - federation/run_openstack_auth_setup.yml: Build the full CA list used for auth testing by fetching the openstackclient pod's own system CA bundle as the base (which already trusts RHOSO internal CAs), then appending the ingress-operator CA. 
This avoids trust mismatches between controller-0 and the pod. - federation/defaults/main.yml: Rename cifmw_federation_ca_bundle_secret_name to cifmw_custom_ca_certs_secret_name to reflect that the variable is not federation-specific. - hooks/playbooks/skmo/update-central-ca-bundle.yaml: Merge the two stage-6 post-deploy playbooks (trust-leaf-ca.yaml and ensure-central-ca-bundle.yaml) into a single idempotent playbook that resolves the secret name dynamically, creates or updates the bundle with leaf region root CAs, patches the OSCP when caBundleSecretName is unset, and waits for the leaf CA fingerprint to appear in combined-ca-bundle before continuing. - kustomize_deploy/execute_step.yml: Add | string filters to OSDPD suffix handling so that YAML integer interpretation does not cause a TypeError when the timestamp suffix is checked or concatenated. Signed-off-by: Ade Lee Co-Authored-By: Claude --- .../skmo/ensure-central-ca-bundle.yaml | 28 --- hooks/playbooks/skmo/trust-leaf-ca.yaml | 51 ------ .../skmo/update-central-ca-bundle.yaml | 162 ++++++++++++++++++ roles/federation/defaults/main.yml | 2 +- .../tasks/hook_controlplane_config.yml | 162 ++++++------------ .../tasks/run_openstack_auth_setup.yml | 17 +- roles/kustomize_deploy/tasks/execute_step.yml | 8 +- 7 files changed, 235 insertions(+), 195 deletions(-) delete mode 100644 hooks/playbooks/skmo/ensure-central-ca-bundle.yaml delete mode 100644 hooks/playbooks/skmo/trust-leaf-ca.yaml create mode 100644 hooks/playbooks/skmo/update-central-ca-bundle.yaml diff --git a/hooks/playbooks/skmo/ensure-central-ca-bundle.yaml b/hooks/playbooks/skmo/ensure-central-ca-bundle.yaml deleted file mode 100644 index a37bccb458..0000000000 --- a/hooks/playbooks/skmo/ensure-central-ca-bundle.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -- name: Ensure central control plane uses custom CA bundle - hosts: localhost - gather_facts: false - vars: - central_namespace: openstack - controlplane_name: controlplane - ca_bundle_secret_name: 
custom-ca-certs - tasks: - - name: Check current caBundleSecretName - ansible.builtin.shell: | - set -euo pipefail - oc -n {{ central_namespace }} get osctlplane {{ controlplane_name }} \ - -o jsonpath='{.spec.tls.caBundleSecretName}' - args: - executable: /bin/bash - register: ca_bundle_name - changed_when: false - failed_when: false - - - name: Patch control plane to use custom CA bundle when unset - ansible.builtin.shell: | - set -euo pipefail - oc -n {{ central_namespace }} patch osctlplane {{ controlplane_name }} \ - --type json -p '[{"op":"add","path":"/spec/tls","value":{}},{"op":"add","path":"/spec/tls/caBundleSecretName","value":"{{ ca_bundle_secret_name }}"}]' - args: - executable: /bin/bash - when: ca_bundle_name.stdout | trim == "" diff --git a/hooks/playbooks/skmo/trust-leaf-ca.yaml b/hooks/playbooks/skmo/trust-leaf-ca.yaml deleted file mode 100644 index 5a7b001e0d..0000000000 --- a/hooks/playbooks/skmo/trust-leaf-ca.yaml +++ /dev/null @@ -1,51 +0,0 @@ ---- -- name: Trust SKMO leaf CA in central region - hosts: localhost - gather_facts: false - vars: - skmo_values_file: "{{ cifmw_architecture_repo }}/examples/va/multi-namespace-skmo/control-plane2/skmo-values.yaml" - central_namespace: openstack - leaf_namespace: openstack2 - leaf_rootca_secret: rootca-public - leaf_rootca_internal_secret: rootca-internal - tasks: - - name: Load SKMO values - ansible.builtin.set_fact: - skmo_values: "{{ lookup('file', skmo_values_file) | from_yaml }}" - - - name: Set central CA bundle secret name - ansible.builtin.set_fact: - central_ca_bundle_secret_name: "{{ skmo_values.data.centralCaBundleSecretName }}" - - - name: Get existing central CA bundle if present - kubernetes.core.k8s_info: - api_version: v1 - kind: Secret - namespace: "{{ central_namespace }}" - name: "{{ central_ca_bundle_secret_name }}" - register: _existing_bundle - - - name: Get leaf rootca certs - kubernetes.core.k8s_info: - api_version: v1 - kind: Secret - namespace: "{{ leaf_namespace }}" - name: 
"{{ item }}" - register: _leaf_certs - loop: - - "{{ leaf_rootca_secret }}" - - "{{ leaf_rootca_internal_secret }}" - - - name: Create or update central CA bundle secret - kubernetes.core.k8s: - state: present - definition: - apiVersion: v1 - kind: Secret - metadata: - name: "{{ central_ca_bundle_secret_name }}" - namespace: "{{ central_namespace }}" - data: "{{ (_existing_bundle.resources[0].data | default({})) | combine({ - 'skmo-leaf-rootca.crt': _leaf_certs.results[0].resources[0].data['tls.crt'], - 'skmo-leaf-rootca-internal.crt': _leaf_certs.results[1].resources[0].data['tls.crt'] - }) }}" diff --git a/hooks/playbooks/skmo/update-central-ca-bundle.yaml b/hooks/playbooks/skmo/update-central-ca-bundle.yaml new file mode 100644 index 0000000000..fe0bced2db --- /dev/null +++ b/hooks/playbooks/skmo/update-central-ca-bundle.yaml @@ -0,0 +1,162 @@ +--- +- name: Update central CA bundle with leaf region CAs and wait for reconciliation + hosts: localhost + gather_facts: false + vars: + central_namespace: openstack + leaf_namespace: openstack2 + controlplane_name: controlplane + leaf_rootca_secret: rootca-public + leaf_rootca_internal_secret: rootca-internal + tasks: + # ------------------------------------------------------------------------- + # Step 1 - determine which secret holds the central CA bundle. + # + # Priority: + # 1. spec.tls.caBundleSecretName already set on the OSCP. + # 2. cifmw_custom_ca_certs_secret_name variable (if set by caller). + # 3. Hard default: "custom-ca-certs". 
+ # ------------------------------------------------------------------------- + - name: Read current OpenStackControlPlane state + kubernetes.core.k8s_info: + api_version: core.openstack.org/v1beta1 + kind: OpenStackControlPlane + name: "{{ controlplane_name }}" + namespace: "{{ central_namespace }}" + register: _central_oscp_info + + - name: Resolve CA bundle secret name + ansible.builtin.set_fact: + _ca_bundle_secret_name: >- + {{ + ((_central_oscp_info.resources | first).spec.tls | default({})).caBundleSecretName + | default(cifmw_custom_ca_certs_secret_name | default('custom-ca-certs')) + | default('custom-ca-certs') + }} + _oscp_has_ca_bundle: >- + {{ + ( + ((_central_oscp_info.resources | first).spec.tls | default({})).caBundleSecretName + | default('') + ) | length > 0 + }} + + # ------------------------------------------------------------------------- + # Step 2 - fetch the leaf region CA certs + # ------------------------------------------------------------------------- + - name: Get leaf region rootca certs + kubernetes.core.k8s_info: + api_version: v1 + kind: Secret + namespace: "{{ leaf_namespace }}" + name: "{{ item }}" + register: _leaf_certs + loop: + - "{{ leaf_rootca_secret }}" + - "{{ leaf_rootca_internal_secret }}" + + # ------------------------------------------------------------------------- + # Step 3 - get existing central CA bundle data (if secret already exists) + # ------------------------------------------------------------------------- + - name: Look up existing central CA bundle secret + kubernetes.core.k8s_info: + api_version: v1 + kind: Secret + namespace: "{{ central_namespace }}" + name: "{{ _ca_bundle_secret_name }}" + register: _existing_bundle + + - name: Capture existing CA bundle secret data + ansible.builtin.set_fact: + _existing_bundle_data: >- + {{ + (_existing_bundle.resources | first).data + if _existing_bundle.resources | length > 0 + else {} + }} + + # 
------------------------------------------------------------------------- + # Step 4 - create or update the secret, merging in the leaf CAs + # ------------------------------------------------------------------------- + - name: Create or update central CA bundle secret with leaf region CAs + kubernetes.core.k8s: + state: present + definition: + apiVersion: v1 + kind: Secret + metadata: + name: "{{ _ca_bundle_secret_name }}" + namespace: "{{ central_namespace }}" + data: >- + {{ + _existing_bundle_data | combine({ + 'skmo-leaf-rootca.crt': + _leaf_certs.results[0].resources[0].data['tls.crt'], + 'skmo-leaf-rootca-internal.crt': + _leaf_certs.results[1].resources[0].data['tls.crt'] + }) + }} + + # ------------------------------------------------------------------------- + # Step 5 - patch the OSCP to reference the secret when not already set + # ------------------------------------------------------------------------- + - name: Patch OpenStackControlPlane to set caBundleSecretName (when unset) + when: not _oscp_has_ca_bundle | bool + kubernetes.core.k8s: + state: patched + definition: + apiVersion: core.openstack.org/v1beta1 + kind: OpenStackControlPlane + metadata: + name: "{{ controlplane_name }}" + namespace: "{{ central_namespace }}" + spec: + tls: + caBundleSecretName: "{{ _ca_bundle_secret_name }}" + + # ------------------------------------------------------------------------- + # Step 6 - wait for RHOSO to reconcile combined-ca-bundle. + # + # We compare the fingerprint of the leaf rootca cert we just added against + # every cert in combined-ca-bundle, retrying until it appears. 
+ # ------------------------------------------------------------------------- + - name: Wait for leaf region CA to appear in combined-ca-bundle + ansible.builtin.shell: | + set -euo pipefail + TMPDIR=$(mktemp -d) + trap "rm -rf $TMPDIR" EXIT + + echo "{{ _leaf_certs.results[0].resources[0].data['tls.crt'] }}" | \ + base64 -d > "$TMPDIR/leaf-ca.crt" + FINGERPRINT=$(openssl x509 -noout -fingerprint -in "$TMPDIR/leaf-ca.crt" \ + | cut -d= -f2) + + oc get secret combined-ca-bundle \ + -n {{ central_namespace }} \ + -o jsonpath='{.data.tls-ca-bundle\.pem}' \ + | base64 -d > "$TMPDIR/bundle.pem" + + python3 - "$FINGERPRINT" "$TMPDIR/bundle.pem" <<'PYEOF' + import sys, subprocess, re + target, bundle_file = sys.argv[1], sys.argv[2] + bundle = open(bundle_file).read() + certs = re.findall( + r'-----BEGIN CERTIFICATE-----.*?-----END CERTIFICATE-----', + bundle, re.DOTALL + ) + for cert in certs: + r = subprocess.run( + ['openssl', 'x509', '-noout', '-fingerprint'], + input=cert.encode(), capture_output=True + ) + if target in r.stdout.decode(): + sys.exit(0) + sys.exit(1) + PYEOF + args: + executable: /bin/bash + register: _ca_reconciled + until: _ca_reconciled.rc == 0 + retries: 30 + delay: 10 + changed_when: false diff --git a/roles/federation/defaults/main.yml b/roles/federation/defaults/main.yml index bda9968883..c691046fa8 100644 --- a/roles/federation/defaults/main.yml +++ b/roles/federation/defaults/main.yml @@ -53,7 +53,7 @@ cifmw_federation_deploy_multirealm: false # When left empty (the default) the original behaviour is preserved: a dedicated # 'keycloakca' secret is created and the kustomization patch sets # spec.tls.caBundleSecretName to 'keycloakca'. 
-cifmw_federation_ca_bundle_secret_name: "" +cifmw_custom_ca_certs_secret_name: "" # ============================================================================= # KEYCLOAK TEST USERS AND GROUPS - REALM 1 diff --git a/roles/federation/tasks/hook_controlplane_config.yml b/roles/federation/tasks/hook_controlplane_config.yml index 82c549f8d9..4d8bb104a0 100644 --- a/roles/federation/tasks/hook_controlplane_config.yml +++ b/roles/federation/tasks/hook_controlplane_config.yml @@ -15,7 +15,7 @@ # under the License. # --------------------------------------------------------------------------- -# Step 1 – read the Keycloak CA cert written by federation-pre-deploy +# Step 1 - read the Keycloak CA cert written by federation-pre-deploy # --------------------------------------------------------------------------- - name: Get ingress operator CA cert ansible.builtin.slurp: @@ -23,33 +23,55 @@ register: federation_sso_ca # --------------------------------------------------------------------------- -# Step 2 – determine CA handling mode +# Step 2 - read the live OSCP to determine where the CA bundle lives. # -# When cifmw_federation_ca_bundle_secret_name is set we merge the Keycloak CA -# into that secret (creating it if absent). The kustomization patch then -# does NOT touch spec.tls.caBundleSecretName because the control plane CR is -# assumed to already reference the correct secret (e.g. custom-ca-certs). +# Priority for the secret name: +# 1. spec.tls.caBundleSecretName already set on the OSCP (use it as-is). +# 2. cifmw_custom_ca_certs_secret_name variable (if set by caller). +# 3. Hard default: "custom-ca-certs". # -# When cifmw_federation_ca_bundle_secret_name is empty we fall back to the -# original behaviour: create a dedicated 'keycloakca' secret and patch -# spec.tls.caBundleSecretName to point at it. 
+# This makes the hook self-healing: it does not rely on the kustomize having +# correctly propagated caBundleSecretName, and it works on fresh installs +# where the secret does not yet exist. # --------------------------------------------------------------------------- -- name: Set federation CA merge mode flag +- name: Read current OpenStackControlPlane state + kubernetes.core.k8s_info: + kubeconfig: "{{ cifmw_openshift_kubeconfig }}" + api_version: core.openstack.org/v1beta1 + kind: OpenStackControlPlane + name: controlplane + namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" + register: _federation_oscp_info + +- name: Resolve CA bundle secret name and check if OSCP already references one ansible.builtin.set_fact: - _federation_merge_ca: "{{ cifmw_federation_ca_bundle_secret_name | length > 0 }}" + _federation_ca_bundle_secret_name: >- + {{ + ((_federation_oscp_info.resources | first).spec.tls | default({})).caBundleSecretName + | default(cifmw_custom_ca_certs_secret_name | default('custom-ca-certs')) + | default('custom-ca-certs') + }} + _federation_oscp_has_ca_bundle: >- + {{ + ( + ((_federation_oscp_info.resources | first).spec.tls | default({})).caBundleSecretName + | default('') + ) | length > 0 + }} +# --------------------------------------------------------------------------- +# Step 3 - preserve any keys already in the target secret +# --------------------------------------------------------------------------- - name: Look up existing CA bundle secret - when: _federation_merge_ca | bool kubernetes.core.k8s_info: kubeconfig: "{{ cifmw_openshift_kubeconfig }}" api_version: v1 kind: Secret - name: "{{ cifmw_federation_ca_bundle_secret_name }}" + name: "{{ _federation_ca_bundle_secret_name }}" namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" register: _federation_existing_ca_bundle - name: Capture existing CA bundle secret data - when: _federation_merge_ca | bool ansible.builtin.set_fact: _federation_ca_bundle_existing_data: >- {{ @@ -59,10 
+81,9 @@ }} # --------------------------------------------------------------------------- -# Step 3a – merge Keycloak CA into the existing / new bundle secret +# Step 4 - create / update the secret, adding keycloak-ca.crt # --------------------------------------------------------------------------- - name: Create or update CA bundle secret with Keycloak CA cert - when: _federation_merge_ca | bool kubernetes.core.k8s: kubeconfig: "{{ cifmw_openshift_kubeconfig }}" state: present @@ -71,7 +92,7 @@ kind: Secret type: Opaque metadata: - name: "{{ cifmw_federation_ca_bundle_secret_name }}" + name: "{{ _federation_ca_bundle_secret_name }}" namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" data: >- {{ @@ -80,31 +101,25 @@ }} # --------------------------------------------------------------------------- -# Step 3b – original path: create a dedicated keycloakca secret +# Step 5 - patch the OSCP to reference the secret when not already set # --------------------------------------------------------------------------- -- name: Add dedicated Keycloak CA secret - when: not (_federation_merge_ca | bool) +- name: Patch OpenStackControlPlane to set caBundleSecretName (when unset) + when: not _federation_oscp_has_ca_bundle | bool kubernetes.core.k8s: kubeconfig: "{{ cifmw_openshift_kubeconfig }}" - state: present + state: patched definition: - apiVersion: v1 - kind: Secret - type: Opaque + apiVersion: core.openstack.org/v1beta1 + kind: OpenStackControlPlane metadata: - name: keycloakca + name: controlplane namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" - data: - KeyCloakCA: "{{ federation_sso_ca.content }}" + spec: + tls: + caBundleSecretName: "{{ _federation_ca_bundle_secret_name }}" # --------------------------------------------------------------------------- -# Step 4 – create the kustomization that patches the Keystone OSCP. -# -# Merge mode: only patch httpdCustomization and customServiceConfig. -# caBundleSecretName is already correctly set in the CR. 
-# -# Dedicated-secret mode: also patch spec.tls.caBundleSecretName (original -# behaviour). +# Step 6 - kustomization for CRC/devscripts flow (not consumed by kustomize_deploy) # --------------------------------------------------------------------------- - name: Ensure kustomization controlplane directory exists ansible.builtin.file: @@ -112,8 +127,7 @@ state: directory mode: "0755" -- name: Create Keystone federation kustomization (merge into existing CA bundle) - when: _federation_merge_ca | bool +- name: Create Keystone federation kustomization ansible.builtin.copy: dest: "{{ cifmw_manifests_dir }}/kustomizations/controlplane/keystone_federation.yaml" mode: "0644" @@ -127,44 +141,9 @@ kind: OpenStackControlPlane name: .* patch: |- - - op: add - path: /spec/keystone/template/httpdCustomization - value: - customConfigSecret: keystone-httpd-override - - op: add - path: /spec/keystone/template/customServiceConfig - value: | - [DEFAULT] - insecure_debug=true - debug=true - [federation] - trusted_dashboard={{ cifmw_federation_horizon_url }}/dashboard/auth/websso/ - [openid] - remote_id_attribute=HTTP_OIDC_ISS - [auth] - methods = password,token,oauth1,mapped,application_credential,openid - -- name: Create Keystone federation kustomization (dedicated keycloakca secret) - when: not (_federation_merge_ca | bool) - ansible.builtin.copy: - dest: "{{ cifmw_manifests_dir }}/kustomizations/controlplane/keystone_federation.yaml" - mode: "0644" - content: |- - apiVersion: kustomize.config.k8s.io/v1beta1 - kind: Kustomization - resources: - - namespace: {{ cifmw_federation_run_osp_cmd_namespace }} - patches: - - target: - kind: OpenStackControlPlane - name: .* - patch: |- - - op: add - path: /spec/tls - value: {} - op: add path: /spec/tls/caBundleSecretName - value: keycloakca + value: {{ _federation_ca_bundle_secret_name }} - op: add path: /spec/keystone/template/httpdCustomization value: @@ -183,7 +162,7 @@ methods = password,token,oauth1,mapped,application_credential,openid 
# --------------------------------------------------------------------------- -# Step 5 – create the Keystone httpd override secret (always needed) +# Step 7 - Keystone httpd override secret (always needed) # --------------------------------------------------------------------------- - name: Create Keystone httpd override secret for Federation kubernetes.core.k8s: @@ -200,26 +179,8 @@ federation.conf: "{{ lookup('template', 'federation-single.conf.j2') }}" # --------------------------------------------------------------------------- -# Step 6 – directly patch the OpenStackControlPlane CR. -# -# The kustomization file written in Step 4 is only consumed by the -# edpm_prepare / ci_kustomize flow (CRC/devscripts). In the -# kustomize_deploy flow (SKMO), nothing reads that directory, so we -# also apply the patch directly here. -# -# This task requires the OSCP to already exist, so the hook must be -# called from a post_stage_run (after the control-plane stage has -# applied the CR), not a pre_stage_run. 
+# Step 8 - patch the OSCP for Keystone OIDC settings (kustomize_deploy flow) # --------------------------------------------------------------------------- -- name: Check if OpenStackControlPlane exists - kubernetes.core.k8s_info: - kubeconfig: "{{ cifmw_openshift_kubeconfig }}" - api_version: core.openstack.org/v1beta1 - kind: OpenStackControlPlane - name: controlplane - namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" - register: _federation_oscp_info - - name: Patch OpenStackControlPlane with Keystone federation config when: _federation_oscp_info.resources | length > 0 kubernetes.core.k8s: @@ -246,20 +207,3 @@ remote_id_attribute=HTTP_OIDC_ISS [auth] methods = password,token,oauth1,mapped,application_credential,openid - -- name: Patch OpenStackControlPlane to set caBundleSecretName (dedicated keycloakca secret mode) - when: - - not (_federation_merge_ca | bool) - - _federation_oscp_info.resources | length > 0 - kubernetes.core.k8s: - kubeconfig: "{{ cifmw_openshift_kubeconfig }}" - state: patched - definition: - apiVersion: core.openstack.org/v1beta1 - kind: OpenStackControlPlane - metadata: - name: controlplane - namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" - spec: - tls: - caBundleSecretName: keycloakca diff --git a/roles/federation/tasks/run_openstack_auth_setup.yml b/roles/federation/tasks/run_openstack_auth_setup.yml index 55c2a30ce1..558558206a 100644 --- a/roles/federation/tasks/run_openstack_auth_setup.yml +++ b/roles/federation/tasks/run_openstack_auth_setup.yml @@ -53,9 +53,21 @@ remote_path: "/home/cloud-admin/{{ cifmw_federation_keycloak_testuser2_username }}" local_path: "{{ [ ansible_user_dir, 'ci-framework-data', 'tmp', cifmw_federation_keycloak_testuser2_username ] | path_join }}" -- name: Copy system CA bundle +# Build full-ca-list.crt starting from the openstackclient pod's own system CA +# bundle (which already trusts all RHOSO/OCP internal CAs including rootca-public), +# then append the ingress-operator CA so that 
Keycloak — accessed via the OCP +# ingress route — is also trusted. Using the pod's bundle as the base avoids +# any mismatch between what controller-0 trusts and what the pod trusts. +- name: Fetch system CA bundle from openstackclient pod + kubernetes.core.k8s_exec: + namespace: "{{ cifmw_federation_run_osp_cmd_namespace }}" + pod: openstackclient + command: cat /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem + register: _pod_ca_bundle + +- name: Write pod CA bundle locally as base for full-ca-list.crt ansible.builtin.copy: - src: "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem" + content: "{{ _pod_ca_bundle.stdout }}" dest: "{{ [ ansible_user_dir, 'ci-framework-data', 'tmp', 'full-ca-list.crt' ] | path_join }}" mode: "0444" @@ -67,6 +79,7 @@ - name: Add ingress operator CA to bundle ansible.builtin.blockinfile: path: "{{ [ ansible_user_dir, 'ci-framework-data', 'tmp', 'full-ca-list.crt' ] | path_join }}" + marker: "# {mark} ingress-operator CA" block: "{{ federation_sso_ca.content | b64decode }}" - name: Copy CA bundle to openstackclient pod diff --git a/roles/kustomize_deploy/tasks/execute_step.yml b/roles/kustomize_deploy/tasks/execute_step.yml index 98874ff6c9..10c0197b37 100644 --- a/roles/kustomize_deploy/tasks/execute_step.yml +++ b/roles/kustomize_deploy/tasks/execute_step.yml @@ -98,9 +98,9 @@ ansible.builtin.set_fact: _cifmw_kustomize_deploy_run_suffix: >- {{ - cifmw_kustomize_deploy_osdpd_suffix - if (cifmw_kustomize_deploy_osdpd_suffix | default('') | length > 0) - else (lookup('pipe', 'date +%Y%m%d%H%M%S')) + cifmw_kustomize_deploy_osdpd_suffix | string + if (cifmw_kustomize_deploy_osdpd_suffix | default('') | string | length > 0) + else (lookup('pipe', 'date +%Y%m%d%H%M%S') | string) }} - name: Ensure source files exists @@ -253,7 +253,7 @@ - name: "Uniquify OpenStackDataPlaneDeployment names in {{ stage.path }}" - when: _cifmw_kustomize_deploy_run_suffix | default('') | length > 0 + when: _cifmw_kustomize_deploy_run_suffix | default('') | 
string | length > 0 changed_when: "'Renamed:' in _rename_osdpd.stdout" register: _rename_osdpd ansible.builtin.shell: From 587f9e597521ee794bc23f53b25e1a6da39a4a8f Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 13/16] [federation] Fix CA bundle secret name resolving to empty string Ansible's default() filter (without boolean=True) only substitutes Undefined values, not empty strings. cifmw_custom_ca_certs_secret_name is defined as "" in defaults/main.yml, so: | default(cifmw_custom_ca_certs_secret_name | default('custom-ca-certs')) evaluated the inner default() to "" (defined, not undefined), and the outer default() then received "" instead of Undefined, leaving the secret name empty and causing the kubernetes.core.k8s task to fail with "metadata.name: Required value". Fix by passing true as the second argument to both default() calls so that falsy values (including empty strings) are also replaced. Affects hook_controlplane_config.yml and update-central-ca-bundle.yaml. 
Made-with: Cursor Signed-off-by: Ade Lee Co-Authored-By: Claude --- hooks/playbooks/skmo/update-central-ca-bundle.yaml | 4 ++-- roles/federation/tasks/hook_controlplane_config.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hooks/playbooks/skmo/update-central-ca-bundle.yaml b/hooks/playbooks/skmo/update-central-ca-bundle.yaml index fe0bced2db..1b57b3c38b 100644 --- a/hooks/playbooks/skmo/update-central-ca-bundle.yaml +++ b/hooks/playbooks/skmo/update-central-ca-bundle.yaml @@ -30,8 +30,8 @@ _ca_bundle_secret_name: >- {{ ((_central_oscp_info.resources | first).spec.tls | default({})).caBundleSecretName - | default(cifmw_custom_ca_certs_secret_name | default('custom-ca-certs')) - | default('custom-ca-certs') + | default(cifmw_custom_ca_certs_secret_name | default('custom-ca-certs', true), true) + | default('custom-ca-certs', true) }} _oscp_has_ca_bundle: >- {{ diff --git a/roles/federation/tasks/hook_controlplane_config.yml b/roles/federation/tasks/hook_controlplane_config.yml index 4d8bb104a0..41bfc533c0 100644 --- a/roles/federation/tasks/hook_controlplane_config.yml +++ b/roles/federation/tasks/hook_controlplane_config.yml @@ -48,8 +48,8 @@ _federation_ca_bundle_secret_name: >- {{ ((_federation_oscp_info.resources | first).spec.tls | default({})).caBundleSecretName - | default(cifmw_custom_ca_certs_secret_name | default('custom-ca-certs')) - | default('custom-ca-certs') + | default(cifmw_custom_ca_certs_secret_name | default('custom-ca-certs', true), true) + | default('custom-ca-certs', true) }} _federation_oscp_has_ca_bundle: >- {{ From 12f273400c2478f707562b3931ca263303636b03 Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 01:27:36 +0000 Subject: [PATCH 14/16] [multiple] Handle already-unlocked state in usroverlay task rpm-ostree usroverlay returns exit code 1 with the message "Deployment is already in unlocked state: development" when the CoreOS node is already in the unlocked overlay state from a previous run. 
This caused the pcp_metrics hook to abort the entire deployment on re-runs without a full node reboot. Register the result and only treat non-zero exit codes as failures when the stderr does not contain the "already in unlocked state" message, making the task idempotent across multiple deploy attempts. Signed-off-by: Ade Lee Co-Authored-By: Claude --- roles/pcp_metrics/tasks/coreos.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/roles/pcp_metrics/tasks/coreos.yaml b/roles/pcp_metrics/tasks/coreos.yaml index c60b8a733a..2d99c35a33 100644 --- a/roles/pcp_metrics/tasks/coreos.yaml +++ b/roles/pcp_metrics/tasks/coreos.yaml @@ -15,6 +15,12 @@ become: true ansible.builtin.command: cmd: rpm-ostree usroverlay + register: _pcp_usroverlay + changed_when: _pcp_usroverlay.rc == 0 + failed_when: + - _pcp_usroverlay.rc != 0 + - >- + "already in unlocked state" not in _pcp_usroverlay.stderr - name: Create required directory become: true From 6767ae66a0b65485ffbfc623147f5cc9b7369d8c Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Wed, 25 Mar 2026 21:17:23 +0000 Subject: [PATCH 15/16] [multiple] Refactor inline Python/shell patterns to cleaner alternatives Replace python3 -c JSON parsing in wait_for_cluster.yml with jq expressions. Move the inline python3 heredoc for OSDPD renaming in execute_step.yml to a standalone script (roles/kustomize_deploy/files/uniquify_osdpd.py) invoked via ansible.builtin.script. Replace the shell+openssl+python fingerprint loop in update-central-ca-bundle.yaml with a kubernetes.core.k8s_info until task that checks for the leaf cert PEM as a substring of the combined bundle using Jinja2. 
Signed-off-by: Ade Lee Co-Authored-By: Claude Made-with: Cursor --- .../skmo/update-central-ca-bundle.yaml | 47 +++++-------------- .../kustomize_deploy/files/uniquify_osdpd.py | 28 +++++++++++ roles/kustomize_deploy/tasks/execute_step.yml | 26 +++------- .../openshift_adm/tasks/wait_for_cluster.yml | 42 +++++++---------- 4 files changed, 65 insertions(+), 78 deletions(-) create mode 100644 roles/kustomize_deploy/files/uniquify_osdpd.py diff --git a/hooks/playbooks/skmo/update-central-ca-bundle.yaml b/hooks/playbooks/skmo/update-central-ca-bundle.yaml index 1b57b3c38b..784ed581a4 100644 --- a/hooks/playbooks/skmo/update-central-ca-bundle.yaml +++ b/hooks/playbooks/skmo/update-central-ca-bundle.yaml @@ -121,42 +121,19 @@ # every cert in combined-ca-bundle, retrying until it appears. # ------------------------------------------------------------------------- - name: Wait for leaf region CA to appear in combined-ca-bundle - ansible.builtin.shell: | - set -euo pipefail - TMPDIR=$(mktemp -d) - trap "rm -rf $TMPDIR" EXIT - - echo "{{ _leaf_certs.results[0].resources[0].data['tls.crt'] }}" | \ - base64 -d > "$TMPDIR/leaf-ca.crt" - FINGERPRINT=$(openssl x509 -noout -fingerprint -in "$TMPDIR/leaf-ca.crt" \ - | cut -d= -f2) - - oc get secret combined-ca-bundle \ - -n {{ central_namespace }} \ - -o jsonpath='{.data.tls-ca-bundle\.pem}' \ - | base64 -d > "$TMPDIR/bundle.pem" - - python3 - "$FINGERPRINT" "$TMPDIR/bundle.pem" <<'PYEOF' - import sys, subprocess, re - target, bundle_file = sys.argv[1], sys.argv[2] - bundle = open(bundle_file).read() - certs = re.findall( - r'-----BEGIN CERTIFICATE-----.*?-----END CERTIFICATE-----', - bundle, re.DOTALL + kubernetes.core.k8s_info: + api_version: v1 + kind: Secret + namespace: "{{ central_namespace }}" + name: combined-ca-bundle + register: _combined_bundle + until: >- + (_combined_bundle.resources | length > 0) and + ( + _leaf_certs.results[0].resources[0].data['tls.crt'] | b64decode + in + (_combined_bundle.resources | 
first).data['tls-ca-bundle.pem'] | b64decode ) - for cert in certs: - r = subprocess.run( - ['openssl', 'x509', '-noout', '-fingerprint'], - input=cert.encode(), capture_output=True - ) - if target in r.stdout.decode(): - sys.exit(0) - sys.exit(1) - PYEOF - args: - executable: /bin/bash - register: _ca_reconciled - until: _ca_reconciled.rc == 0 retries: 30 delay: 10 changed_when: false diff --git a/roles/kustomize_deploy/files/uniquify_osdpd.py b/roles/kustomize_deploy/files/uniquify_osdpd.py new file mode 100644 index 0000000000..a9bef41353 --- /dev/null +++ b/roles/kustomize_deploy/files/uniquify_osdpd.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +"""Append a run suffix to OpenStackDataPlaneDeployment resource names. + +Usage: uniquify_osdpd.py <manifest_path> <suffix> + +Reads the multi-document YAML file at <manifest_path>, appends <suffix> +to the metadata.name of every OpenStackDataPlaneDeployment resource that +does not already end with that suffix, and writes the result back in place. +Prints a "Renamed: <old> -> <new>" line for each renamed resource so that +the calling Ansible task can use changed_when on stdout.
+""" +import sys +import yaml + +path, suffix = sys.argv[1], sys.argv[2] + +with open(path) as f: + docs = [d for d in yaml.safe_load_all(f) if d is not None] + +for doc in docs: + if doc.get("kind") == "OpenStackDataPlaneDeployment": + name = doc["metadata"]["name"] + if not name.endswith("-" + suffix): + doc["metadata"]["name"] = name + "-" + suffix + print("Renamed: {} -> {}".format(name, doc["metadata"]["name"])) + +with open(path, "w") as f: + yaml.dump_all(docs, f, default_flow_style=False) diff --git a/roles/kustomize_deploy/tasks/execute_step.yml b/roles/kustomize_deploy/tasks/execute_step.yml index 10c0197b37..6ea15ea727 100644 --- a/roles/kustomize_deploy/tasks/execute_step.yml +++ b/roles/kustomize_deploy/tasks/execute_step.yml @@ -254,26 +254,14 @@ - name: "Uniquify OpenStackDataPlaneDeployment names in {{ stage.path }}" when: _cifmw_kustomize_deploy_run_suffix | default('') | string | length > 0 - changed_when: "'Renamed:' in _rename_osdpd.stdout" + ansible.builtin.script: + executable: python3 + cmd: >- + {{ role_path }}/files/uniquify_osdpd.py + {{ _output | quote }} + {{ _cifmw_kustomize_deploy_run_suffix | string | quote }} register: _rename_osdpd - ansible.builtin.shell: - executable: /bin/bash - cmd: | - python3 - << 'PYEOF' - import yaml, sys - path = "{{ _output }}" - suffix = "{{ _cifmw_kustomize_deploy_run_suffix }}" - with open(path) as f: - docs = [d for d in yaml.safe_load_all(f) if d is not None] - for doc in docs: - if doc.get('kind') == 'OpenStackDataPlaneDeployment': - name = doc['metadata']['name'] - if not name.endswith('-' + suffix): - doc['metadata']['name'] = name + '-' + suffix - print('Renamed: ' + name + ' -> ' + doc['metadata']['name']) - with open(path, 'w') as f: - yaml.dump_all(docs, f, default_flow_style=False) - PYEOF + changed_when: "'Renamed:' in _rename_osdpd.stdout" - name: "Store kustomized content in artifacts for {{ stage.path }}" ansible.builtin.copy: diff --git a/roles/openshift_adm/tasks/wait_for_cluster.yml 
b/roles/openshift_adm/tasks/wait_for_cluster.yml index 6695a0ae9d..cca0f83352 100644 --- a/roles/openshift_adm/tasks/wait_for_cluster.yml +++ b/roles/openshift_adm/tasks/wait_for_cluster.yml @@ -70,17 +70,15 @@ set -eo pipefail MCP_JSON=$(oc get mcp -o json) - UPDATING=$(echo "$MCP_JSON" | \ - python3 -c " - import json, sys - data = json.load(sys.stdin) - updating = [ - i['metadata']['name'] for i in data['items'] - if next((c['status'] for c in i['status'].get('conditions', []) - if c['type'] == 'Updating'), 'False') == 'True' - ] - print('\n'.join(updating)) - ") + UPDATING=$(echo "$MCP_JSON" | jq -r ' + .items[] | + select( + .status.conditions // [] | + map(select(.type == "Updating" and .status == "True")) | + length > 0 + ) | + .metadata.name + ') if [ -z "$UPDATING" ]; then echo "All MCPs are up to date." @@ -89,19 +87,15 @@ # At least one MCP is still Updating. Check for the stuck-uncordon case: # updatedMachineCount == machineCount but readyMachineCount == 0. - STUCK=$(echo "$MCP_JSON" | \ - python3 -c " - import json, sys - data = json.load(sys.stdin) - stuck = [ - i['metadata']['name'] for i in data['items'] - if (i['status'].get('updatedMachineCount', 0) == - i['status'].get('machineCount', 0) and - i['status'].get('readyMachineCount', 0) == 0 and - i['status'].get('machineCount', 0) > 0) - ] - print('\n'.join(stuck)) - ") + STUCK=$(echo "$MCP_JSON" | jq -r ' + .items[] | + select( + .status.updatedMachineCount == .status.machineCount and + .status.readyMachineCount == 0 and + .status.machineCount > 0 + ) | + .metadata.name + ') if [ -n "$STUCK" ]; then echo "Stuck MCPs detected: $STUCK -- uncordoning all nodes to break deadlock." 
From 49572b5769c2c987fc6f6fbc3d0b06dbf6892034 Mon Sep 17 00:00:00 2001 From: Ade Lee Date: Thu, 26 Mar 2026 19:17:44 +0000 Subject: [PATCH 16/16] [multiple] Use strategic merge patch in Keystone federation kustomization Replace the JSON Patch (op/path/value) entries in the kustomize file written by hook_controlplane_config.yml with a single strategic merge patch. The JSON Patch approach was fragile: `add /spec/tls/caBundleSecretName` would fail if spec.tls had no parent yet, and adding the parent first as an empty dict would clobber existing TLS fields. A strategic merge patch merges at each level, so it works regardless of whether spec.tls already exists and leaves any pre-existing TLS fields untouched. Signed-off-by: Ade Lee Co-Authored-By: Claude Made-with: Cursor --- .../tasks/hook_controlplane_config.yml | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/roles/federation/tasks/hook_controlplane_config.yml b/roles/federation/tasks/hook_controlplane_config.yml index 41bfc533c0..9ec02b056c 100644 --- a/roles/federation/tasks/hook_controlplane_config.yml +++ b/roles/federation/tasks/hook_controlplane_config.yml @@ -141,25 +141,27 @@ kind: OpenStackControlPlane name: .* patch: |- - - op: add - path: /spec/tls/caBundleSecretName - value: {{ _federation_ca_bundle_secret_name }} - - op: add - path: /spec/keystone/template/httpdCustomization - value: - customConfigSecret: keystone-httpd-override - - op: add - path: /spec/keystone/template/customServiceConfig - value: | - [DEFAULT] - insecure_debug=true - debug=true - [federation] - trusted_dashboard={{ cifmw_federation_horizon_url }}/dashboard/auth/websso/ - [openid] - remote_id_attribute=HTTP_OIDC_ISS - [auth] - methods = password,token,oauth1,mapped,application_credential,openid + apiVersion: core.openstack.org/v1beta1 + kind: OpenStackControlPlane + metadata: + name: controlplane + spec: + tls: + caBundleSecretName: {{ _federation_ca_bundle_secret_name }} + keystone: + template: 
+ httpdCustomization: + customConfigSecret: keystone-httpd-override + customServiceConfig: | + [DEFAULT] + insecure_debug=true + debug=true + [federation] + trusted_dashboard={{ cifmw_federation_horizon_url }}/dashboard/auth/websso/ + [openid] + remote_id_attribute=HTTP_OIDC_ISS + [auth] + methods = password,token,oauth1,mapped,application_credential,openid # --------------------------------------------------------------------------- # Step 7 - Keystone httpd override secret (always needed)