From 1eb40f34659ef47c88ae8ce9131b124d8ccba411 Mon Sep 17 00:00:00 2001 From: Richard Su Date: Wed, 18 Mar 2026 11:10:46 -0500 Subject: [PATCH] OCPBUGS-63472: add copy_network e2e test case for agent-tui network persistence Add a new 'copy_network' AGENT_TEST_CASES scenario that validates static network connections created via the agent-tui during boot persist into the installed OS. A NetworkManager keyfile is injected via SSH into the live environment on master_0 and master_1, simulating what a user would create via the agent-tui. The agent-tui exits quickly when connectivity to the release image succeeds, leaving no reliable window for key press automation, so SSH injection is used instead. The keyfile is in place before agent-set-host-copy-network-arg.service runs, so the service detects it and sets --copy-network per-host via the assisted-service REST API. master_1 is intentionally included as a non-rendezvous node, which is the scenario the bug affected. After installation, validate-copy-network.sh SSHes into both nodes and verifies the connection keyfile and nmcli entry are present, confirming that coreos-installer copied the keyfile into the installed OS. 
To run: export AGENT_E2E_TEST_SCENARIO=COMPACT_IPV4_DHCP export AGENT_TEST_CASES='copy_network' Assisted-by: Claude Sonnet 4.6 (1M context) --- agent/06_agent_create_cluster.sh | 49 +++++++++++++ agent/08_agent_post_install_validation.sh | 5 ++ agent/e2e/agent-tui/test-copy-network.sh | 75 ++++++++++++++++++++ agent/e2e/agent-tui/validate-copy-network.sh | 74 +++++++++++++++++++ config_example.sh | 12 ++++ 5 files changed, 215 insertions(+) create mode 100755 agent/e2e/agent-tui/test-copy-network.sh create mode 100755 agent/e2e/agent-tui/validate-copy-network.sh diff --git a/agent/06_agent_create_cluster.sh b/agent/06_agent_create_cluster.sh index 01385a8ca..c28c67d6d 100755 --- a/agent/06_agent_create_cluster.sh +++ b/agent/06_agent_create_cluster.sh @@ -436,6 +436,55 @@ function run_agent_test_cases() { echo "Finished fixing DNS through agent-tui" fi + + if [[ $AGENT_TEST_CASES =~ "copy_network" ]]; then + if [[ ${NUM_MASTERS} -lt 2 ]]; then + echo "ERROR: copy_network test case requires at least 2 master nodes (NUM_MASTERS=${NUM_MASTERS}). Not supported with SNO topology." + exit 1 + fi + if [[ "${IP_STACK}" != "v4" ]]; then + echo "ERROR: copy_network test case only supports IPv4 (IP_STACK=${IP_STACK})." + exit 1 + fi + echo "Running test scenario: inject static network keyfile on master_0 and master_1" + + # Inject a NetworkManager keyfile on master_0 and master_1 via SSH into the live + # environment, simulating what a user would create via the agent-tui. The keyfile + # must be in place before agent-set-host-copy-network-arg.service runs. + # The script waits for SSH to become available on each node. + # master_1 is intentionally included as a non-rendezvous node, which is the + # scenario the bug affected. + # master-0 is the rendezvous/bootstrap node whose IP is recorded in the etcd + # cluster during installation - preserve its DHCP IP as the static IP so etcd + # membership is not broken after reboot. 
+ # master-1 uses a distinct static IP outside the DHCP range to prove the + # static config was copied and persists after installation. + subnet_prefix=$(echo "${EXTERNAL_SUBNET_V4}" | cut -d'/' -f2) + master0_hostname=$(printf ${MASTER_HOSTNAME_FORMAT} 0) + master0_dhcp_ip=$(sudo virsh net-dumpxml ${BAREMETAL_NETWORK_NAME} | xmllint --xpath \ + "string(//dns[*]/host/hostname[. = '${master0_hostname}']/../@ip)" -) + # Find an unused static IP for master-1 by scanning virsh for unassigned offsets + master1_static_ip="" + for offset in $(seq 90 254); do + candidate=$(nth_ip ${EXTERNAL_SUBNET_V4} ${offset}) + if ! sudo virsh net-dumpxml ${BAREMETAL_NETWORK_NAME} | xmllint --xpath "//dns[*]/host[@ip = '${candidate}']" - &>/dev/null; then + master1_static_ip=${candidate} + break + fi + done + if [ -z "${master1_static_ip}" ]; then + echo "ERROR: could not find an unused IP in ${EXTERNAL_SUBNET_V4} for master-1 static config" + exit 1 + fi + echo "Using static IP ${master1_static_ip} for master-1" + declare -A COPY_NETWORK_STATIC_IPS=([0]="${master0_dhcp_ip}/${subnet_prefix}" [1]="${master1_static_ip}/${subnet_prefix}") + for node_index in 0 1; do + echo "Injecting keyfile on master_${node_index}" + ./agent/e2e/agent-tui/test-copy-network.sh $node_index ${COPY_NETWORK_STATIC_IPS[$node_index]} + done + + echo "Finished injecting network keyfiles" + fi } # Setup the environment to allow iPXE booting, by reusing libvirt native features diff --git a/agent/08_agent_post_install_validation.sh b/agent/08_agent_post_install_validation.sh index a40e5245b..fbf0626fc 100755 --- a/agent/08_agent_post_install_validation.sh +++ b/agent/08_agent_post_install_validation.sh @@ -43,3 +43,8 @@ if (( $NUM_MASTERS != $installed_control_plane_nodes )); then fi oc get clusterversion + +if [[ "${AGENT_TEST_CASES:-}" =~ "copy_network" ]]; then + echo "Validating static network config persistence after installation" + ./agent/e2e/agent-tui/validate-copy-network.sh +fi diff --git 
a/agent/e2e/agent-tui/test-copy-network.sh b/agent/e2e/agent-tui/test-copy-network.sh new file mode 100755 index 000000000..f37fcf5e3 --- /dev/null +++ b/agent/e2e/agent-tui/test-copy-network.sh @@ -0,0 +1,75 @@ +#!/bin/bash +set -euxo pipefail + +SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../../" && pwd )" +source $SCRIPTDIR/common.sh + +NODE_INDEX=${1:-0} +STATIC_IP=${2:-"192.168.111.90/24"} +CONNECTION_NAME="copy-network-static" + +SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -q" + +# Derive the DNS hostname from MASTER_HOSTNAME_FORMAT (e.g. "master-0") +node_hostname=$(printf ${MASTER_HOSTNAME_FORMAT} ${NODE_INDEX}) + +# Get the node's current DHCP IP (assigned by virsh) to SSH into the live environment +node_ip=$(sudo virsh net-dumpxml ${BAREMETAL_NETWORK_NAME} | xmllint --xpath \ + "string(//dns[*]/host/hostname[. = '${node_hostname}']/../@ip)" -) + +if [ -z "$node_ip" ]; then + echo "ERROR: Could not resolve IP for ${node_hostname} on network ${BAREMETAL_NETWORK_NAME}" + exit 1 +fi + +echo "Waiting for live environment SSH on ${node_hostname} (${node_ip})..." +until ssh $SSH_OPTS core@${node_ip} true 2>/dev/null; do + sleep 10 +done + +echo "SSH available on ${node_hostname}, injecting static network keyfile" + +# Determine the MAC address of the interface that has the current DHCP IP, +# and the default gateway and DNS from the live environment. +iface_mac=$(ssh $SSH_OPTS core@${node_ip} \ + "ip -j addr show | jq -r '.[] | select(.addr_info[]? 
| .local == \"${node_ip}\") | .address'") +gateway=$(ssh $SSH_OPTS core@${node_ip} \ + "ip route show default | awk '/default/ {print \$3; exit}'") +dns=$(ssh $SSH_OPTS core@${node_ip} \ + "awk '/^nameserver/ {print \$2; exit}' /etc/resolv.conf") + +echo "Interface MAC: ${iface_mac}, Gateway: ${gateway}, DNS: ${dns}, Static IP: ${STATIC_IP}" + +if [ -z "$iface_mac" ] || [ -z "$gateway" ]; then + echo "ERROR: Could not determine interface MAC or gateway on ${node_hostname}" + exit 1 +fi + +# Write a static NetworkManager keyfile using a static IP distinct from the DHCP +# address, so the installed OS can be verified to be using the static config. +# Bound to the primary interface by MAC address and uses autoconnect-priority=1 +# to take precedence over auto-generated DHCP connections (priority -100). +ssh $SSH_OPTS core@${node_ip} \ + "sudo bash -c 'umask 177; cat > /etc/NetworkManager/system-connections/${CONNECTION_NAME}.nmconnection'" << EOF +[connection] +id=${CONNECTION_NAME} +type=ethernet +autoconnect=true +autoconnect-priority=1 + +[ethernet] +mac-address=${iface_mac} + +[ipv4] +address1=${STATIC_IP},${gateway} +dns=${dns}; +method=manual + +[ipv6] +method=disabled + +[proxy] +EOF + +echo "Injected static keyfile '${CONNECTION_NAME}.nmconnection' on ${node_hostname}" +echo " Static IP: ${STATIC_IP}, Gateway: ${gateway}, DNS: ${dns}" diff --git a/agent/e2e/agent-tui/validate-copy-network.sh b/agent/e2e/agent-tui/validate-copy-network.sh new file mode 100755 index 000000000..16fb1f92c --- /dev/null +++ b/agent/e2e/agent-tui/validate-copy-network.sh @@ -0,0 +1,74 @@ +#!/bin/bash +set -euxo pipefail + +SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../../" && pwd )" +source $SCRIPTDIR/common.sh +source $SCRIPTDIR/agent/common.sh +source $SCRIPTDIR/network.sh + +CONNECTION_NAME="copy-network-static" + +SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -q" + +# master-0 keeps its DHCP IP as the static IP (to preserve etcd membership), +# 
so we reach it via its virsh-assigned IP. +# master-1 uses a distinct static IP outside the DHCP range, so we SSH to that +# IP directly to prove the installed OS is using the static config. +subnet_prefix=$(echo "${EXTERNAL_SUBNET_V4}" | cut -d'/' -f2) +master0_hostname=$(printf ${MASTER_HOSTNAME_FORMAT} 0) +master0_ip=$(sudo virsh net-dumpxml ${BAREMETAL_NETWORK_NAME} | xmllint --xpath \ + "string(//dns[*]/host/hostname[. = '${master0_hostname}']/../@ip)" -) +master1_ip="" +for offset in $(seq 90 254); do + candidate=$(nth_ip ${EXTERNAL_SUBNET_V4} ${offset}) + if ! sudo virsh net-dumpxml ${BAREMETAL_NETWORK_NAME} | xmllint --xpath "//dns[*]/host[@ip = '${candidate}']" - &>/dev/null; then + master1_ip=${candidate} + break + fi +done +if [ -z "${master1_ip}" ]; then + echo "ERROR: could not find the static IP for master-1 in ${EXTERNAL_SUBNET_V4}" + exit 1 +fi + +declare -A NODE_IPS=([0]="${master0_ip}" [1]="${master1_ip}") + +failed=0 +for node_index in 0 1; do + node_hostname=$(printf ${MASTER_HOSTNAME_FORMAT} ${node_index}) + node_ip=${NODE_IPS[$node_index]} + + echo "Checking ${node_hostname} at ${node_ip} for connection '${CONNECTION_NAME}'..." + + if ! ssh $SSH_OPTS core@${node_ip} true 2>/dev/null; then + echo "FAIL: Cannot SSH to ${node_hostname} at ${node_ip}" + failed=1 + continue + fi + + # Verify the NetworkManager keyfile exists in the installed OS + if ! ssh $SSH_OPTS core@${node_ip} \ + "sudo ls /etc/NetworkManager/system-connections/ | grep -q '${CONNECTION_NAME}'"; then + echo "FAIL: Connection keyfile '${CONNECTION_NAME}' not found on ${node_hostname}" + failed=1 + continue + fi + + # Verify nmcli reports the connection with static method - this is the proof + # that --copy-network copied the user-created keyfile to the installed OS + if ! 
ssh $SSH_OPTS core@${node_ip} \ + "sudo nmcli -f ipv4.method connection show '${CONNECTION_NAME}' | grep -q 'manual'"; then + echo "FAIL: Connection '${CONNECTION_NAME}' does not have static IPv4 method on ${node_hostname}" + failed=1 + continue + fi + + echo "PASS: ${node_hostname} has connection '${CONNECTION_NAME}' with method=manual" +done + +if [ $failed -ne 0 ]; then + echo "FAIL: Network config persistence validation failed on one or more nodes" + exit 1 +fi + +echo "PASS: Static network config persisted after installation on all nodes" diff --git a/config_example.sh b/config_example.sh index 0ef83949b..35df25601 100755 --- a/config_example.sh +++ b/config_example.sh @@ -924,7 +924,19 @@ set -x # then the wait-for commands should timeout and fail. # This test case is only supported when IP_STACK=v4. # +# 2. 'copy_network' test case: +# Validates that static network connections created manually via the agent-tui during boot +# persist into the installed OS (OCPBUGS-63472). Use a DHCP scenario so that no static +# networking is pre-configured in the manifests. During boot, the test injects a static +# NetworkManager keyfile via SSH on master_0 and master_1 (simulating the agent-tui). After +# installation, the post-install validation checks that the connection keyfile and nmcli +# entry are present on both nodes, confirming that --copy-network was set per-host by +# agent-set-host-copy-network-arg.service and coreos-installer copied the keyfile into the OS. +# Requires: AGENT_E2E_TEST_SCENARIO=COMPACT_IPV4_DHCP (or any HA_IPV4_DHCP), IP_STACK=v4. +# Not supported with SNO topology (requires at least 2 master nodes) or IPv6. +# # export AGENT_TEST_CASES='bad_dns' +# export AGENT_TEST_CASES='copy_network' # Uncomment the following line to deploy the cluster using the appliance model # The appliance model boots the host using the unconfigured ignition. It then mounts