diff --git a/test/extended/edge_topologies/tnf_recovery.go b/test/extended/edge_topologies/tnf_recovery.go
index 3ef8dfeaa6e4..188140505a41 100644
--- a/test/extended/edge_topologies/tnf_recovery.go
+++ b/test/extended/edge_topologies/tnf_recovery.go
@@ -21,6 +21,7 @@ import (
 	exutil "github.com/openshift/origin/test/extended/util"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8srand "k8s.io/apimachinery/pkg/util/rand"
 	"k8s.io/kubernetes/test/e2e/framework"
 )
 
@@ -414,6 +415,117 @@ var _ = g.Describe("[sig-etcd][apigroup:config.openshift.io][OCPFeatureGate:Dual
 			memberPromotedVotingTimeout, utils.FiveSecondPollInterval)
 	})
 
+	g.It("should update fencing credentials and validate fencing with updated credentials", func() {
+		// Rotates the BMC password of one node, hands the new password to
+		// update-fencing-credentials.sh, then disrupts the network between the nodes and
+		// checks that etcd recovers. The original password is restored during cleanup.
+		bmcNode := targetNode
+		survivedNode := peerNode
+
+		g.By(fmt.Sprintf("Reading current fencing credentials for node %s", bmcNode.Name))
+		creds, err := apis.FindFencingCredentialsByNodeName(oc, bmcNode.Name)
+		o.Expect(err).ToNot(o.HaveOccurred(), "expected to find fencing credentials secret")
+		framework.Logf("Found fencing credentials secret %s (address: %s, username: %s)",
+			creds.SecretName, creds.Address, creds.Username)
+
+		g.By("Parsing Redfish address from fencing credentials")
+		redfishHost, redfishPort, redfishPath, err := apis.ParseRedfishAddress(creds.Address)
+		o.Expect(err).ToNot(o.HaveOccurred(), "expected to parse Redfish address")
+		framework.Logf("Redfish endpoint: host=%s port=%s path=%s", redfishHost, redfishPort, redfishPath)
+
+		sslInsecure := creds.CertificateVerification == "Disabled"
+		originalPassword := creds.Password
+		newPassword := k8srand.String(32)
+
+		scriptPath := "/etc/kubernetes/static-pod-resources/etcd-certs/configmaps/etcd-scripts/update-fencing-credentials.sh"
+		bashCmd := scriptPath + ` --node "$1" --username "$2" --password "$3" --address "$4"`
+		if sslInsecure {
+			bashCmd += " --ssl-insecure"
+		}
+
+		// Registered before the password is changed so that a failure anywhere below still
+		// attempts to put the original password back on the BMC and re-run the script with it.
+		g.DeferCleanup(func() {
+			framework.Logf("Restoring original BMC password via Redfish API")
+			if restoreErr := apis.ChangeBMCPasswordViaRedfish(oc, bmcNode.Name, redfishHost, redfishPort,
+				creds.Username, newPassword, originalPassword); restoreErr != nil {
+				fmt.Fprintf(g.GinkgoWriter, "Warning: failed to restore BMC password via Redfish: %v\n", restoreErr)
+				return
+			}
+
+			framework.Logf("Re-running update-fencing-credentials.sh with original credentials")
+			output, restoreErr := exutil.DebugNodeRetryWithOptionsAndChroot(oc, bmcNode.Name, "openshift-etcd",
+				"bash", "-c", bashCmd, "update-fencing-credentials",
+				bmcNode.Name, creds.Username, originalPassword, creds.Address)
+			if restoreErr != nil {
+				fmt.Fprintf(g.GinkgoWriter, "Warning: failed to restore fencing credentials via script: %v\noutput: %s\n",
+					restoreErr, output)
+			}
+		})
+
+		g.By(fmt.Sprintf("Changing BMC password via Redfish API on %s", bmcNode.Name))
+		err = apis.ChangeBMCPasswordViaRedfish(oc, bmcNode.Name, redfishHost, redfishPort,
+			creds.Username, originalPassword, newPassword)
+		o.Expect(err).ToNot(o.HaveOccurred(), "expected to change BMC password via Redfish API")
+
+		g.By(fmt.Sprintf("Validating new BMC credentials via fence_redfish on %s", bmcNode.Name))
+		err = apis.ValidateBMCCredentials(oc, bmcNode.Name, redfishHost, redfishPort, redfishPath,
+			creds.Username, newPassword, sslInsecure)
+		o.Expect(err).ToNot(o.HaveOccurred(), "expected new BMC credentials to be valid")
+
+		g.By(fmt.Sprintf("Running update-fencing-credentials.sh on %s with new credentials", bmcNode.Name))
+		output, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, bmcNode.Name, "openshift-etcd",
+			"bash", "-c", bashCmd, "update-fencing-credentials",
+			bmcNode.Name, creds.Username, newPassword, creds.Address)
+		o.Expect(err).ToNot(o.HaveOccurred(), "expected update-fencing-credentials.sh to succeed")
+		framework.Logf("update-fencing-credentials.sh output:\n%s", output)
+
+		g.By("Validating pacemaker health after credential update")
+		ctx, cancel := context.WithTimeout(context.Background(), nodeIsHealthyTimeout)
+		defer cancel()
+		pcsOutput, err := services.PcsStatusViaDebug(ctx, oc, bmcNode.Name)
+		o.Expect(err).ToNot(o.HaveOccurred(), "expected pcs status to succeed")
+		failedActions := services.ExtractPcsFailedActions(pcsOutput)
+		o.Expect(failedActions).To(o.BeEmpty(), "expected no failed pacemaker resource actions after credential update")
+
+		g.By("Ensuring etcd members remain healthy after fencing credentials update")
+		o.Eventually(func() error {
+			if err := helpers.EnsureHealthyMember(g.GinkgoT(), etcdClientFactory, survivedNode.Name); err != nil {
+				return err
+			}
+			if err := helpers.EnsureHealthyMember(g.GinkgoT(), etcdClientFactory, bmcNode.Name); err != nil {
+				return err
+			}
+			return nil
+		}, nodeIsHealthyTimeout, utils.FiveSecondPollInterval).ShouldNot(o.HaveOccurred(),
+			"etcd members should be healthy after fencing credentials update")
+
+		g.By(fmt.Sprintf("Triggering fencing-style network disruption between %s and %s", bmcNode.Name, survivedNode.Name))
+		command, err := exutil.TriggerNetworkDisruption(oc.KubeClient(), &bmcNode, &survivedNode, networkDisruptionDuration)
+		o.Expect(err).ToNot(o.HaveOccurred(), "Expected to disrupt network without errors")
+		framework.Logf("network disruption command: %q", command)
+
+		g.By(fmt.Sprintf("Ensuring cluster recovery after network disruption (timeout: %v)", memberIsLeaderTimeout))
+		leaderNode, learnerNode, learnerStarted := validateEtcdRecoveryStateWithoutAssumingLeader(oc, etcdClientFactory,
+			&survivedNode, &bmcNode, memberIsLeaderTimeout, utils.FiveSecondPollInterval)
+
+		if learnerStarted {
+			framework.Logf("Learner node %q already started as learner after disruption", learnerNode.Name)
+		} else {
+			g.By(fmt.Sprintf("Ensuring '%s' rejoins as learner (timeout: %v)", learnerNode.Name, memberRejoinedLearnerTimeout))
+			validateEtcdRecoveryState(oc, etcdClientFactory,
+				leaderNode,
+				learnerNode, true, true,
+				memberRejoinedLearnerTimeout, utils.FiveSecondPollInterval)
+		}
+
+		g.By(fmt.Sprintf("Ensuring learner node '%s' is promoted back as voting member (timeout: %v)", learnerNode.Name, memberPromotedVotingTimeout))
+		validateEtcdRecoveryState(oc, etcdClientFactory,
+			leaderNode,
+			learnerNode, true, false,
+			memberPromotedVotingTimeout, utils.FiveSecondPollInterval)
+	})
+
 	g.It("should compute etcd revision bump and preserve backup container after kernel panic recovery", func() {
 		// Note: This test triggers a kernel panic on one node via sysrq trigger, then verifies
 		// the surviving node computes the etcd revision bump as floor(maxRaftIndex * 0.2) per
diff --git a/test/extended/edge_topologies/utils/apis/baremetalhost.go b/test/extended/edge_topologies/utils/apis/baremetalhost.go
index 305ec7c7240d..93b521aadb68 100644
--- a/test/extended/edge_topologies/utils/apis/baremetalhost.go
+++ b/test/extended/edge_topologies/utils/apis/baremetalhost.go
@@ -20,10 +20,72 @@ import (
 )
 
 const (
-	BMCSecretNamespace     = "openshift-machine-api"
-	secretsDataPasswordKey = "password"
+	BMCSecretNamespace          = "openshift-machine-api"
+	FencingCredentialsNamespace = "openshift-etcd"
+	fencingCredentialsPrefix    = "fencing-credentials-"
+	secretsDataPasswordKey      = "password"
 )
 
+// FencingCredentials holds the fields from a fencing-credentials secret in openshift-etcd.
+type FencingCredentials struct {
+	SecretName              string // name of the secret the values were read from
+	Address                 string // value of the "address" key
+	Username                string // value of the "username" key
+	Password                string // value of the "password" key
+	CertificateVerification string // value of the "certificateVerification" key; empty when absent
+}
+
+// FindFencingCredentialsByNodeName lists the secrets in openshift-etcd and returns the one named
+// fencing-credentials-<node>, where <node> is the full node name or its part before the first dot.
+func FindFencingCredentialsByNodeName(oc *exutil.CLI, nodeName string) (*FencingCredentials, error) {
+	// Reject an empty name up front: it would otherwise be looked up as the bare
+	// "fencing-credentials-" prefix and fail with a misleading "not found" error.
+	if nodeName == "" {
+		return nil, fmt.Errorf("node name must not be empty")
+	}
+
+	// The secret is named after either the first DNS label of the node name or the full name.
+	shortName := strings.Split(nodeName, ".")[0]
+	shortSecret := fencingCredentialsPrefix + shortName
+	fullSecret := fencingCredentialsPrefix + nodeName
+
+	ctx := context.Background()
+	list, err := oc.AdminKubeClient().CoreV1().Secrets(FencingCredentialsNamespace).List(ctx, metav1.ListOptions{})
+	if err != nil {
+		return nil, fmt.Errorf("list secrets in %s: %w", FencingCredentialsNamespace, err)
+	}
+
+	for i := range list.Items {
+		secret := &list.Items[i]
+		if secret.Name != shortSecret && secret.Name != fullSecret {
+			continue
+		}
+		// required returns the non-empty value stored under key, or an error naming the secret.
+		required := func(key string) (string, error) {
+			if v := secret.Data[key]; len(v) > 0 {
+				return string(v), nil
+			}
+			return "", fmt.Errorf("secret %s missing required key %q", secret.Name, key)
+		}
+		creds := &FencingCredentials{SecretName: secret.Name}
+		if creds.Address, err = required("address"); err != nil {
+			return nil, err
+		}
+		if creds.Username, err = required("username"); err != nil {
+			return nil, err
+		}
+		if creds.Password, err = required(secretsDataPasswordKey); err != nil {
+			return nil, err
+		}
+		// certificateVerification is optional; a missing key yields the empty string.
+		creds.CertificateVerification = string(secret.Data["certificateVerification"])
+		return creds, nil
+	}
+
+	return nil, fmt.Errorf("no fencing-credentials secret found for node %q (tried %q and %q) in %s",
+		nodeName, shortSecret, fullSecret, FencingCredentialsNamespace)
+}
+
 // BMHGVR is the GroupVersionResource for BareMetalHost (metal3.io/v1alpha1). Use for API-based get/delete/patch.
 var BMHGVR = schema.GroupVersionResource{
 	Group: "metal3.io", Version: "v1alpha1", Resource: "baremetalhosts",
diff --git a/test/extended/edge_topologies/utils/apis/redfish.go b/test/extended/edge_topologies/utils/apis/redfish.go
new file mode 100644
index 000000000000..9825f9f231f5
--- /dev/null
+++ b/test/extended/edge_topologies/utils/apis/redfish.go
@@ -0,0 +1,153 @@
+package apis
+
+import (
+	"encoding/json"
+	"fmt"
+	"net"
+	"net/url"
+	"strings"
+
+	exutil "github.com/openshift/origin/test/extended/util"
+	"k8s.io/kubernetes/test/e2e/framework"
+)
+
+// ParseRedfishAddress splits "redfish+http(s)://host[:port]/path" into host, port and path.
+// Bracketed IPv6 hosts are returned without brackets; a missing port defaults to 443 (https)
+// or 80 (http). A missing prefix, another scheme, or an empty host or path is an error.
+func ParseRedfishAddress(address string) (host, port, path string, err error) {
+	if !strings.HasPrefix(address, "redfish+") {
+		return "", "", "", fmt.Errorf("invalid Redfish address: %q: missing redfish+ prefix", address)
+	}
+	parsed, err := url.Parse(strings.TrimPrefix(address, "redfish+"))
+	if err != nil {
+		return "", "", "", fmt.Errorf("parse redfish address %q: %w", address, err)
+	}
+	if parsed.Scheme != "http" && parsed.Scheme != "https" {
+		return "", "", "", fmt.Errorf("unsupported scheme %q in redfish address %q", parsed.Scheme, address)
+	}
+
+	host, port, path = parsed.Hostname(), parsed.Port(), parsed.Path
+	if host == "" {
+		return "", "", "", fmt.Errorf("empty host in redfish address %q", address)
+	}
+	if path == "" {
+		return "", "", "", fmt.Errorf("empty path in redfish address %q", address)
+	}
+
+	if port == "" {
+		port = "443"
+		if parsed.Scheme == "http" {
+			port = "80"
+		}
+	}
+	return host, port, path, nil
+}
+
+// redfishAccountCollection is the subset of the Redfish Accounts collection response that is decoded.
+type redfishAccountCollection struct {
+	Members []struct {
+		OdataID string `json:"@odata.id"`
+	} `json:"Members"`
+}
+
+// redfishAccount is the subset of a single Redfish account resource that is decoded.
+type redfishAccount struct {
+	ID       string `json:"Id"`
+	UserName string `json:"UserName"`
+}
+
+// ChangeBMCPasswordViaRedfish changes the BMC password using the Redfish AccountService API.
+// It discovers the account matching the given username, then PATCHes the password.
+func ChangeBMCPasswordViaRedfish(oc *exutil.CLI, nodeName, redfishHost, redfishPort, username, currentPassword, newPassword string) error {
+	authority := net.JoinHostPort(redfishHost, redfishPort)
+	baseURL := "https://" + authority // the BMC is always addressed over https
+	accountURL, err := findRedfishAccountByUsername(oc, nodeName, baseURL, username, currentPassword)
+	if err != nil {
+		return fmt.Errorf("find redfish account for user %q: %w", username, err)
+	}
+	patchURL := baseURL + accountURL
+
+	// Encode the body in Go rather than splicing the password into a JSON template in the
+	// shell: a password containing '"' or '\' would otherwise produce a malformed request.
+	payload, err := json.Marshal(map[string]string{"Password": newPassword})
+	if err != nil {
+		return fmt.Errorf("encode password payload: %w", err)
+	}
+
+	framework.Logf("Changing BMC password for account %s on %s", accountURL, authority)
+	// -k skips certificate verification; -o /dev/null with -w prints only the HTTP status code.
+	patchScript := `curl -k -s -o /dev/null -w "%{http_code}" -X PATCH ` +
+		`-H 'Content-Type: application/json' -u "$1:$2" -d "$3" "$4"`
+	output, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, nodeName, "openshift-etcd",
+		"bash", "-c", patchScript, "redfish-patch", username, currentPassword, string(payload), patchURL)
+	if err != nil {
+		return fmt.Errorf("PATCH %s failed: %w", patchURL, err)
+	}
+
+	statusCode := strings.TrimSpace(output)
+	if statusCode != "200" && statusCode != "204" {
+		return fmt.Errorf("PATCH %s returned HTTP %s (expected 200 or 204)", patchURL, statusCode)
+	}
+	framework.Logf("Successfully changed BMC password via Redfish API (HTTP %s)", statusCode)
+	return nil
+}
+
+// findRedfishAccountByUsername returns the @odata.id of the AccountService account whose
+// UserName equals username. Accounts that cannot be fetched or decoded are logged and skipped.
+func findRedfishAccountByUsername(oc *exutil.CLI, nodeName, baseURL, username, password string) (string, error) {
+	const curlGet = `curl -k -s -u "$1:$2" "$3"`
+	get := func(label, target string) (string, error) {
+		return exutil.DebugNodeRetryWithOptionsAndChroot(oc, nodeName, "openshift-etcd",
+			"bash", "-c", curlGet, label, username, password, target)
+	}
+
+	accountsURL := baseURL + "/redfish/v1/AccountService/Accounts"
+	body, err := get("redfish-list", accountsURL)
+	if err != nil {
+		return "", fmt.Errorf("GET %s failed: %w", accountsURL, err)
+	}
+	var collection redfishAccountCollection
+	if err := json.Unmarshal([]byte(body), &collection); err != nil {
+		return "", fmt.Errorf("parse account collection: %w (body: %s)", err, body)
+	}
+
+	for _, member := range collection.Members {
+		memberURL := baseURL + member.OdataID
+		acctBody, err := get("redfish-get", memberURL)
+		if err != nil {
+			framework.Logf("Warning: failed to GET %s: %v", memberURL, err)
+			continue
+		}
+		var account redfishAccount
+		if err := json.Unmarshal([]byte(acctBody), &account); err != nil {
+			framework.Logf("Warning: failed to parse account at %s: %v", memberURL, err)
+			continue
+		}
+		if account.UserName == username {
+			return member.OdataID, nil
+		}
+	}
+
+	return "", fmt.Errorf("no Redfish account found with username %q", username)
+}
+
+// ValidateBMCCredentials runs "fence_redfish --action status" on the node with the given
+// credentials; a failing call is returned as an error that includes the command output.
+func ValidateBMCCredentials(oc *exutil.CLI, nodeName, redfishHost, redfishPort, redfishPath, username, password string, sslInsecure bool) error {
+	fenceScript := `/usr/sbin/fence_redfish --username "$1" --password "$2" --ip "$3" --ipport "$4" --systems-uri "$5" --action status`
+	if sslInsecure {
+		fenceScript += " --ssl-insecure"
+	}
+	// A bare IPv6 literal is wrapped in brackets before being handed to fence_redfish.
+	ipForFence := redfishHost
+	if strings.Contains(redfishHost, ":") && !strings.HasPrefix(redfishHost, "[") {
+		ipForFence = "[" + redfishHost + "]"
+	}
+	output, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, nodeName, "openshift-etcd", "bash", "-c", fenceScript,
+		"fence-validate", username, password, ipForFence, redfishPort, redfishPath)
+	if err != nil {
+		return fmt.Errorf("fence_redfish validation failed: %w (output: %s)", err, output)
+	}
+	framework.Logf("BMC credential validation passed: %s", strings.TrimSpace(output))
+	return nil
+}