From 73287484e9deda4c7cf9b3426a40225d9eb060e4 Mon Sep 17 00:00:00 2001 From: Bhargavi Gudi Date: Tue, 12 May 2026 14:50:28 +0530 Subject: [PATCH] Migrate OCP-67564: node drain should block when PodDisruptionBudget minAvailable equals 100% Migrates test from openshift-tests-private to origin. Test validates that node drain is properly blocked when a PodDisruptionBudget has minAvailable=100% with an empty selector. The test: 1. Creates a deployment with 1 replica 2. Creates a PodDisruptionBudget with minAvailable=100% and empty selector 3. Attempts to drain a node 4. Verifies the drain operation is blocked 5. Deletes the PDB and verifies drain succeeds This is a disruptive test marked as informing. Updates: - Add test to test/extended/node/node_e2e/node.go - Add helper functions to test/extended/node/node_utils.go: - GetSingleWorkerNode: retrieves a worker node name - WaitClusterOperatorAvailable: waits for cluster operators to be available - Document test in test/extended/node/README.md Relates: https://issues.redhat.com/browse/OCPBUGS-15035 Signed-off-by: Bhargavi Gudi --- test/extended/node/README.md | 1 + test/extended/node/node_e2e/pdb_drain.go | 192 +++++++++++++++++++++++ 2 files changed, 193 insertions(+) create mode 100644 test/extended/node/node_e2e/pdb_drain.go diff --git a/test/extended/node/README.md b/test/extended/node/README.md index 360c7256c128..56496f1da834 100644 --- a/test/extended/node/README.md +++ b/test/extended/node/README.md @@ -9,6 +9,7 @@ This directory contains OpenShift end-to-end tests for node-related features. - **kubeletconfig_features.go** - Tests applying KubeletConfig to custom machine config pools, requires node reboots - **kubelet_secret_pulled_images.go** - Tests kubelet credential verification for image pulls (`KubeletEnsureSecretPulledImages` feature gate). Covers multi-tenancy isolation, credential rotation, ImagePullPolicy behavior, credential verification policy (NeverVerify/AlwaysVerify), and registry availability scenarios. Requires `TechPreviewNoUpgrade` or `CustomNoUpgrade` FeatureSet. - **node_e2e/image_registry_config.go** - Container registry config change (OCP-44820) - Verifies search registry update triggers MCO rollout and lands on nodes [Disruptive] +- **node_e2e/pdb_drain.go** - PodDisruptionBudget drain blocking (OCP-67564) - Tests that node drain is blocked when PDB has minAvailable=100% with empty selector [Disruptive] [Lifecycle:informing] ### Suite: openshift/usernamespace diff --git a/test/extended/node/node_e2e/pdb_drain.go b/test/extended/node/node_e2e/pdb_drain.go new file mode 100644 index 000000000000..7636a98b7a51 --- /dev/null +++ b/test/extended/node/node_e2e/pdb_drain.go @@ -0,0 +1,192 @@ +package node + +import ( + "context" + "fmt" + "strings" + "time" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo" + + corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + e2e "k8s.io/kubernetes/test/e2e/framework" + "k8s.io/utils/ptr" + + exutil "github.com/openshift/origin/test/extended/util" + "github.com/openshift/origin/test/extended/util/operator" +) + +var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptive] PodDisruptionBudget", func() { + var ( + oc = exutil.NewCLIWithoutNamespace("pdb-drain") + ) + + //author: bgudi@redhat.com + g.It("[OTP] Node's drain should block when PodDisruptionBudget minAvailable equals 100 percentage and selector is empty [OCP-67564]", ote.Informing(), func() { + ctx := context.Background() + + // Skip on SNO/External topologies where there might not be dedicated worker nodes + infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred(), "failed to get cluster infrastructure") + if infra.Status.ControlPlaneTopology == "SingleReplica" || infra.Status.ControlPlaneTopology == "External" { + g.Skip("Skipping on SNO/External topology - requires dedicated worker nodes") + } + + oc.SetupProject() + namespace := oc.Namespace() + + g.By("Get a worker node to schedule pods on") + workers, err := exutil.GetReadySchedulableWorkerNodes(ctx, oc.AdminKubeClient()) + o.Expect(err).NotTo(o.HaveOccurred(), "failed to get worker nodes") + o.Expect(workers).NotTo(o.BeEmpty(), "no ready schedulable worker nodes found") + workerNode := workers[0].Name + e2e.Logf("Selected worker node: %s", workerNode) + + g.By("Create 6 pods on the selected worker node") + numPods := 6 + podBaseName := "pdb-drain-test-pod" + for i := 0; i < numPods; i++ { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%d", podBaseName, i), + Namespace: namespace, + Labels: map[string]string{ + "app": "pdb-drain-test", + }, + }, + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "kubernetes.io/hostname": workerNode, + }, + SecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: ptr.To(true), + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + Containers: []corev1.Container{ + { + Name: "test-container", + Image: "quay.io/openshifttest/hello-openshift@sha256:4200f438cf2e9446f6bcff9d67ceea1f69ed07a2f83363b7fb52529f7ddd8a83", + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: ptr.To(false), + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{"ALL"}, + }, + }, + }, + }, + }, + } + _, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("failed to create pod %d", i)) + } + + g.By("Wait for all pods to be ready") + err = wait.PollUntilContextTimeout(ctx, 3*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + podList, pollErr := oc.KubeClient().CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: "app=pdb-drain-test", + }) + if pollErr != nil { + e2e.Logf("Error getting pods: %v", pollErr) + return false, nil + } + readyPods := 0 + for _, pod := range podList.Items { + for _, cond := range pod.Status.Conditions { + if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue { + readyPods++ + break + } + } + } + if readyPods == numPods { + e2e.Logf("All %d pods are ready", readyPods) + return true, nil + } + e2e.Logf("Waiting for pods to be ready: %d/%d", readyPods, numPods) + return false, nil + }) + o.Expect(err).NotTo(o.HaveOccurred(), "pods did not become ready") + + g.By("Create PodDisruptionBudget with 100% minAvailable and empty selector") + pdb := &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pdb-drain-test", + Namespace: namespace, + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &intstr.IntOrString{ + Type: intstr.String, + StrVal: "100%", + }, + Selector: &metav1.LabelSelector{}, + }, + } + _, err = oc.KubeClient().PolicyV1().PodDisruptionBudgets(namespace).Create(ctx, pdb, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred(), "failed to create PodDisruptionBudget") + g.DeferCleanup(oc.KubeClient().PolicyV1().PodDisruptionBudgets(namespace).Delete, ctx, "pdb-drain-test", metav1.DeleteOptions{}) + + g.By("Verify all test pods are on the selected worker node") + podList, err := oc.KubeClient().CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: "app=pdb-drain-test", + }) + o.Expect(err).NotTo(o.HaveOccurred(), "failed to get pods") + podsOnWorker := 0 + for _, pod := range podList.Items { + if pod.Spec.NodeName == workerNode { + podsOnWorker++ + } + } + o.Expect(podsOnWorker).To(o.Equal(numPods), "not all pods are on the selected worker node") + + g.By("Make sure that PDB's DisruptionAllowed condition is False") + var pdbStatus string + err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 30*time.Second, true, func(pollCtx context.Context) (bool, error) { + var pollErr error + pdbStatus, pollErr = oc.AsAdmin().WithoutNamespace().Run("get").Args("poddisruptionbudget", "pdb-drain-test", "-n", namespace, "-o=jsonpath={.status.conditions[?(@.type==\"DisruptionAllowed\")].status}").Output() + if pollErr != nil { + e2e.Logf("Error getting PDB status: %v", pollErr) + return false, nil + } + if pdbStatus != "" { + return true, nil + } + e2e.Logf("Waiting for PDB DisruptionAllowed condition to appear") + return false, nil + }) + o.Expect(err).NotTo(o.HaveOccurred(), "PDB DisruptionAllowed condition not found") + o.Expect(pdbStatus).Should(o.Equal("False"), "PDB DisruptionAllowed should be False") + + g.By("Drain the selected worker node") + g.DeferCleanup(func() { + err := operator.WaitForOperatorsToSettle(ctx, oc.AdminConfigClient(), 10) + o.Expect(err).NotTo(o.HaveOccurred(), "cluster operators failed to return to available state after node drain") + }) + g.DeferCleanup(oc.AsAdmin().WithoutNamespace().Run("adm").Args("uncordon", workerNode).Execute) + + out, err := oc.AsAdmin().WithoutNamespace().Run("adm").Args("drain", workerNode, "--ignore-daemonsets", "--delete-emptydir-data", "--force", "--timeout=30s").Output() + o.Expect(err).To(o.HaveOccurred(), "drain operation should have been blocked but it wasn't") + o.Expect(strings.Contains(out, "Cannot evict pod as it would violate the pod's disruption budget")).Should(o.BeTrue(), "drain output missing PDB violation error message") + o.Expect(strings.Contains(out, "There are pending nodes to be drained")).Should(o.BeTrue(), "drain output missing pending nodes error message") + + g.By("Verify that test pods remain on the node after failed drain") + podsAfterDrain, err := oc.KubeClient().CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: "app=pdb-drain-test", + }) + o.Expect(err).NotTo(o.HaveOccurred(), "failed to get pods after drain attempt") + podsStillOnWorker := 0 + for _, pod := range podsAfterDrain.Items { + if pod.Spec.NodeName == workerNode { + podsStillOnWorker++ + } + } + o.Expect(podsStillOnWorker).To(o.Equal(numPods), "all test pods should still be on the worker node") + }) +})