Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions test/extended/node/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ This directory contains OpenShift end-to-end tests for node-related features.
- **image_volume.go** - Tests mounting container images as volumes in pods, including subPath and error handling
- **node_swap.go** - Tests default kubelet swap settings (failSwapOn and swapBehavior) and rejection of user overrides
- **zstd_chunked.go** - Tests building and running images with zstd:chunked compression format
- **node_e2e/probe_termination.go** - Probe-level terminationGracePeriodSeconds (OCP-44493) - Tests configurable termination grace period for liveness and startup probes. Includes 3 test cases: probe-level config for liveness probe, probe-level config for startup probe, and fallback to pod-level config when probe-level is not set [Lifecycle:informing]

## Directory Structure

Expand Down
305 changes: 305 additions & 0 deletions test/extended/node/node_e2e/probe_termination.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,305 @@
package node

import (
"context"
"fmt"
"strings"
"time"

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
e2e "k8s.io/kubernetes/test/e2e/framework"
"k8s.io/utils/ptr"

nodeutils "github.com/openshift/origin/test/extended/node"
exutil "github.com/openshift/origin/test/extended/util"
)

const (
	// probeTestPodGracePeriodSeconds is the pod-level terminationGracePeriodSeconds set on every test pod.
	probeTestPodGracePeriodSeconds int64 = 60
	// probeTestPort is the port targeted by the probes. The test container only runs
	// `sleep`, so nothing serves on it and every probe attempt fails.
	probeTestPort int32 = 8080
	// probeTestImage is the digest-pinned image used for the test container.
	probeTestImage = "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0"
)

var _ = g.Describe("[sig-node] Probe configuration", func() {
	var (
		oc = exutil.NewCLIWithoutNamespace("probe-termination")
	)

	//author: bgudi@redhat.com
	g.It("[OTP] Liveness probe should respect probe-level terminationGracePeriodSeconds [OCP-44493]", ote.Informing(), func() {
		ctx := context.Background()

		oc.SetupProject()
		namespace := oc.Namespace()

		g.By("Create pod with liveness probe having probe-level terminationGracePeriodSeconds=10s")
		pod := newProbeTerminationPod(namespace, "liveness-probe-level", "test", false, ptr.To[int64](10))
		_, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
		o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod")

		g.By("Verify probe-level terminationGracePeriodSeconds is honored (10s)")
		expectProbeTerminationAfter(ctx, oc, namespace, "liveness-probe-level", "test", 10)
	})

	//author: bgudi@redhat.com
	g.It("[OTP] Startup probe should respect probe-level terminationGracePeriodSeconds [OCP-44493]", ote.Informing(), func() {
		ctx := context.Background()

		oc.SetupProject()
		namespace := oc.Namespace()

		g.By("Create pod with startup probe having probe-level terminationGracePeriodSeconds=10s")
		pod := newProbeTerminationPod(namespace, "startup-probe-level", "teststartup", true, ptr.To[int64](10))
		_, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
		o.Expect(err).NotTo(o.HaveOccurred(), "failed to create startup probe pod")

		g.By("Verify probe-level terminationGracePeriodSeconds is honored (10s)")
		expectProbeTerminationAfter(ctx, oc, namespace, "startup-probe-level", "teststartup", 10)
	})

	//author: bgudi@redhat.com
	g.It("[OTP] Liveness probe should fall back to pod-level terminationGracePeriodSeconds when probe-level is not set [OCP-44493]", ote.Informing(), func() {
		ctx := context.Background()

		oc.SetupProject()
		namespace := oc.Namespace()

		g.By("Create pod with liveness probe without probe-level terminationGracePeriodSeconds")
		// nil probe-level grace period: the pod-level value (60s) should apply.
		pod := newProbeTerminationPod(namespace, "liveness-pod-level", "test", false, nil)
		_, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
		o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod without probe-level termination")

		g.By("Verify pod-level terminationGracePeriodSeconds is used (60s)")
		expectProbeTerminationAfter(ctx, oc, namespace, "liveness-pod-level", "test", int(probeTestPodGracePeriodSeconds))
	})
})

// newProbeTerminationPod builds a single-container pod whose HTTP probe is guaranteed to
// fail: the container runs `sleep` instead of a server, so GET /healthz on probeTestPort
// is never answered.
//
// The probe is attached as a startup probe when useStartupProbe is true and as a liveness
// probe otherwise. probeGracePeriodSeconds is the probe-level terminationGracePeriodSeconds;
// pass nil to leave it unset. The pod-level grace period is always
// probeTestPodGracePeriodSeconds.
func newProbeTerminationPod(namespace, podName, containerName string, useStartupProbe bool, probeGracePeriodSeconds *int64) *corev1.Pod {
	failingProbe := &corev1.Probe{
		ProbeHandler: corev1.ProbeHandler{
			HTTPGet: &corev1.HTTPGetAction{
				Path: "/healthz",
				Port: intstr.FromInt32(probeTestPort),
			},
		},
		InitialDelaySeconds: 5,
		// A single failure triggers the kill, so the restart timing reflects the grace period only.
		FailureThreshold:              1,
		PeriodSeconds:                 60,
		TerminationGracePeriodSeconds: probeGracePeriodSeconds,
	}

	container := corev1.Container{
		Name:  containerName,
		Image: probeTestImage,
		SecurityContext: &corev1.SecurityContext{
			AllowPrivilegeEscalation: ptr.To(false),
			Capabilities: &corev1.Capabilities{
				Drop: []corev1.Capability{"ALL"},
			},
		},
		Command: []string{"sh", "-c", "sleep 100000000"},
		Ports: []corev1.ContainerPort{
			{ContainerPort: probeTestPort},
		},
	}
	if useStartupProbe {
		container.StartupProbe = failingProbe
	} else {
		container.LivenessProbe = failingProbe
	}

	return &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      podName,
			Namespace: namespace,
		},
		Spec: corev1.PodSpec{
			TerminationGracePeriodSeconds: ptr.To(probeTestPodGracePeriodSeconds),
			SecurityContext: &corev1.PodSecurityContext{
				RunAsNonRoot: ptr.To(true),
				SeccompProfile: &corev1.SeccompProfile{
					Type: corev1.SeccompProfileTypeRuntimeDefault,
				},
			},
			Containers: []corev1.Container{container},
		},
	}
}

// expectProbeTerminationAfter waits for the container to be killed by its probe and
// restarted, then asserts that the measured gap lies within
// [expectedSec-3, expectedSec+10] seconds.
//
// Assertions use an offset of 1 so that failures are reported at the calling test line.
func expectProbeTerminationAfter(ctx context.Context, oc *exutil.CLI, namespace, podName, containerName string, expectedSec int) {
	minSec := expectedSec - 3
	maxSec := expectedSec + 10

	timeDiff, err := verifyProbeTermination(ctx, oc, namespace, podName, containerName, expectedSec)
	o.ExpectWithOffset(1, err).NotTo(o.HaveOccurred(), "failed to get probe termination events")
	o.ExpectWithOffset(1, timeDiff).To(o.BeNumerically(">=", minSec), "time difference %ds is less than expected minimum %ds", timeDiff, minSec)
	o.ExpectWithOffset(1, timeDiff).To(o.BeNumerically("<=", maxSec), "time difference %ds is greater than expected maximum %ds", timeDiff, maxSec)
}

// findLatestEventByReason scans events and returns a pointer to the item with the given
// reason whose message is accepted by msgFilter and whose LastTimestamp is the most
// recent. It returns nil when no item qualifies. msgFilter is only invoked for items
// whose reason already matches.
func findLatestEventByReason(events *corev1.EventList, reason string, msgFilter func(string) bool) *corev1.Event {
	var newest *corev1.Event
	for idx := range events.Items {
		candidate := &events.Items[idx]
		if candidate.Reason != reason || !msgFilter(candidate.Message) {
			continue
		}
		// Keep the current pick unless the candidate is strictly newer.
		if newest != nil && !candidate.LastTimestamp.Time.After(newest.LastTimestamp.Time) {
			continue
		}
		newest = candidate
	}
	return newest
}

// eventLastSeen returns the time of the event's most recent occurrence: LastTimestamp
// when it is set, otherwise FirstTimestamp.
func eventLastSeen(event *corev1.Event) time.Time {
	if last := event.LastTimestamp.Time; !last.IsZero() {
		return last
	}
	return event.FirstTimestamp.Time
}

// findEarliestEventAfter returns the event matching reason and msgFilter whose most
// recent occurrence is after afterTime, preferring the one that occurred earliest.
// It returns nil when no event qualifies.
//
// The most recent occurrence (see eventLastSeen) is used rather than FirstTimestamp
// because the event recorder folds repeated identical events into one object: it bumps
// Count and LastTimestamp and leaves FirstTimestamp at the first occurrence. A repeated
// event such as a container restart would otherwise look as if it happened before
// afterTime and never be found. For events that occurred once the two timestamps are
// equal, so the result is unchanged.
func findEarliestEventAfter(events *corev1.EventList, reason string, msgFilter func(string) bool, afterTime time.Time) *corev1.Event {
	var earliestEvent *corev1.Event
	for i := range events.Items {
		event := &events.Items[i]
		if event.Reason != reason || !msgFilter(event.Message) {
			continue
		}
		seen := eventLastSeen(event)
		if !seen.After(afterTime) {
			continue
		}
		if earliestEvent == nil || seen.Before(eventLastSeen(earliestEvent)) {
			earliestEvent = event
		}
	}
	return earliestEvent
}

// verifyProbeTermination measures how long the kubelet took to restart a container after
// killing it for a failed probe.
//
// It polls the pod's events every 10 seconds for up to 6 minutes until it sees both a
// Killing event for containerName that mentions a failed probe and a Started event whose
// latest occurrence is after that Killing event. It returns the gap between the two in
// whole seconds, or a wrapped error if the events do not show up before the timeout.
// expectedTerminationSec is used for logging only; callers assert on the returned value.
func verifyProbeTermination(ctx context.Context, oc *exutil.CLI, namespace, podName, containerName string, expectedTerminationSec int) (int, error) {
	var timeDiff int
	// Timeout needs to account for: pod start (~30s) + probe period (60s) + termination (up to 60s) + restart (~30s) = ~3 minutes minimum
	// Use 6 minutes to be safe for tests with 60s termination grace period
	err := wait.PollUntilContextTimeout(ctx, 10*time.Second, 6*time.Minute, true, func(ctx context.Context) (bool, error) {
		// Get events using the Events API
		events, err := oc.KubeClient().CoreV1().Events(namespace).List(ctx, metav1.ListOptions{
			FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.kind=Pod", podName),
		})
		if err != nil {
			// Listing is best-effort: log and retry on the next poll.
			e2e.Logf("Error getting events: %v", err)
			return false, nil
		}

		// Find probe failure (Killing) event for the container
		killingEvent := findLatestEventByReason(events, "Killing", func(msg string) bool {
			return strings.Contains(msg, containerName) &&
				strings.Contains(msg, "failed") &&
				strings.Contains(msg, "probe")
		})
		if killingEvent == nil {
			e2e.Logf("Waiting for probe failure (Killing) event")
			return false, nil
		}

		// Find container restart (Started) event that occurred after the Killing event
		startedEvent := findEarliestEventAfter(events, "Started", func(msg string) bool {
			return strings.Contains(msg, "Container started")
		}, eventLastSeen(killingEvent))
		if startedEvent == nil {
			e2e.Logf("Waiting for container restart (Started) event after Killing event")
			return false, nil
		}

		restartTime := eventLastSeen(startedEvent)
		e2e.Logf("Killing event: %s at %v", killingEvent.Message, killingEvent.LastTimestamp)
		e2e.Logf("Started event: %s at %v", startedEvent.Message, restartTime)

		// CalculateEventTimeDiff reads the end event's FirstTimestamp, which for a
		// deduplicated Started event is the initial container start rather than the
		// restart. Hand it a stand-in event pinned to the restart time.
		restartEvent := &corev1.Event{FirstTimestamp: metav1.NewTime(restartTime)}
		timeDiff = int(nodeutils.CalculateEventTimeDiff(killingEvent, restartEvent).Seconds())
		e2e.Logf("Time difference: %d seconds (expected: ~%d seconds)", timeDiff, expectedTerminationSec)

		return true, nil
	})
	if err != nil {
		return 0, fmt.Errorf("waiting for Killing and subsequent Started events for container %q in pod %s/%s: %w", containerName, namespace, podName, err)
	}
	return timeDiff, nil
}
15 changes: 15 additions & 0 deletions test/extended/node/node_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -764,3 +764,18 @@ func GetFirstReadyWorkerNode(oc *exutil.CLI) string {
o.Expect(false).To(o.BeTrue(), "no Ready worker node found among %v", workers)
return "" // unreachable; satisfies compiler
}

// CalculateEventTimeDiff returns the duration from startEvent to endEvent.
// The start instant is startEvent's LastTimestamp and the end instant is endEvent's
// FirstTimestamp; when either of those is zero, the event's other timestamp
// (FirstTimestamp for the start, LastTimestamp for the end) is used instead.
// The result is negative when the end instant precedes the start instant.
func CalculateEventTimeDiff(startEvent, endEvent *corev1.Event) time.Duration {
	// orElse yields primary unless it is the zero time, in which case it yields fallback.
	orElse := func(primary, fallback time.Time) time.Time {
		if primary.IsZero() {
			return fallback
		}
		return primary
	}

	begin := orElse(startEvent.LastTimestamp.Time, startEvent.FirstTimestamp.Time)
	finish := orElse(endEvent.FirstTimestamp.Time, endEvent.LastTimestamp.Time)
	return finish.Sub(begin)
}