Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 62 additions & 9 deletions openshift/tests-extension/test/olmv1-catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -72,18 +73,69 @@ func verifyCatalogEndpoint(ctx SpecContext, catalog, endpoint, query string) {

By(fmt.Sprintf("Creating curl Job to hit: %s", serviceURL))

jobNamePrefix := fmt.Sprintf("verify-%s-%s",
jobNamePrefix := fmt.Sprintf("verify-%s-%s-%s",
strings.ReplaceAll(endpoint, "?", ""),
strings.ReplaceAll(catalog, "-", ""))
strings.ReplaceAll(catalog, "-", ""),
rand.String(5), // unique id per job to avoid race condition problems
)

job := buildCurlJob(jobNamePrefix, "default", serviceURL)
err = k8sClient.Create(ctx, job)
Expect(err).NotTo(HaveOccurred(), "failed to create Job")
// Create the ServiceAccount first
serviceAccount := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: jobNamePrefix,
Namespace: "default",
},
}

// Create the Job
job := buildCurlJob(jobNamePrefix, "default", serviceURL, serviceAccount.Name)

// Use LIFO for DeferCleanup() to ensure the following order of deletion is followed.

// 2. Delete ServiceAccount (should happen second)
DeferCleanup(func(ctx SpecContext) {
// We should also poll for service account deletion too.
Eventually(func(ctx SpecContext) error {
err = k8sClient.Delete(ctx, serviceAccount)
return client.IgnoreNotFound(err)
}).WithContext(ctx).WithTimeout(helpers.DefaultTimeout).WithPolling(helpers.DefaultPolling).Should(Succeed())
// Wait for the ServiceAccount to actually be deleted before continuing
Eventually(func(g Gomega) {
err := k8sClient.Get(ctx, client.ObjectKeyFromObject(serviceAccount), &corev1.ServiceAccount{})
g.Expect(err).To(WithTransform(apierrors.IsNotFound, BeTrue()), "Expected ServiceAccount to be deleted")
}).WithTimeout(helpers.DefaultTimeout).WithPolling(helpers.DefaultPolling).Should(Succeed())
})

// 1. Delete Job (should happen first)
DeferCleanup(func(ctx SpecContext) {
_ = k8sClient.Delete(ctx, job)
// Force delete job with zero grace period to ensure cleanup doesn't hang
// Use Foreground propagation to ensure Pods are deleted before the Job is removed,
// guaranteeing the ServiceAccount isn't deleted while Pods are still using it
deletePolicy := metav1.DeletePropagationForeground
gracePeriod := int64(0)
// Poll for job deletion - in case we have race conditions
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess here is waiting for job deletion, not SA. And, a typo error. If yes, it should be Poll for job deletion - in case we have race conditions.

// or a bad API call.
Eventually(func(ctx SpecContext) error {
err := k8sClient.Delete(ctx, job, &client.DeleteOptions{
GracePeriodSeconds: &gracePeriod,
PropagationPolicy: &deletePolicy,
})
return client.IgnoreNotFound(err)
}).WithContext(ctx).WithTimeout(helpers.DefaultTimeout).WithPolling(helpers.DefaultPolling).Should(Succeed())
// While the delete call may be successful, we need to ensure the deletion itself has
// occurred first before deleting the service account.
Eventually(func(g Gomega) {
err := k8sClient.Get(ctx, client.ObjectKeyFromObject(job), &batchv1.Job{})
g.Expect(err).To(WithTransform(apierrors.IsNotFound, BeTrue()), "Expected a 'NotFound' error, but got: %v", err)
}).WithTimeout(helpers.DefaultTimeout).WithPolling(helpers.DefaultPolling).Should(Succeed())
})

err = k8sClient.Create(ctx, serviceAccount)
Expect(err).NotTo(HaveOccurred(), "failed to create ServiceAccount")

err = k8sClient.Create(ctx, job)
Expect(err).NotTo(HaveOccurred(), "failed to create Job")

By("Waiting for Job to succeed")
Eventually(func(g Gomega) {
recheck := &batchv1.Job{}
Expand All @@ -94,7 +146,7 @@ func verifyCatalogEndpoint(ctx SpecContext, catalog, endpoint, query string) {
return
}
if c.Type == batchv1.JobFailed && c.Status == corev1.ConditionTrue {
Fail(fmt.Sprintf("Job failed: %s", c.Message))
StopTrying(fmt.Sprintf("Job failed: %s", c.Message)).Now()
Comment on lines -97 to +149
Copy link
Contributor

@everettraven everettraven Mar 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Is this change necessary? What does StopTrying().Now() get you over Fail?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I am aware, StopTrying() is better practice than Fail() in Eventually() blocks.

https://onsi.github.io/gomega/#bailing-out-early---polling-functions

}
}
}).WithTimeout(helpers.DefaultTimeout).WithPolling(helpers.DefaultPolling).Should(Succeed())
Expand Down Expand Up @@ -203,7 +255,7 @@ var _ = Describe("[sig-olmv1][OCPFeatureGate:NewOLM][Skipped:Disconnected] OLMv1
})
})

func buildCurlJob(prefix, namespace, url string) *batchv1.Job {
func buildCurlJob(prefix, namespace, url, serviceAccountName string) *batchv1.Job {
backoff := int32(1)
// This means the k8s garbage collector will automatically delete the job 5 minutes
// after it has completed or failed.
Expand Down Expand Up @@ -232,7 +284,8 @@ func buildCurlJob(prefix, namespace, url string) *batchv1.Job {
BackoffLimit: &backoff,
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
RestartPolicy: corev1.RestartPolicyNever,
ServiceAccountName: serviceAccountName,
RestartPolicy: corev1.RestartPolicyNever,
Containers: []corev1.Container{{
Name: "api-tester",
Image: "registry.redhat.io/rhel8/httpd-24:latest",
Expand Down