From df878a3cbff814d00d7a1c2cbdc164aa775a76e8 Mon Sep 17 00:00:00 2001 From: Kunal Memane Date: Fri, 27 Mar 2026 13:25:09 +0530 Subject: [PATCH 1/4] Revert "Only apply 5-minute sleep workaround for known multi-arch repos" This reverts commit 1d12ac7e5684ffbbbb593bb7b6d0f2bf93c7b470. --- pkg/steps/bundle_source.go | 2 +- pkg/steps/project_image.go | 2 +- pkg/steps/source.go | 40 +++++--------------------------------- 3 files changed, 7 insertions(+), 37 deletions(-) diff --git a/pkg/steps/bundle_source.go b/pkg/steps/bundle_source.go index 872f68cfc5a..84f5a120daf 100644 --- a/pkg/steps/bundle_source.go +++ b/pkg/steps/bundle_source.go @@ -82,7 +82,7 @@ func (s *bundleSourceStep) run(ctx context.Context) error { // Bundle images are not multi-arch by design. Here we build it without creating a manifest-listed image. // Note that we are not configuring a node selector here, so the build will be scheduled on any available // node no matter the architecture. - return handleBuild(ctx, s.client, s.podClient, *build, func() bool { return false }) + return handleBuild(ctx, s.client, s.podClient, *build, false) } func replaceCommand(pullSpec, with string) string { diff --git a/pkg/steps/project_image.go b/pkg/steps/project_image.go index 7848dd41b1b..4ef28beed73 100644 --- a/pkg/steps/project_image.go +++ b/pkg/steps/project_image.go @@ -72,7 +72,7 @@ func (s *projectDirectoryImageBuildStep) run(ctx context.Context) error { // Bundle images are non multi-arch by design. No manifest list is needed. Here we spawn a single build. 
if s.config.IsBundleImage() { - return handleBuild(ctx, s.client, s.podClient, *build, func() bool { return false }) + return handleBuild(ctx, s.client, s.podClient, *build, false) } return handleBuilds(ctx, s.client, s.podClient, *build, s.metricsAgent, newImageBuildOptions(s.architectures.UnsortedList())) diff --git a/pkg/steps/source.go b/pkg/steps/source.go index 786e592ef1d..d0fca968edd 100644 --- a/pkg/steps/source.go +++ b/pkg/steps/source.go @@ -467,31 +467,13 @@ func isBuildPhaseTerminated(phase buildapi.BuildPhase) bool { } type ImageBuildOptions struct { - Architectures []string - NeedsMultiArchWorkaround func() bool + Architectures []string } func newImageBuildOptions(archs []string) ImageBuildOptions { return ImageBuildOptions{Architectures: archs} } -// multiArchRepos are repos that need the sleep workaround for multi-arch builds. -var multiArchRepos = sets.New[string]( - "openshift/ci-tools", - "openshift/kueue-operator", - "openshift/loki", - "openshift/multiarch-tuning-operator", - "openshift/oadp-must-gather", - "openshift/oadp-operator", - "openshift/openshift-velero-plugin", - "openshift/velero", - "openshift/velero-plugin-for-aws", - "openshift/velero-plugin-for-gcp", - "openshift/velero-plugin-for-legacy-aws", - "openshift/velero-plugin-for-microsoft-azure", - "openshift-eng/baremetal-qe-infra", -) - func handleBuilds(ctx context.Context, buildClient BuildClient, podClient kubernetes.PodClient, build buildapi.Build, metricsAgent *metrics.MetricsAgent, opts ...ImageBuildOptions) error { var wg sync.WaitGroup @@ -500,17 +482,6 @@ func handleBuilds(ctx context.Context, buildClient BuildClient, podClient kubern o = opts[0] } - // Create closure that checks if this build is for a multi-arch repo - needsMultiArchWorkaround := o.NeedsMultiArchWorkaround - if needsMultiArchWorkaround == nil { - needsMultiArchWorkaround = func() bool { - if org, repo := build.Labels[LabelMetadataOrg], build.Labels[LabelMetadataRepo]; org != "" && repo != "" { - return 
multiArchRepos.Has(fmt.Sprintf("%s/%s", org, repo)) - } - return false - } - } - builds := constructMultiArchBuilds(build, o.Architectures) errChan := make(chan error, len(builds)) @@ -519,7 +490,7 @@ func handleBuilds(ctx context.Context, buildClient BuildClient, podClient kubern go func(b buildapi.Build) { defer wg.Done() metricsAgent.AddNodeWorkload(ctx, b.Namespace, fmt.Sprintf("%s-build", b.Name), b.Name, podClient) - if err := handleBuild(ctx, buildClient, podClient, b, needsMultiArchWorkaround); err != nil { + if err := handleBuild(ctx, buildClient, podClient, b, true); err != nil { errChan <- fmt.Errorf("error occurred handling build %s: %w", b.Name, err) } metricsAgent.RemoveNodeWorkload(b.Name) @@ -577,15 +548,14 @@ func constructMultiArchBuilds(build buildapi.Build, stepArchitectures []string) return ret } -func handleBuild(ctx context.Context, client BuildClient, podClient kubernetes.PodClient, build buildapi.Build, needsMultiArchWorkaround func() bool) error { +func handleBuild(ctx context.Context, client BuildClient, podClient kubernetes.PodClient, build buildapi.Build, waitOnMultiArch bool) error { const attempts = 5 ns, name := build.Namespace, build.Name var errs []error if err := wait.ExponentialBackoff(wait.Backoff{Duration: time.Minute, Factor: 1.5, Steps: attempts}, func() (bool, error) { var attempt buildapi.Build - // TODO: This is a workaround to avoid race condition with multiple ci-operator instances building different architectures - // See https://issues.redhat.com/browse/OCPBUGS-65845 - if needsMultiArchWorkaround != nil && needsMultiArchWorkaround() { + if waitOnMultiArch { + // builds are using older src image, adding wait to avoid race condition time.Sleep(5 * time.Minute) } build.DeepCopyInto(&attempt) From 3ed7a4c101728bcf469984be62d216f8c836b2c9 Mon Sep 17 00:00:00 2001 From: Kunal Memane Date: Fri, 27 Mar 2026 15:00:39 +0530 Subject: [PATCH 2/4] Revert "pkg/steps: don't wait on builds in single-arch contexts" This reverts commit 
8cf9cb7cb04c8e1af1979a62ad09fcfbfd2646db. --- pkg/steps/bundle_source.go | 2 +- pkg/steps/project_image.go | 2 +- pkg/steps/source.go | 10 ++++------ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pkg/steps/bundle_source.go b/pkg/steps/bundle_source.go index 84f5a120daf..e41d2256b49 100644 --- a/pkg/steps/bundle_source.go +++ b/pkg/steps/bundle_source.go @@ -82,7 +82,7 @@ func (s *bundleSourceStep) run(ctx context.Context) error { // Bundle images are not multi-arch by design. Here we build it without creating a manifest-listed image. // Note that we are not configuring a node selector here, so the build will be scheduled on any available // node no matter the architecture. - return handleBuild(ctx, s.client, s.podClient, *build, false) + return handleBuild(ctx, s.client, s.podClient, *build) } func replaceCommand(pullSpec, with string) string { diff --git a/pkg/steps/project_image.go b/pkg/steps/project_image.go index 4ef28beed73..3b9e68c2204 100644 --- a/pkg/steps/project_image.go +++ b/pkg/steps/project_image.go @@ -72,7 +72,7 @@ func (s *projectDirectoryImageBuildStep) run(ctx context.Context) error { // Bundle images are non multi-arch by design. No manifest list is needed. Here we spawn a single build. 
if s.config.IsBundleImage() { - return handleBuild(ctx, s.client, s.podClient, *build, false) + return handleBuild(ctx, s.client, s.podClient, *build) } return handleBuilds(ctx, s.client, s.podClient, *build, s.metricsAgent, newImageBuildOptions(s.architectures.UnsortedList())) diff --git a/pkg/steps/source.go b/pkg/steps/source.go index d0fca968edd..b550e2ddd6e 100644 --- a/pkg/steps/source.go +++ b/pkg/steps/source.go @@ -490,7 +490,7 @@ func handleBuilds(ctx context.Context, buildClient BuildClient, podClient kubern go func(b buildapi.Build) { defer wg.Done() metricsAgent.AddNodeWorkload(ctx, b.Namespace, fmt.Sprintf("%s-build", b.Name), b.Name, podClient) - if err := handleBuild(ctx, buildClient, podClient, b, true); err != nil { + if err := handleBuild(ctx, buildClient, podClient, b); err != nil { errChan <- fmt.Errorf("error occurred handling build %s: %w", b.Name, err) } metricsAgent.RemoveNodeWorkload(b.Name) @@ -548,16 +548,14 @@ func constructMultiArchBuilds(build buildapi.Build, stepArchitectures []string) return ret } -func handleBuild(ctx context.Context, client BuildClient, podClient kubernetes.PodClient, build buildapi.Build, waitOnMultiArch bool) error { +func handleBuild(ctx context.Context, client BuildClient, podClient kubernetes.PodClient, build buildapi.Build) error { const attempts = 5 ns, name := build.Namespace, build.Name var errs []error if err := wait.ExponentialBackoff(wait.Backoff{Duration: time.Minute, Factor: 1.5, Steps: attempts}, func() (bool, error) { var attempt buildapi.Build - if waitOnMultiArch { - // builds are using older src image, adding wait to avoid race condition - time.Sleep(5 * time.Minute) - } + // builds are using older src image, adding wait to avoid race condition + time.Sleep(5 * time.Minute) build.DeepCopyInto(&attempt) if err := client.Create(ctx, &attempt); err == nil { logrus.Infof("Created build %q", name) From bc5fecd0d6b196b142d19e0fb6d90e194a2be04b Mon Sep 17 00:00:00 2001 From: Kunal Memane Date: Fri, 27 
Mar 2026 15:44:13 +0530 Subject: [PATCH 3/4] Revert "Change sleep duration to 5 minutes in build creation" This reverts commit a3864a16bb1a6a493deae77e9936635b17116cd3. --- pkg/steps/source.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/steps/source.go b/pkg/steps/source.go index b550e2ddd6e..d86657c9c95 100644 --- a/pkg/steps/source.go +++ b/pkg/steps/source.go @@ -555,7 +555,7 @@ func handleBuild(ctx context.Context, client BuildClient, podClient kubernetes.P if err := wait.ExponentialBackoff(wait.Backoff{Duration: time.Minute, Factor: 1.5, Steps: attempts}, func() (bool, error) { var attempt buildapi.Build // builds are using older src image, adding wait to avoid race condition - time.Sleep(5 * time.Minute) + time.Sleep(1 * time.Minute) build.DeepCopyInto(&attempt) if err := client.Create(ctx, &attempt); err == nil { logrus.Infof("Created build %q", name) From 543ae8ee568f49840fa908a0a875839aed8200a7 Mon Sep 17 00:00:00 2001 From: Kunal Memane Date: Fri, 27 Mar 2026 16:22:20 +0530 Subject: [PATCH 4/4] chore: disable one-minute delay --- pkg/steps/source.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/steps/source.go b/pkg/steps/source.go index d86657c9c95..08304bb803b 100644 --- a/pkg/steps/source.go +++ b/pkg/steps/source.go @@ -555,7 +555,7 @@ func handleBuild(ctx context.Context, client BuildClient, podClient kubernetes.P if err := wait.ExponentialBackoff(wait.Backoff{Duration: time.Minute, Factor: 1.5, Steps: attempts}, func() (bool, error) { var attempt buildapi.Build // builds are using older src image, adding wait to avoid race condition - time.Sleep(1 * time.Minute) + //time.Sleep(1 * time.Minute) build.DeepCopyInto(&attempt) if err := client.Create(ctx, &attempt); err == nil { logrus.Infof("Created build %q", name)