Skip to content

Commit 7f19dde

Browse files
matheuscscp authored and claude committed
ssa: remove DeadlineExceeded workaround in WaitForSetWithContext
The workaround filtered out DeadlineExceeded errors from lastStatus updates to prevent kstatus from polluting timeout error messages with all monitored resources. This is no longer needed since cli-utils v0.37.1-flux.1 (fluxcd/cli-utils#18) properly recognizes rate limiter deadline errors as cancellation signals. Add TestWaitForSet_RateLimiterError to verify that rate limiter errors no longer cause all resources to appear in the timeout error message. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 90151c0 commit 7f19dde

3 files changed

Lines changed: 118 additions & 6 deletions

File tree

ssa/manager_wait.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,7 @@ func (m *ResourceManager) WaitForSetWithContext(ctx context.Context, set object.
101101
if rs == nil {
102102
continue
103103
}
104-
// skip DeadlineExceeded errors because kstatus emits that error
105-
// for every resource it's monitoring even when only one of them
106-
// actually fails.
107-
if !errors.Is(rs.Error, context.DeadlineExceeded) {
108-
lastStatus[rs.Identifier] = rs
109-
}
104+
lastStatus[rs.Identifier] = rs
110105

111106
rss = append(rss, rs)
112107
counts[rs.Status]++

ssa/manager_wait_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,73 @@ func TestWaitForSet_terminalState(t *testing.T) {
344344
})
345345
}
346346

347+
func TestWaitForSet_RateLimiterError(t *testing.T) {
348+
g := NewWithT(t)
349+
350+
id := generateName("ratelimit")
351+
352+
// Create real resources on the cluster: 4 ConfigMaps + 1 PVC.
353+
objects, err := readManifest("testdata/test14.yaml", id)
354+
g.Expect(err).NotTo(HaveOccurred())
355+
356+
manager.SetOwnerLabels(objects, "infra", "default")
357+
cs, err := manager.ApplyAllStaged(context.Background(), objects, DefaultApplyOptions())
358+
g.Expect(err).NotTo(HaveOccurred())
359+
360+
// Use a custom status reader that returns the rate limiter error
361+
// after initial polls. This exercises the real cli-utils error handling
362+
// code path (errResourceToResourceStatus / errIdentifierToResourceStatus)
363+
// that PR https://github.com/fluxcd/cli-utils/pull/18 fixes.
364+
//
365+
// On old cli-utils (before PR #18): the rate limiter error is NOT recognized
366+
// as cancellation, so it becomes a ResourceStatus with Unknown status for ALL
367+
// resources that encounter it. The timeout error message would list every
368+
// single resource with "Unknown: rate: Wait(n=1) would exceed context deadline".
369+
//
370+
// On new cli-utils (with PR #18): the rate limiter error IS recognized as
371+
// cancellation via isRateLimiterContextDeadlineExceeded(), so polling stops
372+
// cleanly and the error is propagated directly without dumping all resources.
373+
var callCount int
374+
manager.poller = polling.NewStatusPoller(manager.client, restMapper, polling.Options{
375+
CustomStatusReaders: []engine.StatusReader{
376+
kstatusreaders.NewGenericStatusReader(restMapper,
377+
func(u *unstructured.Unstructured) (*status.Result, error) {
378+
callCount++
379+
// After a few poll cycles, return the rate limiter error
380+
// for every resource. This is the exact error string that
381+
// old kstatus failed to handle properly.
382+
if callCount > 10 {
383+
return nil, fmt.Errorf("rate: Wait(n=1) would exceed context deadline")
384+
}
385+
// Use the built-in status computation for initial polls.
386+
return status.Compute(u)
387+
},
388+
),
389+
},
390+
})
391+
defer func() {
392+
manager.poller = poller
393+
}()
394+
395+
err = manager.WaitForSet(cs.ToObjMetadataSet(), WaitOptions{
396+
Interval: 100 * time.Millisecond,
397+
Timeout: 2 * time.Second,
398+
FailFast: false,
399+
})
400+
401+
g.Expect(err).To(HaveOccurred())
402+
errMsg := err.Error()
403+
t.Logf("error message: %s", errMsg)
404+
405+
// With the new cli-utils (PR #18 fix), the rate limiter error should NOT
406+
// produce a huge error message with all resources listed as Unknown.
407+
for i := 1; i <= 4; i++ {
408+
cmName := fmt.Sprintf("%s-cm%d", id, i)
409+
g.Expect(errMsg).NotTo(ContainSubstring(cmName),
410+
"error should not contain ConfigMap %s — rate limiter errors should not pollute the error message with all resources", cmName)
411+
}
412+
}
413+
347414
func TestWaitForSet_ErrorOnReaderError(t *testing.T) {
348415
g := NewWithT(t)
349416

ssa/testdata/test14.yaml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
---
2+
apiVersion: v1
3+
kind: Namespace
4+
metadata:
5+
name: "%[1]s"
6+
---
7+
apiVersion: v1
8+
data:
9+
foo: bar
10+
kind: ConfigMap
11+
metadata:
12+
name: "%[1]s-cm1"
13+
namespace: "%[1]s"
14+
---
15+
apiVersion: v1
16+
data:
17+
foo: bar
18+
kind: ConfigMap
19+
metadata:
20+
name: "%[1]s-cm2"
21+
namespace: "%[1]s"
22+
---
23+
apiVersion: v1
24+
data:
25+
foo: bar
26+
kind: ConfigMap
27+
metadata:
28+
name: "%[1]s-cm3"
29+
namespace: "%[1]s"
30+
---
31+
apiVersion: v1
32+
data:
33+
foo: bar
34+
kind: ConfigMap
35+
metadata:
36+
name: "%[1]s-cm4"
37+
namespace: "%[1]s"
38+
---
39+
apiVersion: v1
40+
kind: PersistentVolumeClaim
41+
metadata:
42+
name: "%[1]s"
43+
namespace: "%[1]s"
44+
spec:
45+
storageClassName: manual
46+
accessModes:
47+
- ReadWriteOnce
48+
resources:
49+
requests:
50+
storage: 3Gi

0 commit comments

Comments
 (0)