Skip to content

Commit 6ec310c

Browse files
committed
fix: handle DeadlineExceeded in IsRetryable, fix ctx shadow, speed up tests
Signed-off-by: Zhao Chen <winters.zc@antgroup.com>
1 parent 971d25e commit 6ec310c

5 files changed

Lines changed: 42 additions & 14 deletions

File tree

pkg/backend/fetch.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,8 @@ func (b *backend) Fetch(ctx context.Context, target string, cfg *config.Fetch) e
130130
}
131131

132132
logrus.Debugf("fetch: processing layer %s", layer.Digest)
133-
if err := retrypolicy.Do(ctx, func(ctx context.Context) error {
134-
return pullAndExtractFromRemote(ctx, pb, internalpb.NormalizePrompt("Fetching blob"), client, cfg.Output, layer)
133+
if err := retrypolicy.Do(ctx, func(rctx context.Context) error {
134+
return pullAndExtractFromRemote(rctx, pb, internalpb.NormalizePrompt("Fetching blob"), client, cfg.Output, layer)
135135
}, retrypolicy.DoOpts{
136136
FileSize: layer.Size,
137137
FileName: annoFilepath,

pkg/backend/fetch_by_d7y.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,10 +177,10 @@ func fetchLayerByDragonfly(ctx context.Context, pb *internalpb.ProgressBar, clie
177177
}
178178
}
179179

180-
err := retrypolicy.Do(ctx, func(ctx context.Context) error {
180+
err := retrypolicy.Do(ctx, func(rctx context.Context) error {
181181
logrus.Debugf("fetch: processing layer %s", desc.Digest)
182182
cfg.Hooks.BeforePullLayer(desc, manifest) // Call before hook
183-
err := downloadAndExtractFetchLayer(ctx, pb, client, ref, desc, authToken, cfg)
183+
err := downloadAndExtractFetchLayer(rctx, pb, client, ref, desc, authToken, cfg)
184184
cfg.Hooks.AfterPullLayer(desc, err) // Call after hook
185185
if err != nil {
186186
err = fmt.Errorf("pull: failed to download and extract layer %s: %w", desc.Digest, err)

pkg/backend/pull_by_d7y.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -201,10 +201,10 @@ func processLayer(ctx context.Context, pb *internalpb.ProgressBar, client dfdaem
201201
}
202202
}
203203

204-
err := retrypolicy.Do(ctx, func(ctx context.Context) error {
204+
err := retrypolicy.Do(ctx, func(rctx context.Context) error {
205205
logrus.Debugf("pull: processing layer %s", desc.Digest)
206206
cfg.Hooks.BeforePullLayer(desc, manifest) // Call before hook
207-
err := downloadAndExtractLayer(ctx, pb, client, ref, desc, authToken, cfg)
207+
err := downloadAndExtractLayer(rctx, pb, client, ref, desc, authToken, cfg)
208208
cfg.Hooks.AfterPullLayer(desc, err) // Call after hook
209209
if err != nil {
210210
err = fmt.Errorf("pull: failed to download and extract layer %s: %w", desc.Digest, err)

pkg/retrypolicy/retrypolicy.go

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ const (
5151
type Config struct {
5252
MaxRetryTime time.Duration // 0 = dynamic based on file size
5353
NoRetry bool // disable retry entirely
54+
InitialDelay time.Duration // 0 = use default (5s), for testing
55+
MaxJitter time.Duration // -1 = no jitter, 0 = use default (5s), for testing
5456
}
5557

5658
// DoOpts configures a single Do call.
@@ -93,16 +95,27 @@ func Do(ctx context.Context, fn func(ctx context.Context) error, opts DoOpts) er
9395

9496
sizeStr := humanizeBytes(opts.FileSize)
9597

98+
delay := initialDelay
99+
jitter := maxJitter
100+
if cfg.InitialDelay > 0 {
101+
delay = cfg.InitialDelay
102+
}
103+
if cfg.MaxJitter < 0 {
104+
jitter = 0
105+
} else if cfg.MaxJitter > 0 {
106+
jitter = cfg.MaxJitter
107+
}
108+
96109
return retry.Do(
97110
func() error {
98111
return fn(deadlineCtx)
99112
},
100113
retry.Attempts(0),
101114
retry.Context(deadlineCtx),
102115
retry.DelayType(retry.BackOffDelay),
103-
retry.Delay(initialDelay),
116+
retry.Delay(delay),
104117
retry.MaxDelay(maxBackoff),
105-
retry.MaxJitter(maxJitter),
118+
retry.MaxJitter(jitter),
106119
retry.LastErrorOnly(true),
107120
retry.WrapContextErrorWithLastError(true),
108121
retry.RetryIf(func(err error) bool {
@@ -180,8 +193,8 @@ func IsRetryable(err error) bool {
180193
return false
181194
}
182195

183-
// context.Canceled is never retryable — it means user/system cancellation.
184-
if errors.Is(err, context.Canceled) {
196+
// context.Canceled and context.DeadlineExceeded are never retryable.
197+
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
185198
return false
186199
}
187200

pkg/retrypolicy/retrypolicy_test.go

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,16 @@ func TestIsRetryable(t *testing.T) {
132132
err: fmt.Errorf("operation failed: %w", context.Canceled),
133133
want: false,
134134
},
135+
{
136+
name: "context.DeadlineExceeded",
137+
err: context.DeadlineExceeded,
138+
want: false,
139+
},
140+
{
141+
name: "wrapped context.DeadlineExceeded",
142+
err: fmt.Errorf("operation timed out: %w", context.DeadlineExceeded),
143+
want: false,
144+
},
135145
{
136146
name: "HTTP 500 server error",
137147
err: fmt.Errorf("PUT /blobs/uploads: response status code 500: internal server error"),
@@ -344,6 +354,7 @@ func TestDo_RetryOnTransientError(t *testing.T) {
344354
}, DoOpts{
345355
FileSize: 100,
346356
FileName: "test.bin",
357+
Config: &Config{InitialDelay: 10 * time.Millisecond, MaxJitter: -1},
347358
})
348359

349360
if err != nil {
@@ -406,6 +417,7 @@ func TestDo_ParentContextCancel(t *testing.T) {
406417
}, DoOpts{
407418
FileSize: 100,
408419
FileName: "test.bin",
420+
Config: &Config{InitialDelay: 10 * time.Millisecond, MaxJitter: -1},
409421
})
410422

411423
if err == nil {
@@ -427,6 +439,7 @@ func TestDo_OnRetryCallback(t *testing.T) {
427439
}, DoOpts{
428440
FileSize: 100,
429441
FileName: "test.bin",
442+
Config: &Config{InitialDelay: 10 * time.Millisecond, MaxJitter: -1},
430443
OnRetry: func(attempt uint, reason string, backoff time.Duration) {
431444
retryAttempts = append(retryAttempts, attempt)
432445
retryReasons = append(retryReasons, reason)
@@ -458,7 +471,9 @@ func TestDo_ConfigMaxRetryTimeOverride(t *testing.T) {
458471
FileSize: 100,
459472
FileName: "test.bin",
460473
Config: &Config{
461-
MaxRetryTime: 8 * time.Second,
474+
MaxRetryTime: 1 * time.Second,
475+
InitialDelay: 50 * time.Millisecond,
476+
MaxJitter: -1,
462477
},
463478
})
464479

@@ -467,9 +482,9 @@ func TestDo_ConfigMaxRetryTimeOverride(t *testing.T) {
467482
if err == nil {
468483
t.Fatal("expected error after retry timeout, got nil")
469484
}
470-
// Should have run for approximately MaxRetryTime (8s), not the dynamic default (10min).
471-
if elapsed > 30*time.Second {
472-
t.Errorf("expected retry to terminate within ~8s, but elapsed %v", elapsed)
485+
// Should have run for approximately MaxRetryTime (1s), not the dynamic default (10min).
486+
if elapsed > 5*time.Second {
487+
t.Errorf("expected retry to terminate within ~1s, but elapsed %v", elapsed)
473488
}
474489
if atomic.LoadInt32(&callCount) < 2 {
475490
t.Errorf("expected at least 2 attempts, got %d", callCount)

0 commit comments

Comments (0)