@@ -69,15 +69,6 @@ func logFallbackAttempt(agentName string, model modelWithFallback, attempt, maxR
6969 }
7070}
7171
72- // logRetryBackoff logs when we're backing off before a retry
73- func logRetryBackoff (agentName , modelID string , attempt int , backoff time.Duration ) {
74- slog .Debug ("Backing off before retry" ,
75- "agent" , agentName ,
76- "model" , modelID ,
77- "attempt" , attempt + 1 ,
78- "backoff" , backoff )
79- }
80-
8172// getCooldownState returns the current cooldown state for an agent (thread-safe).
8273// Returns nil if no cooldown is active or if cooldown has expired.
8374// Expired entries are evicted to prevent stale state accumulation.
@@ -228,15 +219,6 @@ func (r *LocalRuntime) tryModelWithFallback(
228219 return streamResult {}, nil , ctx .Err ()
229220 }
230221
231- // Apply backoff before retry (not on first attempt of each model)
232- if attempt > 0 {
233- backoff := modelerrors .CalculateBackoff (attempt - 1 )
234- logRetryBackoff (a .Name (), modelEntry .provider .ID (), attempt , backoff )
235- if ! modelerrors .SleepWithContext (ctx , backoff ) {
236- return streamResult {}, nil , ctx .Err ()
237- }
238- }
239-
240222 // Emit fallback event when transitioning to a new model (but not when starting in cooldown)
241223 if chainIdx > startIndex && attempt == 0 {
242224 logFallbackAttempt (a .Name (), modelEntry , attempt , fallbackRetries , lastErr )
@@ -272,7 +254,7 @@ func (r *LocalRuntime) tryModelWithFallback(
272254 return streamResult {}, nil , err
273255 }
274256
275- decision := r . handleModelError (ctx , err , a , modelEntry , attempt , hasFallbacks , & primaryFailedWithNonRetryable )
257+ decision := handleModelError (ctx , err , a , modelEntry , attempt , hasFallbacks , & primaryFailedWithNonRetryable )
276258 if decision == retryDecisionReturn {
277259 return streamResult {}, nil , ctx .Err ()
278260 } else if decision == retryDecisionBreak {
@@ -292,7 +274,7 @@ func (r *LocalRuntime) tryModelWithFallback(
292274 return streamResult {}, nil , err
293275 }
294276
295- decision := r . handleModelError (ctx , err , a , modelEntry , attempt , hasFallbacks , & primaryFailedWithNonRetryable )
277+ decision := handleModelError (ctx , err , a , modelEntry , attempt , hasFallbacks , & primaryFailedWithNonRetryable )
296278 if decision == retryDecisionReturn {
297279 return streamResult {}, nil , ctx .Err ()
298280 } else if decision == retryDecisionBreak {
@@ -335,10 +317,12 @@ func (r *LocalRuntime) tryModelWithFallback(
335317type retryDecision int
336318
337319const (
338- // retryDecisionContinue means retry the same model (backoff already applied).
339- retryDecisionContinue retryDecision = iota
340320 // retryDecisionBreak means skip to the next model in the fallback chain.
341- retryDecisionBreak
321+ // This is the zero value — safe default: skip to next model rather than
322+ // accidentally retrying or returning early.
323+ retryDecisionBreak retryDecision = iota
324+ // retryDecisionContinue means retry the same model (sleep already applied).
325+ retryDecisionContinue
342326 // retryDecisionReturn means context was cancelled; return immediately.
343327 retryDecisionReturn
344328)
@@ -348,9 +332,12 @@ const (
348332// - retryDecisionBreak — non-retryable error or 429 with fallbacks; skip to next model
349333// - retryDecisionContinue — retryable error or 429 without fallbacks; retry same model
350334//
335+ // All sleeping (both 5xx backoff and 429 Retry-After) is performed here so the
336+ // outer loop never needs its own sleep path; if SleepWithContext reports false, retryDecisionReturn is returned.
337+ //
351338// Side-effect: sets *primaryFailedWithNonRetryable when the primary model fails with a
352339// non-retryable (or rate-limited-with-fallbacks) error.
353- func ( r * LocalRuntime ) handleModelError (
340+ func handleModelError (
354341 ctx context.Context ,
355342 err error ,
356343 a * agent.Agent ,
@@ -363,11 +350,12 @@ func (r *LocalRuntime) handleModelError(
363350
364351 if rateLimited {
365352 if hasFallbacks {
366- // Fallbacks available → skip to next model immediately (existing behaviour) .
367- slog .Warn ("Rate limited with fallbacks available , skipping to next model" ,
353+ // Fallbacks available → skip to next model immediately.
354+ slog .Warn ("Rate limited, skipping model" ,
368355 "agent" , a .Name (),
369356 "model" , modelEntry .provider .ID (),
370- "retry_after" , retryAfter )
357+ "retry_after" , retryAfter ,
358+ "error" , err )
371359 if ! modelEntry .isFallback {
372360 * primaryFailedWithNonRetryable = true
373361 }
@@ -391,7 +379,8 @@ func (r *LocalRuntime) handleModelError(
391379 "model" , modelEntry .provider .ID (),
392380 "attempt" , attempt + 1 ,
393381 "wait" , waitDuration ,
394- "retry_after_from_header" , retryAfter > 0 )
382+ "retry_after_from_header" , retryAfter > 0 ,
383+ "error" , err )
395384 if ! modelerrors .SleepWithContext (ctx , waitDuration ) {
396385 return retryDecisionReturn
397386 }
@@ -409,10 +398,16 @@ func (r *LocalRuntime) handleModelError(
409398 return retryDecisionBreak
410399 }
411400
401+ // Retryable (5xx, timeouts): sleep with backoff then retry same model.
402+ waitDuration := modelerrors .CalculateBackoff (attempt )
412403 slog .Warn ("Retryable error from model" ,
413404 "agent" , a .Name (),
414405 "model" , modelEntry .provider .ID (),
415406 "attempt" , attempt + 1 ,
407+ "wait" , waitDuration ,
416408 "error" , err )
409+ if ! modelerrors .SleepWithContext (ctx , waitDuration ) {
410+ return retryDecisionReturn
411+ }
417412 return retryDecisionContinue
418413}
0 commit comments