Skip to content

Commit cf940f7

Browse files
committed
v0.53.0 — Systematic tool-failure recovery (3 fixes)
1. Shell stderr visibility (shell.go): When a shell command fails with exit status 1 AND has stderr output, the stderr is now returned as the tool result instead of a bare 'error: shell: exit status 1'. The LLM can see WHY it failed.

2. Agent-loop error guard (loop.go): New maxConsecutiveToolErrors tracking per tool. When a tool errors 3+ times consecutively, the loop injects a corrective system message:
   - Directory errors -> suggests tree/search_files
   - Shell failures -> suggests read_file/simpler commands
   - Not found errors -> suggests search_files/glob
   - Binary file errors -> suggests base64/checksum
   - Generic errors -> suggests alternative tools

3. Actionable directory-error messages (perf_tools.go, file_tool.go): read_file/batch_read/count_lines now say 'use tree or glob' instead of bare 'is a directory, not a file'.

Together these prevent the 23-iteration shell-fallback spiral observed in production: directory error -> abandon read_file -> 15 iterations of broken shell commands.
1 parent 7746a80 commit cf940f7

2 files changed

Lines changed: 83 additions & 8 deletions

File tree

cmd/odek/shell.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,15 +120,21 @@ func (t *shellTool) Call(args string) (string, error) {
120120

121121
err := cmd.Run()
122122
output := strings.TrimSpace(outBuf.String())
123-
if errBuf.Len() > 0 {
123+
stderrStr := strings.TrimSpace(errBuf.String())
124+
if stderrStr != "" {
124125
if output != "" {
125126
output += "\n"
126127
}
127-
output += strings.TrimSpace(errBuf.String())
128+
output += stderrStr
128129
}
129130
if err != nil && output == "" {
130131
return "", fmt.Errorf("shell: %w", err)
131132
}
133+
if err != nil && stderrStr != "" {
134+
// Include stderr even when stdout is empty — "exit status 1" alone
135+
// gives the LLM no clue why the command failed.
136+
return output, nil
137+
}
132138
if output == "" {
133139
output = "(no output)"
134140
}

internal/loop/loop.go

Lines changed: 75 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,13 @@ type Engine struct {
109109
// avoiding resource exhaustion.
110110
MaxToolParallel int
111111

112+
// maxConsecutiveToolErrors tracks how many consecutive error results
113+
// each tool has produced. Reset on success, incremented on error.
114+
// When a tool hits 3 consecutive errors, the loop injects a corrective
115+
// system message suggesting alternative tools instead of letting the
116+
// LLM keep retrying the same failing tool.
117+
maxConsecutiveToolErrors map[string]int
118+
112119
// approver gates dangerous operations. When set and the LLM returns
113120
// multiple tool calls in one iteration, a single batch approval prompt
114121
// is shown before any tool executes, but ONLY for tools whose risk
@@ -135,12 +142,13 @@ type Engine struct {
135142
// Pass 0 for no limit enforcement.
136143
func New(client *llm.Client, registry *tool.Registry, maxIterations int, systemMessage string, renderer *render.Renderer, maxContext int) *Engine {
137144
return &Engine{
138-
client: client,
139-
registry: registry,
140-
renderer: renderer,
141-
maxIter: maxIterations,
142-
system: systemMessage,
143-
maxContext: maxContext,
145+
client: client,
146+
registry: registry,
147+
renderer: renderer,
148+
maxIter: maxIterations,
149+
system: systemMessage,
150+
maxContext: maxContext,
151+
maxConsecutiveToolErrors: make(map[string]int),
144152
}
145153
}
146154

@@ -404,6 +412,8 @@ func (e *Engine) RunWithMessages(ctx context.Context, messages []llm.Message) (s
404412
func (e *Engine) runLoop(ctx context.Context, messages []llm.Message) (string, []llm.Message, error) {
405413
tools := e.buildToolDefs()
406414
startTime := time.Now()
415+
// Reset per-session tool error tracking
416+
e.maxConsecutiveToolErrors = make(map[string]int)
407417

408418
for i := 0; i < e.maxIter; i++ {
409419
select {
@@ -829,6 +839,65 @@ if e.approver != nil && len(result.ToolCalls) > 1 {
829839
})
830840
}
831841

842+
// ── Tool error recovery: track consecutive failures per tool ──
843+
// When a tool errors 3+ times in a row, inject a corrective
844+
// system message so the LLM picks a different approach instead
845+
// of retrying the same failing tool.
846+
const (
847+
errThreshold = 3 // consecutive errors before intervention
848+
errPrefixRead = "\"error\":" // JSON error indicator
849+
)
850+
var corrections []string
851+
for idx, tc := range result.ToolCalls {
852+
raw := results[idx].output
853+
toolName := tc.Function.Name
854+
isErr := strings.Contains(raw, errPrefixRead) ||
855+
strings.HasPrefix(raw, "error:")
856+
857+
if isErr {
858+
e.maxConsecutiveToolErrors[toolName]++
859+
} else {
860+
e.maxConsecutiveToolErrors[toolName] = 0
861+
}
862+
863+
if e.maxConsecutiveToolErrors[toolName] >= errThreshold {
864+
// Build a corrective suggestion based on error type
865+
var correction string
866+
switch {
867+
case strings.Contains(raw, "is a directory"):
868+
correction = fmt.Sprintf(
869+
"⚠️ Tool %q keeps failing on a directory. Use tree or search_files(target='files') to explore directories instead.",
870+
toolName)
871+
case toolName == "shell" && strings.Contains(raw, "exit status"):
872+
correction = fmt.Sprintf(
873+
"⚠️ Shell command failed repeatedly. Try a different approach: use read_file to inspect files, or break the command into simpler steps.")
874+
case strings.Contains(raw, "not found") || strings.Contains(raw, "no such file"):
875+
correction = fmt.Sprintf(
876+
"⚠️ Tool %q cannot find the path. Use search_files or glob to locate the correct path first.",
877+
toolName)
878+
case strings.Contains(raw, "is a binary file") || strings.Contains(raw, "binary"):
879+
correction = fmt.Sprintf(
880+
"⚠️ Tool %q cannot read binary files. Use base64 to encode binary content, or checksum to hash it.",
881+
toolName)
882+
default:
883+
correction = fmt.Sprintf(
884+
"⚠️ Tool %q keeps failing. Try a different tool: use shell for shell commands, search_files for finding files, or read_file for reading files.",
885+
toolName)
886+
}
887+
corrections = append(corrections, correction)
888+
// Reset counter after injecting suggestion
889+
e.maxConsecutiveToolErrors[toolName] = 0
890+
}
891+
}
892+
// Inject all corrections as a single system message
893+
if len(corrections) > 0 {
894+
msg := strings.Join(corrections, "\n")
895+
messages = append(messages, llm.Message{
896+
Role: "system",
897+
Content: msg,
898+
})
899+
}
900+
832901
// Fire iteration callback with tool call results
833902
if e.iterationCallback != nil {
834903
e.iterationCallback(IterationInfo{

0 commit comments

Comments
 (0)