v0.53.0 — Systematic tool-failure recovery (3 fixes)

molty3000 · molty3000 · commit cf940f754dfd · 2026-05-25T15:46:56.000+02:00
1. Shell stderr visibility (shell.go)
   When a shell command fails with exit status 1 AND has stderr output,
   the stderr is now returned as the tool result instead of a bare
   'error: shell: exit status 1'. The LLM can see WHY it failed.

2. Agent-loop error guard (loop.go)
   New maxConsecutiveToolErrors tracking per tool. When a tool errors
   3+ times consecutively, the loop injects a corrective system message:
   - Directory errors -> suggests tree/search_files
   - Shell failures -> suggests read_file/simpler commands
   - Not found errors -> suggests search_files/glob
   - Binary file errors -> suggests base64/checksum
   - Generic errors -> suggests alternative tools

3. Actionable directory-error messages (perf_tools.go, file_tool.go)
   read_file/batch_read/count_lines now say 'use tree or glob'
   instead of bare 'is a directory, not a file'.

Together these prevent the 23-iteration shell-fallback spiral
observed in production: directory error -> abandon read_file ->
15 iterations of broken shell commands.
diff --git a/cmd/odek/shell.go b/cmd/odek/shell.go
@@ -120,15 +120,21 @@ func (t *shellTool) Call(args string) (string, error) {
 
 	err := cmd.Run()
 	output := strings.TrimSpace(outBuf.String())
-	if errBuf.Len() > 0 {
+	stderrStr := strings.TrimSpace(errBuf.String())
+	if stderrStr != "" {
 		if output != "" {
 			output += "\n"
 		}
-		output += strings.TrimSpace(errBuf.String())
+		output += stderrStr
 	}
 	if err != nil && output == "" {
 		return "", fmt.Errorf("shell: %w", err)
 	}
+	if err != nil && stderrStr != "" {
+		// Include stderr even when stdout is empty — "exit status 1" alone
+		// gives the LLM no clue why the command failed.
+		return output, nil
+	}
 	if output == "" {
 		output = "(no output)"
 	}
diff --git a/internal/loop/loop.go b/internal/loop/loop.go
@@ -109,6 +109,13 @@ type Engine struct {
 	// avoiding resource exhaustion.
 	MaxToolParallel int
 
+	// maxConsecutiveToolErrors tracks how many consecutive error results
+	// each tool has produced. Reset on success, incremented on error.
+	// When a tool hits 3 consecutive errors, the loop injects a corrective
+	// system message suggesting alternative tools instead of letting the
+	// LLM keep retrying the same failing tool.
+	maxConsecutiveToolErrors map[string]int
+
 	// approver gates dangerous operations. When set and the LLM returns
 	// multiple tool calls in one iteration, a single batch approval prompt
 	// is shown before any tool executes, but ONLY for tools whose risk
@@ -135,12 +142,13 @@ type Engine struct {
 // Pass 0 for no limit enforcement.
 func New(client *llm.Client, registry *tool.Registry, maxIterations int, systemMessage string, renderer *render.Renderer, maxContext int) *Engine {
 	return &Engine{
-		client:    client,
-		registry:  registry,
-		renderer:  renderer,
-		maxIter:   maxIterations,
-		system:    systemMessage,
-		maxContext: maxContext,
+		client:                   client,
+		registry:                 registry,
+		renderer:                 renderer,
+		maxIter:                  maxIterations,
+		system:                   systemMessage,
+		maxContext:               maxContext,
+		maxConsecutiveToolErrors: make(map[string]int),
 	}
 }
 
@@ -404,6 +412,8 @@ func (e *Engine) RunWithMessages(ctx context.Context, messages []llm.Message) (s
 func (e *Engine) runLoop(ctx context.Context, messages []llm.Message) (string, []llm.Message, error) {
 	tools := e.buildToolDefs()
 	startTime := time.Now()
+	// Reset per-session tool error tracking
+	e.maxConsecutiveToolErrors = make(map[string]int)
 
 	for i := 0; i < e.maxIter; i++ {
 		select {
@@ -829,6 +839,65 @@ if e.approver != nil && len(result.ToolCalls) > 1 {
 			})
 		}
 
+		// ── Tool error recovery: track consecutive failures per tool ──
+		// When a tool errors 3+ times in a row, inject a corrective
+		// system message so the LLM picks a different approach instead
+		// of retrying the same failing tool.
+		const (
+			errThreshold  = 3  // consecutive errors before intervention
+			errPrefixRead = "\"error\":" // JSON error indicator
+		)
+		var corrections []string
+		for idx, tc := range result.ToolCalls {
+			raw := results[idx].output
+			toolName := tc.Function.Name
+			isErr := strings.Contains(raw, errPrefixRead) ||
+				strings.HasPrefix(raw, "error:")
+
+			if isErr {
+				e.maxConsecutiveToolErrors[toolName]++
+			} else {
+				e.maxConsecutiveToolErrors[toolName] = 0
+			}
+
+			if e.maxConsecutiveToolErrors[toolName] >= errThreshold {
+				// Build a corrective suggestion based on error type
+				var correction string
+				switch {
+				case strings.Contains(raw, "is a directory"):
+					correction = fmt.Sprintf(
+						"⚠️ Tool %q keeps failing on a directory. Use tree or search_files(target='files') to explore directories instead.",
+						toolName)
+				case toolName == "shell" && strings.Contains(raw, "exit status"):
+					correction = fmt.Sprintf(
+						"⚠️ Shell command failed repeatedly. Try a different approach: use read_file to inspect files, or break the command into simpler steps.")
+				case strings.Contains(raw, "not found") || strings.Contains(raw, "no such file"):
+					correction = fmt.Sprintf(
+						"⚠️ Tool %q cannot find the path. Use search_files or glob to locate the correct path first.",
+						toolName)
+				case strings.Contains(raw, "is a binary file") || strings.Contains(raw, "binary"):
+					correction = fmt.Sprintf(
+						"⚠️ Tool %q cannot read binary files. Use base64 to encode binary content, or checksum to hash it.",
+						toolName)
+				default:
+					correction = fmt.Sprintf(
+						"⚠️ Tool %q keeps failing. Try a different tool: use shell for shell commands, search_files for finding files, or read_file for reading files.",
+						toolName)
+				}
+				corrections = append(corrections, correction)
+				// Reset counter after injecting suggestion
+				e.maxConsecutiveToolErrors[toolName] = 0
+			}
+		}
+		// Inject all corrections as a single system message
+		if len(corrections) > 0 {
+			msg := strings.Join(corrections, "\n")
+			messages = append(messages, llm.Message{
+				Role:    "system",
+				Content: msg,
+			})
+		}
+
 		// Fire iteration callback with tool call results
 		if e.iterationCallback != nil {
 			e.iterationCallback(IterationInfo{

Original file line number	Diff line number	Diff line change
`@@ -120,15 +120,21 @@ func (t *shellTool) Call(args string) (string, error) {`
`120`	`120`
`121`	`121`	`err := cmd.Run()`
`122`	`122`	`output := strings.TrimSpace(outBuf.String())`
`123`		`- if errBuf.Len() > 0 {`
	`123`	`+ stderrStr := strings.TrimSpace(errBuf.String())`
	`124`	`+ if stderrStr != "" {`
`124`	`125`	`if output != "" {`
`125`	`126`	`output += "\n"`
`126`	`127`	`}`
`127`		`- output += strings.TrimSpace(errBuf.String())`
	`128`	`+ output += stderrStr`
`128`	`129`	`}`
`129`	`130`	`if err != nil && output == "" {`
`130`	`131`	`return "", fmt.Errorf("shell: %w", err)`
`131`	`132`	`}`
	`133`	`+ if err != nil && stderrStr != "" {`
	`134`	`+ // Include stderr even when stdout is empty — "exit status 1" alone`
	`135`	`+ // gives the LLM no clue why the command failed.`
	`136`	`+ return output, nil`
	`137`	`+ }`
`132`	`138`	`if output == "" {`
`133`	`139`	`output = "(no output)"`
`134`	`140`	`}`