-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathagentic_client.go
More file actions
495 lines (442 loc) · 15 KB
/
agentic_client.go
File metadata and controls
495 lines (442 loc) · 15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
package llmclient
import (
"context"
"encoding/json"
"errors"
"fmt"
"time"
"github.com/grafana/plugin-validator/pkg/llmprovider"
"github.com/grafana/plugin-validator/pkg/llmprovider/anthropicprovider"
"github.com/grafana/plugin-validator/pkg/llmprovider/geminiprovider"
"github.com/grafana/plugin-validator/pkg/llmprovider/openaiprovider"
)
// errRecoverable marks per-question failures that should not abort the entire
// run. The question is marked as errored and processing continues.
var errRecoverable = errors.New("recoverable")

const (
	// maxToolCallsFirstQuestion is the tool-call budget for the first
	// question, which has to explore the repository from scratch.
	maxToolCallsFirstQuestion = 60
	// maxToolCallsFollowUp is the smaller budget for follow-up questions,
	// which benefit from context accumulated by earlier questions.
	maxToolCallsFollowUp = 30
	// maxLLMRetries is how many attempts callLLMWithRetry makes before
	// giving up and returning the last error.
	maxLLMRetries = 3
	// maxConsecutiveNoTools is how many tool-less responses in a row are
	// tolerated before the question is abandoned as recoverable.
	maxConsecutiveNoTools = 5
	// retryDelay is the fixed pause between LLM retry attempts.
	retryDelay = 2 * time.Second
	// llmCallTimeout bounds each individual LLM call.
	llmCallTimeout = 90 * time.Second

	// budgetNudgePrompt is injected as a human message when the remaining
	// tool budget is low, to push the model toward submit_answer.
	budgetNudgePrompt = `You have only %d tool calls remaining. Wrap up your investigation and call submit_answer now with whatever information you have gathered so far.`
	// useToolsReminderPrompt is injected when the model replies without
	// calling any tools, reminding it that it must explore via tools.
	useToolsReminderPrompt = `You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer.`
	// submitAnswerAloneError is returned as the tool result when
	// submit_answer is batched with other tool calls in one response.
	submitAnswerAloneError = `Error: submit_answer must be called alone. When you have an answer, call submit_answer as a single tool call without any other tools in the same response.`
)
// AgenticClient is an interface for agentic LLM interactions.
// CallLLM answers each question about the repository at repositoryPath,
// returning one AnswerSchema per question.
type AgenticClient interface {
	CallLLM(ctx context.Context, questions []string, repositoryPath string) ([]AnswerSchema, error)
}

// agenticClientImpl implements AgenticClient
type agenticClientImpl struct {
	apiKey       string // provider API key
	model        string // model identifier passed to the provider
	provider     string // one of "google", "anthropic", "openai" (see initProvider)
	tools        []llmprovider.Tool // tool set exposed to the agent
	systemPrompt string // system prompt built from opts.SystemPrompt plus tools
	executor     *toolExecutor // per-repository tool executor, (re)assigned in CallLLM
}
// NewAgenticClient creates a new AgenticClient with the given options.
// It validates the mandatory fields (API key, model, provider), resolves the
// tool set the agent may use, and builds the system prompt from those tools.
func NewAgenticClient(opts *AgenticCallOptions) (AgenticClient, error) {
	if opts == nil {
		return nil, fmt.Errorf("options are required")
	}

	// Reject any missing mandatory field up front.
	switch {
	case opts.APIKey == "":
		return nil, fmt.Errorf("API key is required")
	case opts.Model == "":
		return nil, fmt.Errorf("model is required")
	case opts.Provider == "":
		return nil, fmt.Errorf("provider is required")
	}

	tools, err := resolveTools(opts)
	if err != nil {
		return nil, fmt.Errorf("resolving tools: %w", err)
	}

	client := &agenticClientImpl{
		apiKey:       opts.APIKey,
		model:        opts.Model,
		provider:     opts.Provider,
		tools:        tools,
		systemPrompt: buildSystemPrompt(opts.SystemPrompt, tools),
	}
	return client, nil
}
// getFreshContext returns a brand-new conversation seeded with only the
// client's system prompt. It is used to start the run and to reset the
// conversation between questions.
func (c *agenticClientImpl) getFreshContext() []llmprovider.Message {
	systemMessage := llmprovider.TextMessage(llmprovider.RoleSystem, c.systemPrompt)
	return []llmprovider.Message{systemMessage}
}
// CallLLM executes an agentic loop with tools to answer questions about code.
// Each question is processed sequentially, with follow-up questions benefiting
// from the context accumulated by earlier questions.
//
// Per-question failures wrapping errRecoverable, and questions that exhaust
// their tool budget, are recorded as errored AnswerSchema entries and the run
// continues with a fresh context. A hard provider failure aborts the run,
// returning any answers collected so far (or the error when none exist yet).
func (c *agenticClientImpl) CallLLM(
	ctx context.Context,
	questions []string,
	repositoryPath string,
) ([]AnswerSchema, error) {
	if len(questions) == 0 {
		return nil, fmt.Errorf("at least one question is required")
	}

	// Initialize LLM based on provider using the client's configured settings.
	opts := &AgenticCallOptions{
		APIKey:   c.apiKey,
		Model:    c.model,
		Provider: c.provider,
	}
	provider, err := initProvider(ctx, opts)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize LLM: %w", err)
	}

	// Initialize executor for this repository.
	c.executor = newToolExecutor(repositoryPath)

	// Build initial messages with system prompt only (no user message yet).
	messages := c.getFreshContext()

	// Print debug log file path before starting the loop.
	printDebugLogPath()
	debugLog("\n\n\n")
	debugLog("################################################################")
	debugLog("# NEW CallLLM - provider=%s model=%s", c.provider, c.model)
	debugLog("# repo=%s", repositoryPath)
	debugLog("# questions=%d", len(questions))
	debugLog("################################################################")

	// Collect answers.
	var answers []AnswerSchema

	// Process each question sequentially.
	for questionIndex, question := range questions {
		debugLog(
			"\n========== Processing question %d/%d ==========",
			questionIndex+1,
			len(questions),
		)
		debugLog("Question: %s", truncateString(question, 200))

		originalQuestion := question

		// The first question gets a larger budget because it must explore
		// the repository from scratch; follow-ups reuse prior context.
		toolsBudget := maxToolCallsFirstQuestion
		if questionIndex > 0 {
			toolsBudget = maxToolCallsFollowUp
		}
		debugLog("Budget: %d tool calls", toolsBudget)

		// Add the question as a human message.
		messages = append(messages, llmprovider.TextMessage(llmprovider.RoleHuman, question))

		// Run the question loop.
		updatedMessages, answer, usage, err := c.runQuestionLoop(
			ctx,
			provider,
			messages,
			toolsBudget,
			questionIndex,
		)
		messages = updatedMessages
		if err != nil {
			if errors.Is(err, errRecoverable) {
				// Recoverable: mark question as errored, reset context, continue.
				debugLog("AgenticClient: question %d recoverable error: %v", questionIndex+1, err)
				answers = append(answers, AnswerSchema{
					Question: originalQuestion,
					Error:    err.Error(),
				})
				messages = c.getFreshContext()
				continue
			}
			// Hard error (provider failure) - abort, preserving partial results.
			debugLog("AgenticClient: question %d failed: %v", questionIndex+1, err)
			if len(answers) > 0 {
				debugLog("AgenticClient: returning %d partial answers", len(answers))
				return answers, nil
			}
			return nil, err
		}
		if answer != nil {
			// Set the question field.
			answer.Question = originalQuestion
			answers = append(answers, *answer)
			debugLog("AgenticClient: collected answer %d/%d", len(answers), len(questions))
		} else {
			// Budget exhausted without answer - record error and reset context for next question.
			debugLog("AgenticClient: question %d exhausted budget without answer, marking as errored", questionIndex+1)
			answers = append(answers, AnswerSchema{
				Question: originalQuestion,
				Error:    "budget exhausted without answer",
			})
			// Reset conversation to system prompt only so the next question starts fresh.
			messages = c.getFreshContext()
		}
		debugLog(
			"AgenticClient: accumulated context tokens after question %d: %d (cache_create=%d, cache_read=%d)",
			questionIndex+1,
			usage.TotalTokens,
			usage.CacheCreationInputTokens,
			usage.CacheReadInputTokens,
		)
		// Keep the conversation well under provider context limits: once the
		// accumulated context exceeds 100k tokens, the next question starts
		// from a fresh system-prompt-only conversation.
		if usage.TotalTokens > 100000 {
			debugLog(
				"AgenticClient: context reached %d tokens (>100k). Flushing context to start fresh.",
				usage.TotalTokens,
			)
			messages = c.getFreshContext()
		}
	}

	debugLog("AgenticClient: successfully answered all %d questions", len(questions))
	return answers, nil
}
// runQuestionLoop runs the tool-calling loop for a single question.
// Returns updated messages, the answer (or nil if budget exhausted), and error.
//
// Budget accounting: every executed tool call consumes one unit of
// toolsBudget, and a tool-less response also consumes one unit so a stalled
// agent cannot loop forever. Errors wrapping errRecoverable mean "skip this
// question"; any other error is a hard provider failure.
func (c *agenticClientImpl) runQuestionLoop(
	ctx context.Context,
	provider llmprovider.Provider,
	messages []llmprovider.Message,
	toolsBudget int,
	questionIndex int,
) ([]llmprovider.Message, *AnswerSchema, llmprovider.Usage, error) {
	toolCallsRemaining := toolsBudget
	consecutiveNoTools := 0
	iteration := 0
	var lastUsage llmprovider.Usage
	// budgetNudged ensures the low-budget warning is sent at most once.
	budgetNudged := false
	for toolCallsRemaining > 0 {
		iteration++
		debugLog("========== Question %d iteration %d ==========", questionIndex+1, iteration)
		debugLog("AgenticClient: %d tool calls remaining", toolCallsRemaining)
		// With 5 or fewer calls left, warn the model once so it wraps up
		// and calls submit_answer before the budget runs out.
		if !budgetNudged && toolCallsRemaining <= 5 {
			budgetNudged = true
			debugLog("AgenticClient: nudging model about low budget")
			messages = append(messages, llmprovider.TextMessage(
				llmprovider.RoleHuman,
				fmt.Sprintf(budgetNudgePrompt, toolCallsRemaining),
			))
		}
		// Call LLM with retry logic
		debugLog("AgenticClient: calling LLM...")
		resp, err := c.callLLMWithRetry(ctx, provider, messages)
		if err != nil {
			debugLog("AgenticClient: LLM call failed: %v", err)
			// Not wrapped in errRecoverable: a provider failure aborts the run.
			return messages, nil, llmprovider.Usage{}, fmt.Errorf(
				"LLM call failed after %d retries: %w",
				maxLLMRetries,
				err,
			)
		}
		lastUsage = resp.Usage
		if len(resp.Choices) == 0 {
			debugLog("AgenticClient: no choices in response")
			return messages, nil, llmprovider.Usage{}, fmt.Errorf("no response from LLM")
		}
		choice := resp.Choices[0]
		debugLog("AgenticClient: choice - Content=%q, ToolCalls=%d, Thinking=%d",
			truncateString(choice.Content, 200), len(choice.ToolCalls), len(choice.Thinking))
		for j, t := range choice.Thinking {
			debugLog("AgenticClient: thinking[%d]: text=%q sig=%v",
				j, truncateString(t.Text, 150), t.Signature != "")
		}
		// If no tool calls, check if we should nudge the agent
		if len(choice.ToolCalls) == 0 {
			debugLog("AgenticClient: no tool calls in response")
			consecutiveNoTools++
			debugLog(
				"AgenticClient: consecutive no-tool responses: %d/%d",
				consecutiveNoTools,
				maxConsecutiveNoTools,
			)
			// Too many tool-less replies in a row: give up on this
			// question but let the caller continue with the next one.
			if consecutiveNoTools >= maxConsecutiveNoTools {
				return messages, nil, resp.Usage, fmt.Errorf(
					"agent failed to use tools after %d consecutive attempts: %w",
					maxConsecutiveNoTools, errRecoverable,
				)
			}
			// Add the AI response and remind to use tools
			if choice.Content != "" {
				messages = append(
					messages,
					llmprovider.TextMessage(llmprovider.RoleAI, choice.Content),
				)
			}
			debugLog("AgenticClient: reminding agent to use tools")
			messages = append(messages, llmprovider.TextMessage(
				llmprovider.RoleHuman,
				useToolsReminderPrompt,
			))
			// A tool-less turn still consumes budget so the loop terminates.
			toolCallsRemaining--
			continue
		}
		// Reset consecutive no-tool counter when tools are used
		consecutiveNoTools = 0
		// Build the assistant message with all parts from the response:
		// thinking blocks, text content, and tool calls.
		var aiParts []llmprovider.Part
		for _, t := range choice.Thinking {
			aiParts = append(aiParts, t)
		}
		if choice.Content != "" {
			aiParts = append(aiParts, llmprovider.TextPart{Text: choice.Content})
		}
		for _, tc := range choice.ToolCalls {
			aiParts = append(aiParts, tc)
		}
		messages = append(messages, llmprovider.Message{
			Role:  llmprovider.RoleAI,
			Parts: aiParts,
		})
		// Validate submit_answer is called alone
		hasSubmitAnswer := false
		for _, toolCall := range choice.ToolCalls {
			if toolCall.Name == "submit_answer" {
				hasSubmitAnswer = true
				break
			}
		}
		if hasSubmitAnswer && len(choice.ToolCalls) > 1 {
			debugLog("AgenticClient: submit_answer called with other tools - rejecting all")
			// Every rejected call still gets a tool result (and consumes
			// budget) so the conversation stays well-formed for the provider.
			var resultParts []llmprovider.Part
			for _, toolCall := range choice.ToolCalls {
				toolCallsRemaining--
				resultParts = append(resultParts, llmprovider.ToolResultPart{
					ToolCallID: toolCall.ID,
					Name:       toolCall.Name,
					Content:    submitAnswerAloneError,
				})
			}
			messages = append(messages, llmprovider.Message{
				Role:  llmprovider.RoleTool,
				Parts: resultParts,
			})
			continue
		}
		// Execute tool calls and collect results into a single tool message.
		var resultParts []llmprovider.Part
		var answer *AnswerSchema
		for i, toolCall := range choice.ToolCalls {
			toolCallsRemaining--
			response, ans := c.processToolCall(toolCall, i, len(choice.ToolCalls))
			resultParts = append(resultParts, response.Parts...)
			if ans != nil {
				answer = ans
			}
		}
		messages = append(messages, llmprovider.Message{
			Role:  llmprovider.RoleTool,
			Parts: resultParts,
		})
		// A successful submit_answer ends the loop for this question.
		if answer != nil {
			debugLog("AgenticClient: received answer for question %d", questionIndex+1)
			return messages, answer, resp.Usage, nil
		}
	}
	// Budget exhausted without answer
	debugLog("AgenticClient: question %d exhausted budget", questionIndex+1)
	return messages, nil, lastUsage, nil
}
// processToolCall executes a single tool call and returns the tool-result
// message to append to the conversation, plus the parsed answer when the call
// was a successful submit_answer (nil in every other case).
func (c *agenticClientImpl) processToolCall(
	toolCall llmprovider.ToolCallPart,
	index, total int,
) (llmprovider.Message, *AnswerSchema) {
	debugLog(
		"AgenticClient: [%d/%d] executing tool: %s",
		index+1,
		total,
		toolCall.Name,
	)
	debugLog("AgenticClient: tool args: %s", truncateString(toolCall.Arguments, 500))

	// Every exit path wraps exactly one ToolResultPart for this call.
	toolMsg := func(content string) llmprovider.Message {
		return llmprovider.Message{
			Role: llmprovider.RoleTool,
			Parts: []llmprovider.Part{
				llmprovider.ToolResultPart{
					ToolCallID: toolCall.ID,
					Name:       toolCall.Name,
					Content:    content,
				},
			},
		}
	}

	if toolCall.Name != "submit_answer" {
		// Regular exploration tool: run it and surface any execution error
		// to the agent as the tool result text.
		result, err := c.executor.execute(toolCall.Name, toolCall.Arguments)
		if err != nil {
			result = fmt.Sprintf("Error: %v", err)
		}
		debugLog("AgenticClient: tool result: %s", truncateString(result, 300))
		return toolMsg(result), nil
	}

	// submit_answer: decode the structured answer from the tool arguments.
	var answer AnswerSchema
	if err := json.Unmarshal([]byte(toolCall.Arguments), &answer); err != nil {
		debugLog("AgenticClient: failed to parse submit_answer: %v", err)
		// Report parse error back to the agent so it can retry.
		return toolMsg(fmt.Sprintf(
			"Error parsing answer: %v. Please try again with valid JSON.",
			err,
		)), nil
	}
	debugLog("AgenticClient: received answer: short_answer=%v, answer=%s",
		answer.ShortAnswer, truncateString(answer.Answer, 100))
	return toolMsg("Answer recorded successfully."), &answer
}
// callLLMWithRetry calls the LLM, retrying transient failures up to
// maxLLMRetries times with a fixed retryDelay between attempts. Each attempt
// runs under its own llmCallTimeout so a hung call cannot stall the loop.
// On persistent failure it returns the last error observed.
func (c *agenticClientImpl) callLLMWithRetry(
	ctx context.Context,
	provider llmprovider.Provider,
	messages []llmprovider.Message,
) (*llmprovider.Response, error) {
	var lastErr error
	for attempt := 1; attempt <= maxLLMRetries; attempt++ {
		callCtx, cancel := context.WithTimeout(ctx, llmCallTimeout)
		resp, err := provider.GenerateContent(callCtx, messages, llmprovider.WithTools(c.tools))
		cancel()
		if err == nil {
			return resp, nil
		}
		lastErr = err
		debugLog("AgenticClient: LLM call failed (attempt %d/%d): %v", attempt, maxLLMRetries, err)
		if attempt < maxLLMRetries {
			debugLog("AgenticClient: retrying in %v...", retryDelay)
			// Context-aware backoff: a plain time.Sleep here would ignore
			// caller cancellation and block the whole run for retryDelay.
			select {
			case <-ctx.Done():
				return nil, ctx.Err()
			case <-time.After(retryDelay):
			}
		}
	}
	return nil, lastErr
}
// truncateString shortens s to at most maxLen characters (runes), appending
// "..." when truncation occurs. Truncating on rune boundaries instead of raw
// bytes avoids emitting invalid UTF-8 into debug logs; the fast path returns
// s unchanged when it already fits by byte length.
func truncateString(s string, maxLen int) string {
	if maxLen < 0 {
		// Defensive: a negative limit would otherwise panic on slicing.
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}
	runes := []rune(s)
	if len(runes) <= maxLen {
		// Multi-byte runes inflated the byte count; the rune count fits.
		return s
	}
	return string(runes[:maxLen]) + "..."
}
// initProvider creates the appropriate native provider for the given config.
// Only "google", "anthropic" and "openai" are recognized values.
func initProvider(ctx context.Context, opts *AgenticCallOptions) (llmprovider.Provider, error) {
	switch opts.Provider {
	case "anthropic":
		return anthropicprovider.New(opts.APIKey, opts.Model)
	case "openai":
		return openaiprovider.New(opts.APIKey, opts.Model)
	case "google":
		// Gemini is the only provider whose constructor takes a context.
		return geminiprovider.New(ctx, opts.APIKey, opts.Model)
	}
	return nil, fmt.Errorf(
		"unsupported provider: %s (supported: google, anthropic, openai)",
		opts.Provider,
	)
}