From 9f071bcd49b9af66a637ff3e1bccf1e0eb22c368 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 14 May 2026 01:18:36 -0700 Subject: [PATCH 01/33] [feat] support azd optimize and eval --- .../azd_observability_bugbash.md | 0 .../azure.ai.agents/internal/cmd/eval.go | 567 +++++++++++++++++ .../internal/cmd/eval_helpers.go | 216 +++++++ .../azure.ai.agents/internal/cmd/eval_init.go | 280 +++++++++ .../internal/cmd/eval_init_jobs.go | 363 +++++++++++ .../internal/cmd/eval_init_prompts.go | 318 ++++++++++ .../internal/cmd/eval_init_test.go | 577 ++++++++++++++++++ .../azure.ai.agents/internal/cmd/eval_list.go | 150 +++++ .../internal/cmd/eval_list_test.go | 37 ++ .../azure.ai.agents/internal/cmd/eval_run.go | 210 +++++++ .../internal/cmd/eval_run_test.go | 149 +++++ .../azure.ai.agents/internal/cmd/eval_show.go | 182 ++++++ .../azure.ai.agents/internal/cmd/eval_test.go | 531 ++++++++++++++++ .../azure.ai.agents/internal/cmd/optimize.go | 393 ++++++++++++ .../internal/cmd/optimize_apply.go | 157 +++++ .../internal/cmd/optimize_cancel.go | 65 ++ .../internal/cmd/optimize_cancel_test.go | 34 ++ .../internal/cmd/optimize_config.go | 220 +++++++ .../internal/cmd/optimize_config_test.go | 295 +++++++++ .../internal/cmd/optimize_deploy.go | 418 +++++++++++++ .../internal/cmd/optimize_deploy_test.go | 156 +++++ .../internal/cmd/optimize_helpers.go | 115 ++++ .../internal/cmd/optimize_helpers_test.go | 58 ++ .../internal/cmd/optimize_list.go | 126 ++++ .../internal/cmd/optimize_list_test.go | 40 ++ .../internal/cmd/optimize_status.go | 130 ++++ .../internal/cmd/optimize_status_test.go | 38 ++ .../internal/cmd/optimize_test.go | 91 +++ .../azure.ai.agents/internal/cmd/root.go | 2 + .../internal/pkg/agents/agent_yaml/yaml.go | 2 + .../internal/pkg/agents/dataset_api/models.go | 54 ++ .../pkg/agents/dataset_api/models_test.go | 92 +++ .../pkg/agents/dataset_api/operations.go | 220 +++++++ .../pkg/agents/dataset_api/operations_test.go | 207 +++++++ 
.../internal/pkg/agents/eval_api/artifacts.go | 202 ++++++ .../pkg/agents/eval_api/eval_config.go | 143 +++++ .../pkg/agents/eval_api/eval_config_test.go | 269 ++++++++ .../pkg/agents/eval_api/generation.go | 146 +++++ .../pkg/agents/eval_api/generation_test.go | 137 +++++ .../internal/pkg/agents/eval_api/models.go | 368 +++++++++++ .../pkg/agents/eval_api/operations.go | 259 ++++++++ .../pkg/agents/eval_api/operations_test.go | 443 ++++++++++++++ .../internal/pkg/agents/eval_api/poller.go | 186 ++++++ .../pkg/agents/eval_api/poller_test.go | 228 +++++++ .../internal/pkg/agents/opteval/yaml.go | 107 ++++ .../internal/pkg/agents/opteval/yaml_test.go | 165 +++++ .../pkg/agents/optimize_api/client.go | 362 +++++++++++ .../pkg/agents/optimize_api/client_test.go | 267 ++++++++ .../pkg/agents/optimize_api/models.go | 216 +++++++ .../pkg/agents/optimize_api/models_test.go | 237 +++++++ .../pkg/agents/optimize_api/poller.go | 48 ++ .../pkg/agents/optimize_api/poller_test.go | 161 +++++ 52 files changed, 10437 insertions(+) create mode 100644 cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go create mode 100644 
cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_cancel.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_cancel_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go create mode 100644 
cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go diff --git a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go new file mode 100644 index 
00000000000..a01b85117c4 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go @@ -0,0 +1,567 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "log" + "os" + "path/filepath" + "strings" + "time" + + "azureaiagent/internal/exterrors" + "azureaiagent/internal/pkg/agents/agent_yaml" + "azureaiagent/internal/pkg/agents/dataset_api" + "azureaiagent/internal/pkg/agents/eval_api" + "azureaiagent/internal/pkg/agents/opteval" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/azure/azure-dev/cli/azd/pkg/output" + "github.com/azure/azure-dev/cli/azd/pkg/ux" + "github.com/fatih/color" + "github.com/spf13/cobra" + "go.yaml.in/yaml/v3" +) + +const ( + defaultEvalConfigName = "eval.yaml" + defaultEvalName = "smoke-core" + defaultEvalModel = "gpt-4o" + defaultEvalSamples = 100 +) + +type evalConfig = eval_api.EvalConfig +type evalAgentRef = opteval.AgentRef +type evalDatasetRef = opteval.DatasetRef + +// evalState holds transient runtime state stored in the azd environment. +type evalState struct { + InitStatus string + DatasetGenOpID string + DatasetGenStatus string + EvalGenOpID string + EvalGenStatus string + EvalID string +} + +// Azd environment keys for eval state. 
+const ( + evalKeyInitStatus = "LAST_EVAL_INIT_STATUS" + evalKeyDatasetGenOpID = "LAST_EVAL_DATASET_GEN_OP_ID" + evalKeyDatasetGenStatus = "LAST_EVAL_DATASET_GEN_STATUS" + evalKeyEvalGenOpID = "LAST_EVAL_GEN_OP_ID" + evalKeyEvalGenStatus = "LAST_EVAL_GEN_STATUS" + evalKeyEvalID = "LAST_EVAL_ID" +) + +type evalResolvedContext struct { + azdClient *azdext.AzdClient + evalClient *eval_api.EvalClient + datasetClient *dataset_api.DatasetClient + projectRoot string + hasProject bool + agentProject string + agentProjectSource string + agentName string + agentNameSource string + version string + versionSource string + agentKind agent_yaml.AgentKind + agentKindSource string + serviceName string + projectEndpoint string + projectEndpointSource string + envName string +} + +type evalContextOptions struct { + agent string + projectEndpoint string + requireAgent bool + noPrompt bool +} + +func newEvalCommand(extCtx *azdext.ExtensionContext) *cobra.Command { + cmd := &cobra.Command{ + Use: "eval ", + Short: "Create and run quick evals for an agent.", + Long: `Create and run quick evals for an agent. + +These commands are designed for quick agent eval onboarding under azd ai agent. +Use eval init to generate an eval config, then eval run to execute it.`, + } + + cmd.AddCommand(newEvalInitCommand(extCtx)) + cmd.AddCommand(newEvalRunCommand()) + cmd.AddCommand(newEvalListCommand()) + cmd.AddCommand(newEvalShowCommand()) + + return cmd +} + +func resolveEvalContext(ctx context.Context, options evalContextOptions) (*evalResolvedContext, error) { + fmt.Println(output.WithGrayFormat("Resolving eval context...")) + + azdClient, err := azdext.NewAzdClient() + if err != nil { + return nil, fmt.Errorf("failed to create azd client: %w", err) + } + + fmt.Println(output.WithGrayFormat(" Reading project configuration...")) + projectResponse, err := azdClient.Project().Get(ctx, &azdext.EmptyRequest{}) + + // If no azd workspace is found, fall back to prompt-based resolution. 
+ if err != nil || projectResponse.Project == nil { + return resolveEvalContextWithoutProject(ctx, azdClient, options) + } + project := projectResponse.Project + + fmt.Println(output.WithGrayFormat(" Detecting agent service...")) + + // Read the current azd environment once — used for agent info, endpoint, and env name. + var envName string + envResp, envErr := azdClient.Environment().GetCurrent(ctx, &azdext.EmptyRequest{}) + if envErr == nil && envResp.Environment != nil { + envName = envResp.Environment.Name + } + + getEnvValue := func(key string) string { + if envName == "" { + return "" + } + v, e := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envName, Key: key, + }) + if e != nil || v.Value == "" { + return "" + } + return v.Value + } + + var svc *azdext.ServiceConfig + var info *AgentServiceInfo + svc, _, err = resolveAgentService(ctx, azdClient, options.agent, options.noPrompt) + if err == nil { + // Resolve deployed agent name/version from azd environment. 
+ info = &AgentServiceInfo{ServiceName: svc.Name} + serviceKey := toServiceKey(svc.Name) + if v := getEnvValue(fmt.Sprintf("AGENT_%s_NAME", serviceKey)); v != "" { + info.AgentName = v + } + if v := getEnvValue(fmt.Sprintf("AGENT_%s_VERSION", serviceKey)); v != "" { + info.Version = v + } + } else if options.agent == "" && options.requireAgent { + azdClient.Close() + return nil, evalAgentContextError(err) + } + + fmt.Println(output.WithGrayFormat(" Resolving Foundry project endpoint...")) + projectEndpoint := options.projectEndpoint + projectEndpointSource := "--project-endpoint" + if projectEndpoint == "" { + if v := getEnvValue("AZURE_AI_PROJECT_ENDPOINT"); v != "" { + projectEndpoint = v + projectEndpointSource = "AZURE_AI_PROJECT_ENDPOINT" + } + } + if projectEndpoint == "" { + if v := getEnvValue("AZURE_AI_PROJECT_ID"); v != "" { + ep, epErr := endpointFromProjectID(v) + if epErr != nil { + azdClient.Close() + return nil, epErr + } + projectEndpoint = ep + projectEndpointSource = "AZURE_AI_PROJECT_ID" + } + } + if projectEndpoint == "" { + azdClient.Close() + return nil, exterrors.Dependency( + exterrors.CodeMissingAiProjectEndpoint, + "Foundry project context could not be resolved", + "run 'azd ai agent init' to configure your project, or pass --project-endpoint directly", + ) + } + + agentName := options.agent + agentNameSource := "--agent" + agentVersion := "" + agentVersionSource := "unresolved" + agentKind := agent_yaml.AgentKind("") + agentKindSource := "unresolved" + serviceName := "" + agentProject := project.Path + agentProjectSource := "workspace root" + if agentName == "" { + agentNameSource = "unresolved" + } + if svc != nil { + serviceName = svc.Name + agentProject = filepath.Join(project.Path, svc.RelativePath) + agentProjectSource = fmt.Sprintf("azure.yaml service %q project path", svc.Name) + serviceKey := toServiceKey(svc.Name) + if info != nil && info.AgentName != "" { + agentName = info.AgentName + agentNameSource = 
fmt.Sprintf("AGENT_%s_NAME", serviceKey) + } + if info != nil && info.Version != "" { + agentVersion = info.Version + agentVersionSource = fmt.Sprintf("AGENT_%s_VERSION", serviceKey) + } + if detectedKind, manifestPath := detectEvalAgentKind(agentProject); detectedKind != "" { + agentKind = detectedKind + agentKindSource = relPathForYaml(project.Path, manifestPath) + } + } + if agentKind == "" { + agentKind = agent_yaml.AgentKindHosted + agentKindSource = "default" + } + if !agent_yaml.IsValidAgentKind(agentKind) { + azdClient.Close() + return nil, fmt.Errorf("unsupported agent kind %q", agentKind) + } + + if options.requireAgent && agentName == "" { + azdClient.Close() + return nil, evalAgentContextError(nil) + } + + credential, err := newAgentCredential() + if err != nil { + azdClient.Close() + return nil, err + } + evalClient := eval_api.NewEvalClient(projectEndpoint, credential) + datasetClient := dataset_api.NewDatasetClient(projectEndpoint, credential) + + return &evalResolvedContext{ + azdClient: azdClient, + evalClient: evalClient, + datasetClient: datasetClient, + projectRoot: project.Path, + hasProject: true, + agentProject: agentProject, + agentProjectSource: agentProjectSource, + agentName: agentName, + agentNameSource: agentNameSource, + version: agentVersion, + versionSource: agentVersionSource, + agentKind: agentKind, + agentKindSource: agentKindSource, + serviceName: serviceName, + projectEndpoint: projectEndpoint, + projectEndpointSource: projectEndpointSource, + envName: envName, + }, nil +} + +// resolveEvalContextWithoutProject prompts the user for essential inputs when +// there is no azd workspace (no azure.yaml). In --no-prompt mode it requires +// --project-endpoint and --agent to be passed explicitly. +func resolveEvalContextWithoutProject( + ctx context.Context, + azdClient *azdext.AzdClient, + options evalContextOptions, +) (*evalResolvedContext, error) { + fmt.Println(output.WithGrayFormat(" No azd project found. 
Prompting for inputs...")) + + projectEndpoint := options.projectEndpoint + agentName := options.agent + + if options.noPrompt { + if projectEndpoint == "" { + azdClient.Close() + return nil, exterrors.Dependency( + exterrors.CodeMissingAiProjectEndpoint, + "--project-endpoint is required when running outside an azd project with --no-prompt", + "pass --project-endpoint (-p) with your Foundry project endpoint URL", + ) + } + if agentName == "" && options.requireAgent { + azdClient.Close() + return nil, evalAgentContextError(nil) + } + } else { + prompt := azdClient.Prompt() + + if projectEndpoint == "" { + resp, err := prompt.Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Foundry project endpoint URL", + IgnoreHintKeys: true, + }, + }) + if err != nil { + azdClient.Close() + return nil, fmt.Errorf("prompting for project endpoint: %w", err) + } + projectEndpoint = strings.TrimSpace(resp.Value) + if projectEndpoint == "" { + azdClient.Close() + return nil, fmt.Errorf("project endpoint is required") + } + } + + if agentName == "" && options.requireAgent { + resp, err := prompt.Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Agent name", + IgnoreHintKeys: true, + }, + }) + if err != nil { + azdClient.Close() + return nil, fmt.Errorf("prompting for agent name: %w", err) + } + agentName = strings.TrimSpace(resp.Value) + if agentName == "" { + azdClient.Close() + return nil, fmt.Errorf("agent name is required") + } + } + } + + credential, err := newAgentCredential() + if err != nil { + azdClient.Close() + return nil, err + } + + cwd, _ := os.Getwd() + evalClient := eval_api.NewEvalClient(projectEndpoint, credential) + datasetClient := dataset_api.NewDatasetClient(projectEndpoint, credential) + + return &evalResolvedContext{ + azdClient: azdClient, + evalClient: evalClient, + datasetClient: datasetClient, + projectRoot: cwd, + agentProject: cwd, + agentProjectSource: "current directory", + agentName: 
agentName, + agentNameSource: "user input", + version: "", + versionSource: "unresolved", + agentKind: agent_yaml.AgentKindHosted, + agentKindSource: "default", + serviceName: "", + projectEndpoint: projectEndpoint, + projectEndpointSource: "user input", + envName: "", + }, nil +} + +func printEvalDetectedContext(resolved *evalResolvedContext, configPath string) { + fmt.Println() + fmt.Println(color.CyanString("Detected eval target:")) + if resolved.serviceName != "" { + printEvalField("Service", resolved.serviceName, "azure.yaml") + } + printEvalField("Agent", resolved.agentName, resolved.agentNameSource) + printEvalField("Version", resolved.version, resolved.versionSource) + printEvalField("Kind", string(resolved.agentKind), resolved.agentKindSource) + printEvalField("Endpoint", resolved.projectEndpoint, resolved.projectEndpointSource) + printEvalField("Project", resolved.agentProject, resolved.agentProjectSource) + fmt.Printf(" Eval config: %s\n", output.WithHighLightFormat(configPath)) + fmt.Println() +} + +func printEvalField(label, value, source string) { + padded := fmt.Sprintf("%-16s", label+":") + if value == "" || source == "unresolved" { + fmt.Printf(" %s%s\n", padded, output.WithGrayFormat("%s (%s)", value, source)) + } else { + fmt.Printf(" %s %s %s\n", + color.GreenString("(✓)"), + padded+output.WithHighLightFormat(value), + output.WithGrayFormat("(%s)", source), + ) + } +} + +func detectEvalAgentKind(agentProject string) (agent_yaml.AgentKind, string) { + for _, fileName := range []string{"agent.yaml", "agent.yml"} { + path := filepath.Join(agentProject, fileName) + data, err := os.ReadFile(path) //nolint:gosec // local agent manifest path is derived from azure.yaml service project + if err != nil { + continue + } + + var manifest struct { + Kind agent_yaml.AgentKind `yaml:"kind"` + } + if err := yaml.Unmarshal(data, &manifest); err != nil { + continue + } + if agent_yaml.IsValidAgentKind(manifest.Kind) { + return manifest.Kind, path + } + } + + 
return "", "" +} + +func evalAgentContextError(cause error) error { + message := "agent context could not be resolved" + if cause != nil { + message = fmt.Sprintf("%s: %s", message, cause) + } + return exterrors.Dependency( + exterrors.CodeMissingAgentEnvVars, + message, + "run 'azd ai agent init' to configure your agent, or pass --agent and --project-endpoint directly", + ) +} + +func endpointFromProjectID(projectID string) (string, error) { + project, err := extractProjectDetails(projectID) + if err != nil { + return "", err + } + return buildAgentEndpoint(project.AccountName, project.ProjectName), nil +} + +func pollEvalOperation( + ctx context.Context, + label string, + operationID string, + get eval_api.GetJobFunc, + apiVersion string, +) (*eval_api.GenerationJob, error) { + return pollEvalOperationWithSpinner(ctx, label, operationID, get, apiVersion, true) +} + +func pollEvalOperationWithSpinner( + ctx context.Context, + label string, + operationID string, + get eval_api.GetJobFunc, + apiVersion string, + showSpinner bool, +) (*eval_api.GenerationJob, error) { + if operationID == "" { + return nil, fmt.Errorf("%s did not return an operation ID", strings.ToLower(label)) + } + + start := time.Now() + if showSpinner { + spinner := ux.NewSpinner(&ux.SpinnerOptions{ + Text: label + "...", + ClearOnStop: true, + }) + if err := spinner.Start(ctx); err != nil { + fmt.Printf("%s: running\n", label) + } + defer func() { _ = spinner.Stop(ctx) }() + } + + poller := eval_api.NewPoller(operationID, apiVersion, get) + job, err := poller.Poll(ctx) + + elapsed := time.Since(start).Round(time.Second) + + if err != nil { + if _, ok := errors.AsType[*eval_api.PollerTimeoutError](err); ok { + fmt.Printf(" %s %s (%s)\n", + color.YellowString("(!) 
Timed out"), label, elapsed) + return nil, err + } + if jfe, ok := errors.AsType[*eval_api.JobFailedError](err); ok { + if body, marshalErr := json.MarshalIndent(jfe.Job, "", " "); marshalErr == nil { + log.Printf("[debug] %s: failed response:\n%s", label, body) + } + fmt.Printf(" %s %s (%s)\n", color.RedString("(x) Failed"), label, elapsed) + return nil, fmt.Errorf("%s failed with status %q", strings.ToLower(label), jfe.Status) + } + fmt.Printf(" %s %s\n", color.RedString("(x) Failed"), label) + return nil, err + } + + log.Printf("[debug] %s: completed successfully", label) + fmt.Printf(" %s %s (%s)\n", color.GreenString("(✓) Done"), label, elapsed) + return job, nil +} + +func readEvalConfig(path string) (*evalConfig, error) { + return eval_api.LoadEvalConfig(path) +} + +func writeEvalConfig(path string, cfg *evalConfig) error { + return eval_api.WriteEvalConfig(path, cfg) +} + +// formatTimestamp formats a timestamp value for display in eval output. +func formatTimestamp(ts any) string { + return eval_api.FormatTimestamp(ts) +} + +// loadEvalState reads eval runtime state from the azd environment. +// Returns an empty state if no values are set. +func loadEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string) *evalState { + get := func(key string) string { + v, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envName, Key: key, + }) + if err != nil || v.Value == "" { + return "" + } + return v.Value + } + return &evalState{ + InitStatus: get(evalKeyInitStatus), + DatasetGenOpID: get(evalKeyDatasetGenOpID), + DatasetGenStatus: get(evalKeyDatasetGenStatus), + EvalGenOpID: get(evalKeyEvalGenOpID), + EvalGenStatus: get(evalKeyEvalGenStatus), + EvalID: get(evalKeyEvalID), + } +} + +// saveEvalState persists eval runtime state to the azd environment. 
+func saveEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string, state *evalState) error { + pairs := []struct { + key, val string + }{ + {evalKeyInitStatus, state.InitStatus}, + {evalKeyDatasetGenOpID, state.DatasetGenOpID}, + {evalKeyDatasetGenStatus, state.DatasetGenStatus}, + {evalKeyEvalGenOpID, state.EvalGenOpID}, + {evalKeyEvalGenStatus, state.EvalGenStatus}, + {evalKeyEvalID, state.EvalID}, + } + for _, p := range pairs { + if _, err := azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ + EnvName: envName, Key: p.key, Value: p.val, + }); err != nil { + return fmt.Errorf("setting %s in azd env: %w", p.key, err) + } + } + return nil +} + +// clearEvalState removes eval state keys from the azd environment. +func clearEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string) { + for _, key := range []string{ + evalKeyInitStatus, evalKeyDatasetGenOpID, evalKeyDatasetGenStatus, + evalKeyEvalGenOpID, evalKeyEvalGenStatus, evalKeyEvalID, + } { + _, _ = azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ + EnvName: envName, Key: key, Value: "", + }) + } +} + +func relPathForYaml(baseDir string, target string) string { + if rel, err := filepath.Rel(baseDir, target); err == nil { + return filepath.ToSlash(rel) + } + return filepath.ToSlash(target) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go new file mode 100644 index 00000000000..fac4411a2a8 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go @@ -0,0 +1,216 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "context" + "encoding/json" + "fmt" + "log" + "os" + "path/filepath" + + "azureaiagent/internal/pkg/agents/dataset_api" + "azureaiagent/internal/pkg/agents/eval_api" + "azureaiagent/internal/pkg/agents/opteval" +) + +// foundryBaseDir is the base directory for eval artifacts under the project root. +const foundryBaseDir = ".azure/.foundry" + +// resolveEvalOutputPath resolves the eval config output path. +func resolveEvalOutputPath(output, agentProject string) string { + return eval_api.ResolveEvalOutputPath(output, agentProject) +} + +// resolveEvalConfigPath resolves the eval config path for reading. +func resolveEvalConfigPath(config, agentProject string) string { + return eval_api.ResolveEvalConfigPath(config, agentProject) +} + +// ensureFoundryDirs creates the .azure/.foundry directory tree with standard +// subdirectories (datasets, evaluators, results). +func ensureFoundryDirs(projectRoot string) error { + base := filepath.Join(projectRoot, ".azure", ".foundry") + for _, sub := range []string{"datasets", "evaluators", "results"} { + if err := os.MkdirAll(filepath.Join(base, sub), 0750); err != nil { + return err + } + } + return nil +} + +// saveDatasetGenerationResult saves the raw dataset generation result JSON. +func saveDatasetGenerationResult(projectRoot, datasetName string, result json.RawMessage) { + if datasetName == "" || len(result) == 0 { + return + } + dir := filepath.Join(projectRoot, ".azure", ".foundry", "datasets") + if err := os.MkdirAll(dir, 0750); err != nil { + log.Printf("[debug] failed to create dataset dir: %v", err) + return + } + // Pretty-print the JSON for human review. 
+ var pretty json.RawMessage + if err := json.Unmarshal(result, &pretty); err == nil { + if formatted, err := json.MarshalIndent(pretty, "", " "); err == nil { + result = formatted + } + } + path := filepath.Join(dir, datasetName+".json") + if err := os.WriteFile(path, result, 0600); err != nil { + log.Printf("[debug] failed to save dataset result: %v", err) + } +} + +// downloadDatasetArtifact downloads the dataset and writes it locally. +// If the download fails (e.g., non-TLS test server), a placeholder is written. +func downloadDatasetArtifact( + ctx context.Context, + client *dataset_api.DatasetClient, + projectRoot string, + ref *opteval.DatasetRef, + apiVersion string, +) error { + if ref == nil || ref.Name == "" { + return nil + } + + dest := datasetArtifactPath(projectRoot, ref) + dir := filepath.Dir(dest) + if err := os.MkdirAll(dir, 0750); err != nil { + return fmt.Errorf("creating dataset artifact dir: %w", err) + } + + // Attempt full download via the dataset API. + cred, credErr := client.GetDatasetCredential(ctx, ref.Name, ref.Version, apiVersion) + if credErr != nil { + // Gracefully write a placeholder when credential fetch fails. + log.Printf("[debug] dataset credential fetch failed: %v — writing placeholder", credErr) + return os.WriteFile(dest, []byte("{}\n"), 0600) + } + + downloadURL := cred.ResolvedDownloadURI() + if downloadURL == "" { + return os.WriteFile(dest, []byte("{}\n"), 0600) + } + + data, dlErr := client.DownloadDataset(ctx, downloadURL) + if dlErr != nil { + log.Printf("[debug] dataset download failed: %v — writing placeholder", dlErr) + return os.WriteFile(dest, []byte("{}\n"), 0600) + } + + return os.WriteFile(dest, data, 0600) +} + +// datasetArtifactPath returns the local filesystem path for a downloaded dataset. 
+func datasetArtifactPath(projectRoot string, ref *opteval.DatasetRef) string { + if ref == nil || ref.Name == "" { + return "" + } + name := ref.Name + if ref.Version != "" { + name = name + "-" + ref.Version + } + return filepath.Join(projectRoot, ".azure", ".foundry", "datasets", name+".jsonl") +} + +// saveEvaluatorResult saves the raw evaluator generation result. +func saveEvaluatorResult(projectRoot, evaluatorName string, result json.RawMessage) { + if evaluatorName == "" || len(result) == 0 { + return + } + dir := filepath.Join(projectRoot, ".azure", ".foundry", "evaluators") + if err := os.MkdirAll(dir, 0750); err != nil { + log.Printf("[debug] failed to create evaluator dir: %v", err) + return + } + var pretty json.RawMessage + if err := json.Unmarshal(result, &pretty); err == nil { + if formatted, err := json.MarshalIndent(pretty, "", " "); err == nil { + result = formatted + } + } + path := filepath.Join(dir, evaluatorName+".json") + if err := os.WriteFile(path, result, 0600); err != nil { + log.Printf("[debug] failed to save evaluator result: %v", err) + } +} + +// writeEvalReviewArtifacts writes human-readable review artifacts for evaluators. +// It writes a stub YAML file for each evaluator unless a result JSON already exists. +func writeEvalReviewArtifacts(projectRoot string, cfg *eval_api.EvalConfig) { + if cfg == nil { + return + } + dir := filepath.Join(projectRoot, ".azure", ".foundry", "evaluators") + if err := os.MkdirAll(dir, 0750); err != nil { + log.Printf("[debug] failed to create evaluator review dir: %v", err) + return + } + for _, evaluator := range cfg.Evaluators { + if evaluator == "" { + continue + } + // Skip if a result JSON already exists. 
+ jsonPath := filepath.Join(dir, evaluator+".json") + if _, err := os.Stat(jsonPath); err == nil { + continue + } + yamlPath := filepath.Join(dir, evaluator+".yaml") + stub := fmt.Sprintf("# Evaluator stub: %s\nname: %s\n", evaluator, evaluator) + if err := os.WriteFile(yamlPath, []byte(stub), 0600); err != nil { + log.Printf("[debug] failed to write evaluator stub: %v", err) + } + } + + // Print artifact paths for user review. + artifactsDir := filepath.Join(projectRoot, ".azure", ".foundry") + fmt.Printf("\n Artifacts: %s\n", artifactsDir) + if cfg.DatasetReference != nil && cfg.DatasetReference.Name != "" { + name := cfg.DatasetReference.Name + if cfg.DatasetReference.Version != "" { + name += "-" + cfg.DatasetReference.Version + } + fmt.Printf(" datasets/%s.jsonl\n", name) + } + for _, evaluator := range cfg.Evaluators { + if evaluator != "" { + fmt.Printf(" evaluators/%s.json\n", evaluator) + } + } +} + +// writeJSONFile writes a value as formatted JSON to the specified path. +func writeJSONFile(path string, v any) error { + if err := os.MkdirAll(filepath.Dir(path), 0750); err != nil { + return fmt.Errorf("creating output directory: %w", err) + } + data, err := json.MarshalIndent(v, "", " ") + if err != nil { + return fmt.Errorf("marshalling JSON: %w", err) + } + return os.WriteFile(path, data, 0600) +} + +// formatAny converts any value to a string for display. 
+func formatAny(v any) string { + if v == nil { + return "" + } + switch val := v.(type) { + case string: + return val + case float64: + if val == float64(int64(val)) { + return fmt.Sprintf("%d", int64(val)) + } + return fmt.Sprintf("%g", val) + case bool: + return fmt.Sprintf("%t", val) + default: + return fmt.Sprintf("%v", val) + } +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go new file mode 100644 index 00000000000..ebcf4f6cd05 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -0,0 +1,280 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "context" + "errors" + "fmt" + "os" + "strings" + + "azureaiagent/internal/pkg/agents/agent_yaml" + "azureaiagent/internal/pkg/agents/eval_api" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +// DataGenerationAPIVersion is the API version used for data generation jobs. +const DataGenerationAPIVersion = "v1" + +// EvalInitFlags defines the customized flags for the eval init command. +type evalInitFlags struct { + name string + agent string + projectEndpoint string + genInstruction string + genInstructionFile string + evalModel string + dataset string + output string + maxSamples int + evaluators []string + noWait bool + resetDefaults bool + evalModelSet bool + maxSamplesSet bool + traceDays int + // Internal flags set during interactive prompts. + regenerateDataset bool + regenerateEvaluator bool +} + +func newEvalInitCommand(extCtx *azdext.ExtensionContext) *cobra.Command { + flags := &evalInitFlags{evalModel: defaultEvalModel, maxSamples: defaultEvalSamples, output: defaultEvalConfigName} + cmd := &cobra.Command{ + Use: "init", + Short: "Generate a local eval suite for a deployed agent.", + Long: `Generate a local eval suite for a deployed agent. 
+ +By default, this command submits dataset and evaluator generation jobs, waits for +completion, downloads review artifacts under .azure/.foundry, and writes eval.yaml at +the agent project root. Use --no-wait to write pending operation IDs and return.`, + Example: ` azd ai agent eval init + azd ai agent eval init --gen-instruction "This agent handles restaurant reservations." --eval-model gpt-4o --max-samples 50 + azd ai agent eval init --gen-instruction-file ./instructions.md --eval-model gpt-4o + azd ai agent eval init --dataset ./tests/golden.jsonl --evaluator builtin.intent_resolution`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := azdext.WithAccessToken(cmd.Context()) + logCleanup := setupDebugLogging(cmd.Flags()) + defer logCleanup() + flags.evalModelSet = cmd.Flags().Changed("eval-model") + flags.maxSamplesSet = cmd.Flags().Changed("max-samples") + return runEvalInit(ctx, flags, extCtx.NoPrompt) + }, + } + + cmd.Flags().StringVar(&flags.name, "name", "", "Name for the eval suite") + cmd.Flags().BoolVar(&flags.noWait, "no-wait", false, "Submit generation jobs and return immediately") + cmd.Flags().StringVar(&flags.agent, "agent", "", "Target agent name") + cmd.Flags().StringVarP(&flags.projectEndpoint, "project-endpoint", "p", "", "Microsoft Foundry project endpoint URL") + cmd.Flags().StringVarP(&flags.genInstruction, "gen-instruction", "g", "", "Inline instruction for dataset and evaluator generation") + cmd.Flags().StringVarP(&flags.genInstructionFile, "gen-instruction-file", "G", "", "Path to a file containing the generation instruction") + cmd.Flags().StringVar(&flags.evalModel, "eval-model", defaultEvalModel, "Model used for evaluation and generation, and also as the default model for evaluation") + cmd.Flags().StringVar(&flags.dataset, "dataset", "", "Existing local file or registered dataset name to use for evaluation (instead of generating a new dataset)") + cmd.Flags().IntVar(&flags.maxSamples, 
"max-samples", defaultEvalSamples, "Maximum number of samples to generate") + cmd.Flags().StringArrayVar(&flags.evaluators, "evaluator", nil, "Built-in or custom evaluator name") + cmd.Flags().StringVarP(&flags.output, "out-file", "O", defaultEvalConfigName, "Eval config path") + cmd.Flags().IntVar(&flags.traceDays, "trace-days", 0, "Include agent traces from the last N days (0 = no traces)") + _ = cmd.Flags().MarkHidden("trace-days") + cmd.Flags().BoolVar(&flags.resetDefaults, "reset-defaults", false, "Overwrite an existing eval config") + + return cmd +} + +// runEvalInit executes the eval init command logic. It resolves context, prompts for missing options, submits generation jobs, polls for completion (unless --no-wait), writes the eval config, and prints next steps. +func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error { + if flags.genInstruction != "" && flags.genInstructionFile != "" { + return fmt.Errorf("cannot use both --gen-instruction and --gen-instruction-file; provide one or the other") + } + if flags.genInstructionFile != "" { + data, err := os.ReadFile(flags.genInstructionFile) //nolint:gosec // user-provided instruction file path + if err != nil { + return fmt.Errorf("reading instruction file %q: %w", flags.genInstructionFile, err) + } + flags.genInstruction = strings.TrimSpace(string(data)) + } + + resolved, err := resolveEvalContext(ctx, evalContextOptions{ + agent: flags.agent, + projectEndpoint: flags.projectEndpoint, + requireAgent: true, + noPrompt: noPrompt, + }) + if err != nil { + return err + } + defer resolved.azdClient.Close() + + configPath := resolveEvalOutputPath(flags.output, resolved.agentProject) + printEvalDetectedContext(resolved, configPath) + + // When eval.yaml exists, decide whether to regenerate or create fresh. 
+ existingCfg, hasExisting := tryLoadExistingEvalConfig(configPath) + isRegenerate := false + var builtinEvals []string + + if hasExisting && !flags.resetDefaults { + if noPrompt { + // --no-prompt: treat as full regeneration. + flags.regenerateDataset = true + flags.regenerateEvaluator = true + } else { + if err := promptRegenerateChoices(ctx, resolved, existingCfg, flags); err != nil { + return err + } + if !flags.regenerateDataset && !flags.regenerateEvaluator { + fmt.Println("Keeping existing eval config unchanged.") + return nil + } + } + isRegenerate = true + + // Carry forward existing options when not explicitly overridden. + if flags.name == "" && existingCfg.Name != "" { + flags.name = existingCfg.Name + } + if existingCfg.Options != nil && !flags.evalModelSet { + flags.evalModel = existingCfg.Options.EvalModel + } + if flags.genInstruction == "" { + flags.genInstruction = existingCfg.GenerationInstruction + } + if !flags.maxSamplesSet && existingCfg.MaxSamples > 0 { + flags.maxSamples = existingCfg.MaxSamples + } + if flags.traceDays == 0 && existingCfg.TraceDays > 0 { + flags.traceDays = existingCfg.TraceDays + } + // Track builtin evaluators for preservation during evaluator regeneration. + if flags.regenerateEvaluator { + _, builtinEvals = eval_api.SplitEvaluators(existingCfg.Evaluators) + } + } + + // When the user hasn't explicitly set --eval-model, use the deployed model. + if !flags.evalModelSet && resolved.envName != "" { + if v, err := resolved.azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: resolved.envName, + Key: "AZURE_AI_MODEL_DEPLOYMENT_NAME", + }); err == nil && v.Value != "" { + flags.evalModel = v.Value + } + } + + if err := promptEvalInitOptions(ctx, resolved, flags, noPrompt); err != nil { + return err + } + + // Finalize the eval suite name with a random suffix to avoid collisions. 
+ flags.name = resolveEvalName(flags) + "-" + randomSuffix() + + // Prompt agents use the agent source directly; hosted agents require a gen-instruction. + if resolved.agentKind != agent_yaml.AgentKindPrompt && + flags.genInstruction == "" && (flags.dataset == "" || len(flags.evaluators) == 0) { + return fmt.Errorf("--gen-instruction is required when generating eval assets for a hosted agent") + } + if flags.maxSamples <= 0 { + return fmt.Errorf("--max-samples must be a positive integer") + } + + if resolved.hasProject { + if err := ensureFoundryDirs(resolved.projectRoot); err != nil { + return err + } + } + + evalCfg := newEvalConfig(flags, resolved) + state := &evalState{} + + // Determine which generation jobs to submit. + var needDatasetGen, needEvalGen bool + if isRegenerate { + needDatasetGen = flags.regenerateDataset + needEvalGen = flags.regenerateEvaluator + // Preserve fields that are not being regenerated. + if !needDatasetGen { + evalCfg.DatasetFile = existingCfg.DatasetFile + evalCfg.Config.DatasetReference = existingCfg.Config.DatasetReference + } + if !needEvalGen { + evalCfg.Evaluators = existingCfg.Evaluators + } + } else { + needDatasetGen = flags.dataset == "" + needEvalGen = len(flags.evaluators) == 0 + if !needDatasetGen { + // User provided a local dataset file — use it directly. + datasetPath, err := resolveLocalDatasetFile(flags.dataset, resolved.agentProject) + if err != nil { + return err + } + evalCfg.DatasetFile = datasetPath + } + if !needEvalGen { + evalCfg.Evaluators = evaluatorsFromFlags(flags.evaluators) + } + } + + // Submit generation jobs (fast API calls). 
+ if needDatasetGen { + job, err := submitDatasetGeneration(ctx, resolved, flags) + if err != nil { + return err + } + state.DatasetGenOpID = job.OperationID() + state.DatasetGenStatus = job.NormalizedStatus() + } + if needEvalGen { + job, err := submitEvaluatorGeneration(ctx, resolved, flags) + if err != nil { + return err + } + state.EvalGenOpID = job.OperationID() + state.EvalGenStatus = job.NormalizedStatus() + } + + if flags.noWait { + if needDatasetGen || needEvalGen { + state.InitStatus = "pending" + } + return writePendingEvalInit(ctx, resolved, configPath, evalCfg, state) + } + + if err := pollAndFinalizeJobs(ctx, resolved, evalCfg, state, builtinEvals); err != nil { + if _, ok := errors.AsType[*initTimeoutError](err); ok { + return writeTimedOutEvalInit(ctx, resolved, configPath, evalCfg, state) + } + return err + } + + state.InitStatus = "completed" + clearEvalState(ctx, resolved.azdClient, resolved.envName) + if err := writeEvalConfig(configPath, evalCfg); err != nil { + return err + } + + if resolved.hasProject { + writeEvalReviewArtifacts(resolved.projectRoot, evalCfg) + } + if isRegenerate { + fmt.Println(color.GreenString("Eval suite regenerated")) + } else { + fmt.Println(color.GreenString("Eval suite created")) + } + fmt.Printf(" Config: %s\n", configPath) + if evalCfg.DatasetFile != "" { + fmt.Printf(" Dataset: %s\n", evalCfg.DatasetFile) + } + for _, evaluator := range evalCfg.Evaluators { + if evaluator != "" { + fmt.Printf(" Evaluator: %s\n", evaluator) + } + } + fmt.Printf("\n Review the generated assets, then run:\n %s\n", "azd ai agent eval run") + return nil +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go new file mode 100644 index 00000000000..081b0f01fb5 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -0,0 +1,363 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +package cmd + +import ( + "context" + "crypto/rand" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "log" + "os" + "path/filepath" + "sync" + + "azureaiagent/internal/pkg/agents/eval_api" + "azureaiagent/internal/pkg/agents/opteval" + + "github.com/fatih/color" +) + +func resolveEvalName(flags *evalInitFlags) string { + if flags.name != "" { + return flags.name + } + return defaultEvalName +} + +// randomSuffix returns a short random hex string (4 bytes = 8 chars). +func randomSuffix() string { + b := make([]byte, 4) + if _, err := rand.Read(b); err != nil { + return "0000" + } + return hex.EncodeToString(b) +} + +func newEvalConfig(flags *evalInitFlags, resolved *evalResolvedContext) *evalConfig { + return &evalConfig{ + Config: opteval.Config{ + Name: resolveEvalName(flags), + Agent: evalAgentRef{ + Name: resolved.agentName, + Kind: resolved.agentKind, + Version: resolved.version, + }, + }, + Options: &opteval.Options{ + EvalModel: flags.evalModel, + }, + GenerationInstruction: flags.genInstruction, + MaxSamples: flags.maxSamples, + TraceDays: flags.traceDays, + } +} + +func submitDatasetGeneration( + ctx context.Context, + resolved *evalResolvedContext, + flags *evalInitFlags, +) (*eval_api.GenerationJob, error) { + var traces *eval_api.TraceOptions + if flags.traceDays > 0 { + traces = &eval_api.TraceOptions{Days: flags.traceDays} + } + sources := eval_api.BuildGenerationSources( + string(resolved.agentKind), resolved.agentName, resolved.version, flags.genInstruction, traces, + ) + request := eval_api.NewDataGenerationJobRequest( + resolveEvalName(flags), flags.evalModel, flags.maxSamples, sources, + ) + if body, err := json.MarshalIndent(request, "", " "); err == nil { + log.Printf("[debug] submitDatasetGeneration request:\n%s", body) + } + return resolved.evalClient.CreateDataGenerationJob(ctx, request, DataGenerationAPIVersion) +} + +func submitEvaluatorGeneration( + ctx context.Context, + resolved 
*evalResolvedContext, + flags *evalInitFlags, +) (*eval_api.GenerationJob, error) { + var traces *eval_api.TraceOptions + if flags.traceDays > 0 { + traces = &eval_api.TraceOptions{Days: flags.traceDays} + } + sources := eval_api.BuildGenerationSources( + string(resolved.agentKind), resolved.agentName, resolved.version, flags.genInstruction, traces, + ) + request := eval_api.NewEvaluatorGenerationJobRequest( + resolveEvalName(flags), flags.evalModel, sources, + ) + if body, err := json.MarshalIndent(request, "", " "); err == nil { + log.Printf("[debug] submitEvaluatorGeneration request:\n%s", body) + } + return resolved.evalClient.CreateEvaluatorGenerationJob(ctx, request, DefaultAgentAPIVersion) +} + +// resolveLocalDatasetFile resolves the dataset flag value to an absolute path +// for the local JSONL file. If the value is relative it is resolved against +// the agent project directory. +func resolveLocalDatasetFile(dataset string, agentProject string) (string, error) { + if filepath.IsAbs(dataset) { + if _, err := os.Stat(dataset); err != nil { + return "", fmt.Errorf("dataset file %q is not accessible: %w", dataset, err) + } + return dataset, nil + } + abs := filepath.Join(agentProject, dataset) + if _, err := os.Stat(abs); err != nil { + return "", fmt.Errorf("dataset file %q is not accessible: %w", dataset, err) + } + return abs, nil +} + +func datasetFromJob(job *eval_api.GenerationJob) *evalDatasetRef { + return &evalDatasetRef{ + Name: job.ResolvedDatasetName(), + Version: job.ResolvedDatasetVersion(), + } +} + +func evaluatorFromJob(job *eval_api.GenerationJob) string { + return job.ResolvedEvaluatorName() +} + +func evaluatorsFromFlags(values []string) []string { + return values +} + +func buildOpenAIEvalRequest(evalCfg *evalConfig) *eval_api.CreateOpenAIEvalRequest { + return evalCfg.ToAgentTargetAdaptableEvalGroupRequest() +} + +func resumeEvalInit( + ctx context.Context, + resolved *evalResolvedContext, + configPath string, + evalCfg *evalConfig, + 
state *evalState, +) error { + if err := pollAndFinalizeJobs(ctx, resolved, evalCfg, state, nil); err != nil { + if _, ok := errors.AsType[*initTimeoutError](err); ok { + return writeTimedOutEvalInit(ctx, resolved, configPath, evalCfg, state) + } + return err + } + state.InitStatus = "completed" + clearEvalState(ctx, resolved.azdClient, resolved.envName) + if resolved.hasProject { + writeEvalReviewArtifacts(resolved.projectRoot, evalCfg) + } + return writeEvalConfig(configPath, evalCfg) +} + +// pollAndFinalizeJobs polls pending dataset and evaluator generation jobs in +// parallel, saves artifacts when an azd project exists, and updates state and +// evalCfg. Jobs whose status is already terminal are skipped (safe for resume). +// builtinEvals are prepended to the generated evaluator name on completion; +// pass nil for fresh inits. +func pollAndFinalizeJobs( + ctx context.Context, + resolved *evalResolvedContext, + evalCfg *evalConfig, + state *evalState, + builtinEvals []string, +) error { + var ( + mu sync.Mutex + datasetPollErr error + evalPollErr error + wg sync.WaitGroup + ) + + pollDataset := state.DatasetGenOpID != "" && + !eval_api.ParseJobStatus(state.DatasetGenStatus).IsTerminal() + pollEval := state.EvalGenOpID != "" && + !eval_api.ParseJobStatus(state.EvalGenStatus).IsTerminal() + + // When both jobs run in parallel, disable individual spinners to avoid + // overlapping terminal output. Print status lines upfront instead. 
+ parallel := pollDataset && pollEval + if parallel { + fmt.Println(" Waiting for generation jobs...") + fmt.Printf(" - Dataset generation: %s\n", state.DatasetGenOpID) + fmt.Printf(" - Evaluator generation: %s\n", state.EvalGenOpID) + } + + if pollDataset { + wg.Add(1) + go func() { + defer wg.Done() + completed, err := pollEvalOperationWithSpinner( + ctx, "Dataset generation", state.DatasetGenOpID, + resolved.evalClient.GetDataGenerationJob, DataGenerationAPIVersion, + !parallel, + ) + if err != nil { + mu.Lock() + datasetPollErr = err + mu.Unlock() + return + } + mu.Lock() + state.DatasetGenStatus = completed.NormalizedStatus() + mu.Unlock() + dsRef := datasetFromJob(completed) + if resolved.hasProject { + saveDatasetGenerationResult( + resolved.projectRoot, completed.ResolvedDatasetName(), completed.Result, + ) + if err := downloadDatasetArtifact( + ctx, resolved.datasetClient, resolved.projectRoot, dsRef, DefaultAgentAPIVersion, + ); err != nil { + mu.Lock() + datasetPollErr = err + mu.Unlock() + return + } + mu.Lock() + evalCfg.DatasetFile = datasetArtifactPath(resolved.projectRoot, dsRef) + mu.Unlock() + } + }() + } + + if pollEval { + wg.Add(1) + go func() { + defer wg.Done() + completed, err := pollEvalOperationWithSpinner( + ctx, "Evaluator generation", state.EvalGenOpID, + resolved.evalClient.GetEvaluatorGenerationJob, DefaultAgentAPIVersion, + !parallel, + ) + if err != nil { + mu.Lock() + evalPollErr = err + mu.Unlock() + return + } + evalName := evaluatorFromJob(completed) + mu.Lock() + state.EvalGenStatus = completed.NormalizedStatus() + evalCfg.Evaluators = append(builtinEvals, evalName) + mu.Unlock() + if resolved.hasProject { + saveEvaluatorResult(resolved.projectRoot, evalName, completed.Result) + } + }() + } + + wg.Wait() + + // If either job timed out, return a timeout error so the caller can + // persist the YAML and operation IDs for later resume. 
+ dsTimeout := isPollerTimeout(datasetPollErr) + evalTimeout := isPollerTimeout(evalPollErr) + if dsTimeout || evalTimeout { + return &initTimeoutError{ + datasetOpID: state.DatasetGenOpID, + evaluatorOpID: state.EvalGenOpID, + datasetTimedOut: dsTimeout, + evaluatorTimedOut: evalTimeout, + } + } + + if datasetPollErr != nil { + return datasetPollErr + } + return evalPollErr +} + +// isPollerTimeout returns true when the error is a *eval_api.PollerTimeoutError. +func isPollerTimeout(err error) bool { + _, ok := errors.AsType[*eval_api.PollerTimeoutError](err) + return ok +} + +// initTimeoutError is returned by pollAndFinalizeJobs when one or both +// generation jobs exceed the polling timeout. The caller should persist state +// and YAML so the user can resume later. +type initTimeoutError struct { + datasetOpID string + evaluatorOpID string + datasetTimedOut bool + evaluatorTimedOut bool +} + +func (e *initTimeoutError) Error() string { + return "generation jobs did not complete within the polling timeout" +} + +func writePendingEvalInit( + ctx context.Context, + resolved *evalResolvedContext, + configPath string, + evalCfg *evalConfig, + state *evalState, +) error { + if err := saveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { + return err + } + if err := writeEvalConfig(configPath, evalCfg); err != nil { + return err + } + fmt.Println(color.YellowString("Eval init submitted (async)")) + if state.DatasetGenOpID != "" { + fmt.Printf(" dataset generation: %s (%s)\n", state.DatasetGenOpID, state.DatasetGenStatus) + } + if state.EvalGenOpID != "" { + fmt.Printf(" evaluator generation: %s (%s)\n", state.EvalGenOpID, state.EvalGenStatus) + } + fmt.Printf("\n Config written to: %s\n", configPath) + fmt.Printf(" State saved to: azd environment %q\n", resolved.envName) + fmt.Println("\n When ready, run:") + fmt.Println(" azd ai agent eval run") + return nil +} + +// writeTimedOutEvalInit persists state and YAML when generation jobs exceed +// 
the polling timeout, allowing the user to resume later. +func writeTimedOutEvalInit( + ctx context.Context, + resolved *evalResolvedContext, + configPath string, + evalCfg *evalConfig, + state *evalState, +) error { + state.InitStatus = "pending" + if err := saveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { + return err + } + if err := writeEvalConfig(configPath, evalCfg); err != nil { + return err + } + fmt.Println(color.YellowString("\nGeneration jobs timed out but are still running on the server.")) + if state.DatasetGenOpID != "" { + fmt.Printf(" dataset generation: %s\n", state.DatasetGenOpID) + } + if state.EvalGenOpID != "" { + fmt.Printf(" evaluator generation: %s\n", state.EvalGenOpID) + } + fmt.Printf("\n Config written to: %s\n", configPath) + fmt.Printf(" State saved to: azd environment %q\n", resolved.envName) + fmt.Println("\n To resume polling, run:") + fmt.Println(" azd ai agent eval init") + fmt.Println("\n To start fresh and clear timed-out state, run:") + fmt.Println(" azd ai agent eval init --reset-defaults") + return nil +} + +// tryLoadExistingEvalConfig attempts to load an eval config from the given path. +// Returns (config, true) if the file exists and parses successfully, or (nil, false) otherwise. +func tryLoadExistingEvalConfig(configPath string) (*evalConfig, bool) { + cfg, err := readEvalConfig(configPath) + if err != nil { + return nil, false + } + return cfg, true +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go new file mode 100644 index 00000000000..b49acf5af90 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go @@ -0,0 +1,318 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "context" + "fmt" + "os" + "strconv" + "strings" + + "azureaiagent/internal/pkg/agents/agent_yaml" + "azureaiagent/internal/pkg/agents/eval_api" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" +) + +func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, flags *evalInitFlags, noPrompt bool) error { + azdClient := resolved.azdClient + if noPrompt { + return nil + } + + if flags.name == "" { + resp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Eval suite name", + DefaultValue: defaultEvalName, + IgnoreHintKeys: true, + }, + }) + if err != nil { + return fmt.Errorf("prompting for eval suite name: %w", err) + } + if value := strings.TrimSpace(resp.Value); value != "" { + flags.name = value + } + } + + needsGeneration := flags.dataset == "" || len(flags.evaluators) == 0 + + if flags.genInstruction == "" && needsGeneration && resolved.agentKind != agent_yaml.AgentKindPrompt { + // Let the user choose between inline text or loading from a file. + inputChoices := []*azdext.SelectChoice{ + {Label: "Type inline", Value: "inline"}, + {Label: "Load from file", Value: "file"}, + } + defaultIdx := int32(0) + selResp, err := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ + Options: &azdext.SelectOptions{ + Message: "How would you like to provide the generation instruction?", + Choices: inputChoices, + SelectedIndex: &defaultIdx, + }, + }) + if err != nil { + return fmt.Errorf("prompting for instruction input method: %w", err) + } + + if inputChoices[int(*selResp.Value)].Value == "file" { + // Prompt for the file path. 
+ pathResp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Path to instruction file", + IgnoreHintKeys: true, + }, + }) + if err != nil { + return fmt.Errorf("prompting for instruction file path: %w", err) + } + filePath := strings.TrimSpace(pathResp.Value) + data, err := os.ReadFile(filePath) //nolint:gosec // user-provided instruction file path + if err != nil { + return fmt.Errorf("reading instruction file %q: %w", filePath, err) + } + flags.genInstruction = strings.TrimSpace(string(data)) + } else { + // Inline text input. + resp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Describe what this agent does and what scenarios to test", + IgnoreHintKeys: true, + }, + }) + if err != nil { + return fmt.Errorf("prompting for generation instruction: %w", err) + } + flags.genInstruction = strings.TrimSpace(resp.Value) + } + } + + // TODO: Re-enable trace prompt once trace support is ready. + // // Ask whether to include traces, unless already set via flags. 
+ // if flags.traceDays == 0 && needsGeneration { + // confirmResp, err := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ + // Options: &azdext.ConfirmOptions{ + // Message: "Include agent traces for evaluation?", + // DefaultValue: new(bool), // default false + // }, + // }) + // if err != nil { + // return fmt.Errorf("prompting for trace inclusion: %w", err) + // } + // if confirmResp.GetValue() { + // rangeChoices := []*azdext.SelectChoice{ + // {Label: "Last Day", Value: "1"}, + // {Label: "Last 7 Days", Value: "7"}, + // {Label: "Last 30 Days", Value: "30"}, + // {Label: "Last 90 Days", Value: "90"}, + // } + // defaultRangeIdx := int32(1) // 7 days + // rangeResp, err := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ + // Options: &azdext.SelectOptions{ + // Message: "Select trace time range", + // Choices: rangeChoices, + // SelectedIndex: &defaultRangeIdx, + // }, + // }) + // if err != nil { + // return fmt.Errorf("prompting for trace time range: %w", err) + // } + // days, _ := strconv.Atoi(rangeChoices[int(*rangeResp.Value)].Value) + // flags.traceDays = days + // } + // } + + if !needsGeneration { + return nil + } + + if !flags.evalModelSet { + // Read the deployed model name from the azd environment to use as default. 
+ var deployedModel string + if resolved.envName != "" { + if v, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: resolved.envName, + Key: "AZURE_AI_MODEL_DEPLOYMENT_NAME", + }); err == nil && v.Value != "" { + deployedModel = v.Value + } + } + + choices := buildModelChoices(deployedModel) + defaultIndex := int32(0) + resp, err := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ + Options: &azdext.SelectOptions{ + Message: "Select the model for evaluation and generation", + Choices: choices, + SelectedIndex: &defaultIndex, + }, + }) + if err != nil { + return fmt.Errorf("prompting for evaluation model: %w", err) + } + selected := choices[int(*resp.Value)].Value + + // User chose to pick from another deployment in the project. + if selected == selectOtherDeployment { + selected, err = promptProjectDeployment(ctx, resolved) + if err != nil { + return err + } + } + flags.evalModel = selected + } + + if !flags.maxSamplesSet { + resp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Max samples", + DefaultValue: strconv.Itoa(defaultEvalSamples), + IgnoreHintKeys: true, + }, + }) + if err != nil { + return fmt.Errorf("prompting for max samples: %w", err) + } + if value := strings.TrimSpace(resp.Value); value != "" { + parsed, err := strconv.Atoi(value) + if err != nil || parsed <= 0 { + return fmt.Errorf("--max-samples must be a positive integer") + } + flags.maxSamples = parsed + } + } + + return nil +} + +// selectOtherDeployment is the sentinel value for the "Select another deployment" +// choice in the model prompt. +const selectOtherDeployment = "__select_other_deployment__" + +// buildModelChoices builds the initial model choices for the generation model +// prompt. When deployedModel is non-empty it appears first as the default. +// A "Select another deployment" option is always appended so the user can +// browse all deployments in the Foundry project. 
+func buildModelChoices(deployedModel string) []*azdext.SelectChoice { + var choices []*azdext.SelectChoice + if deployedModel != "" { + choices = append(choices, &azdext.SelectChoice{ + Label: deployedModel + " (deployed)", + Value: deployedModel, + }) + } + choices = append(choices, &azdext.SelectChoice{ + Label: "Select another deployment", + Value: selectOtherDeployment, + }) + return choices +} + +// promptProjectDeployment fetches model deployments from the Foundry project +// and prompts the user to select one. +func promptProjectDeployment(ctx context.Context, resolved *evalResolvedContext) (string, error) { + var deployments []FoundryDeploymentInfo + if resolved.envName != "" { + if v, err := resolved.azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: resolved.envName, + Key: "AZURE_AI_PROJECT_ID", + }); err == nil && v.Value != "" { + if project, err := extractProjectDetails(v.Value); err == nil { + if cred, err := newAgentCredential(); err == nil { + deployments, _ = listProjectDeployments( + ctx, cred, + project.SubscriptionId, + project.ResourceGroupName, + project.AccountName, + ) + } + } + } + } + if len(deployments) == 0 { + return "", fmt.Errorf("no model deployments found in the Foundry project") + } + + choices := make([]*azdext.SelectChoice, len(deployments)) + for i, d := range deployments { + label := d.Name + if d.ModelName != "" { + label = fmt.Sprintf("%s (%s)", d.Name, d.ModelName) + } + choices[i] = &azdext.SelectChoice{Label: label, Value: d.Name} + } + + defaultIndex := int32(0) + resp, err := resolved.azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ + Options: &azdext.SelectOptions{ + Message: "Select a model deployment", + Choices: choices, + SelectedIndex: &defaultIndex, + }, + }) + if err != nil { + return "", fmt.Errorf("prompting for model deployment: %w", err) + } + return choices[int(*resp.Value)].Value, nil +} + +// promptRegenerateChoices asks the user whether to regenerate the existing +// dataset 
and evaluator using individual yes/no confirmations. +func promptRegenerateChoices( + ctx context.Context, + resolved *evalResolvedContext, + existingCfg *evalConfig, + flags *evalInitFlags, +) error { + prompt := resolved.azdClient.Prompt() + + // Ask about dataset. + datasetLabel := existingCfg.DatasetFile + if datasetLabel == "" && existingCfg.DatasetReference != nil { + datasetLabel = existingCfg.DatasetReference.Name + } + if datasetLabel != "" { + resp, err := prompt.Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: fmt.Sprintf("Existing dataset: %s. Do you want to regenerate?", datasetLabel), + DefaultValue: new(false), + }, + }) + if err != nil { + return fmt.Errorf("prompting for dataset regeneration: %w", err) + } + if resp.Value != nil && *resp.Value { + flags.regenerateDataset = true + } + } + + // Ask about evaluator — only generated (non-builtin) evaluators can be regenerated. + generated, builtin := eval_api.SplitEvaluators(existingCfg.Evaluators) + if len(generated) > 0 { + generatedLabel := strings.Join(generated, ", ") + msg := fmt.Sprintf("Existing evaluator: %s. Do you want to regenerate?", generatedLabel) + if len(builtin) > 0 { + msg = fmt.Sprintf( + "Existing evaluator: %s (built-in evaluators %s will be kept). 
Do you want to regenerate?", + generatedLabel, strings.Join(builtin, ", "), + ) + } + resp, err := prompt.Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: msg, + DefaultValue: new(false), + }, + }) + if err != nil { + return fmt.Errorf("prompting for evaluator regeneration: %w", err) + } + if resp.Value != nil && *resp.Value { + flags.regenerateEvaluator = true + } + } + + return nil +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go new file mode 100644 index 00000000000..56d6657d76f --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go @@ -0,0 +1,577 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + + "azureaiagent/internal/pkg/agents/agent_yaml" + "azureaiagent/internal/pkg/agents/eval_api" + "azureaiagent/internal/pkg/agents/opteval" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// newEvalInitCommand — command shape +// --------------------------------------------------------------------------- + +func TestNewEvalInitCommand_Flags(t *testing.T) { + t.Parallel() + cmd := newEvalInitCommand(&azdext.ExtensionContext{}) + + expectedFlags := []struct { + name string + defaultValue string + }{ + {"name", ""}, + {"no-wait", "false"}, + {"agent", ""}, + {"project-endpoint", ""}, + {"gen-instruction", ""}, + {"gen-instruction-file", ""}, + {"eval-model", defaultEvalModel}, + {"dataset", ""}, + {"max-samples", "100"}, + {"out-file", defaultEvalConfigName}, + {"reset-defaults", "false"}, + } + + for _, ef := range expectedFlags { + t.Run(ef.name, func(t *testing.T) { + f := cmd.Flags().Lookup(ef.name) + 
require.NotNil(t, f, "flag %q should exist", ef.name) + assert.Equal(t, ef.defaultValue, f.DefValue) + }) + } +} + +func TestNewEvalInitCommand_NoArgs(t *testing.T) { + t.Parallel() + cmd := newEvalInitCommand(&azdext.ExtensionContext{}) + assert.NoError(t, cmd.Args(cmd, nil)) + assert.Error(t, cmd.Args(cmd, []string{"extra"})) +} + +func TestNewEvalInitCommand_ShortOutFile(t *testing.T) { + t.Parallel() + cmd := newEvalInitCommand(&azdext.ExtensionContext{}) + f := cmd.Flags().ShorthandLookup("O") + require.NotNil(t, f, "flag -O shorthand should exist") + assert.Equal(t, "out-file", f.Name) +} + +// --------------------------------------------------------------------------- +// gen-instruction / gen-instruction-file mutual exclusion +// --------------------------------------------------------------------------- + +func TestRunEvalInit_MutualExclusion(t *testing.T) { + t.Parallel() + flags := &evalInitFlags{ + genInstruction: "inline text", + genInstructionFile: "some-file.txt", + } + err := runEvalInit(t.Context(), flags, true) + require.Error(t, err) + assert.Contains(t, err.Error(), "cannot use both --gen-instruction and --gen-instruction-file") +} + +func TestRunEvalInit_InstructionFromFile(t *testing.T) { + t.Parallel() + tmpDir := t.TempDir() + instrFile := filepath.Join(tmpDir, "instruction.md") + require.NoError(t, os.WriteFile(instrFile, []byte(" Test booking agent \n"), 0600)) + + flags := &evalInitFlags{ + genInstructionFile: instrFile, + evalModel: defaultEvalModel, + maxSamples: 10, + } + // runEvalInit will fail later (no azd client), but genInstruction should be populated first. 
+ _ = runEvalInit(t.Context(), flags, true) + assert.Equal(t, "Test booking agent", flags.genInstruction) +} + +func TestRunEvalInit_InstructionFileMissing(t *testing.T) { + t.Parallel() + flags := &evalInitFlags{ + genInstructionFile: "/nonexistent/path/instruction.txt", + } + err := runEvalInit(t.Context(), flags, true) + require.Error(t, err) + assert.Contains(t, err.Error(), "reading instruction file") +} + +// --------------------------------------------------------------------------- +// newEvalConfig +// --------------------------------------------------------------------------- + +func TestNewEvalConfig(t *testing.T) { + t.Parallel() + + t.Run("uses default name", func(t *testing.T) { + t.Parallel() + flags := &evalInitFlags{ + genInstruction: "Test the booking agent", + evalModel: "gpt-4.1", + maxSamples: 50, + } + resolved := &evalResolvedContext{ + agentName: "booking-agent", + agentKind: agent_yaml.AgentKindHosted, + version: "v2", + } + + cfg := newEvalConfig(flags, resolved) + + assert.Equal(t, defaultEvalName, cfg.Name) + assert.Equal(t, "booking-agent", cfg.Agent.Name) + assert.Equal(t, agent_yaml.AgentKindHosted, cfg.Agent.Kind) + assert.Equal(t, "v2", cfg.Agent.Version) + assert.Equal(t, "gpt-4.1", cfg.Options.EvalModel) + assert.Equal(t, "Test the booking agent", cfg.GenerationInstruction) + assert.Equal(t, 50, cfg.MaxSamples) + }) + + t.Run("uses custom name from flag", func(t *testing.T) { + t.Parallel() + flags := &evalInitFlags{ + name: "my-suite", + maxSamples: 10, + } + resolved := &evalResolvedContext{agentName: "a"} + cfg := newEvalConfig(flags, resolved) + assert.Equal(t, "my-suite", cfg.Name) + }) +} + +// --------------------------------------------------------------------------- +// datasetFromJob +// --------------------------------------------------------------------------- + +func TestDatasetFromJob(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + job *eval_api.GenerationJob + expectedName string + 
expectedVersion string + }{ + { + "standard fields", + &eval_api.GenerationJob{DatasetName: "ds-1", DatasetVersion: "v2"}, + "ds-1", "v2", + }, + { + "name fallback", + &eval_api.GenerationJob{Name: "ds-2"}, + "ds-2", "v1", + }, + { + "version fallback", + &eval_api.GenerationJob{DatasetName: "ds-3", Version: "v3"}, + "ds-3", "v3", + }, + { + "empty defaults version to v1", + &eval_api.GenerationJob{Name: "ds-4"}, + "ds-4", "v1", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + ref := datasetFromJob(tt.job) + assert.Equal(t, tt.expectedName, ref.Name) + assert.Equal(t, tt.expectedVersion, ref.Version) + }) + } +} + +// --------------------------------------------------------------------------- +// eval_api.IsDatasetName +// --------------------------------------------------------------------------- + +func TestIsDatasetName(t *testing.T) { + t.Parallel() + + t.Run("simple name is a dataset name", func(t *testing.T) { + t.Parallel() + assert.True(t, eval_api.IsDatasetName("eval-data-2026-04-16")) + }) + + t.Run("name with dots but no data extension", func(t *testing.T) { + t.Parallel() + assert.True(t, eval_api.IsDatasetName("my-dataset.v2")) + }) + + t.Run("jsonl file is not a name", func(t *testing.T) { + t.Parallel() + assert.False(t, eval_api.IsDatasetName("golden.jsonl")) + }) + + t.Run("json file is not a name", func(t *testing.T) { + t.Parallel() + assert.False(t, eval_api.IsDatasetName("data.json")) + }) + + t.Run("csv file is not a name", func(t *testing.T) { + t.Parallel() + assert.False(t, eval_api.IsDatasetName("results.csv")) + }) + + t.Run("path with separator is not a name", func(t *testing.T) { + t.Parallel() + assert.False(t, eval_api.IsDatasetName("./tests/golden.jsonl")) + }) + + t.Run("empty string is not a name", func(t *testing.T) { + t.Parallel() + assert.False(t, eval_api.IsDatasetName("")) + }) +} + +// --------------------------------------------------------------------------- +// buildModelChoices +// 
--------------------------------------------------------------------------- + +func TestBuildModelChoices(t *testing.T) { + t.Parallel() + + t.Run("no deployed model has select-other only", func(t *testing.T) { + t.Parallel() + choices := buildModelChoices("") + require.Len(t, choices, 1) + assert.Equal(t, selectOtherDeployment, choices[0].Value) + assert.Equal(t, "Select another deployment", choices[0].Label) + }) + + t.Run("deployed model first then select-other", func(t *testing.T) { + t.Parallel() + choices := buildModelChoices("my-deployment") + require.Len(t, choices, 2) + assert.Equal(t, "my-deployment", choices[0].Value) + assert.Contains(t, choices[0].Label, "(deployed)") + assert.Equal(t, selectOtherDeployment, choices[1].Value) + }) +} + +// --------------------------------------------------------------------------- +// evaluatorFromJob +// --------------------------------------------------------------------------- + +func TestEvaluatorFromJob(t *testing.T) { + t.Parallel() + + t.Run("extracts name from job", func(t *testing.T) { + t.Parallel() + job := &eval_api.GenerationJob{ + EvaluatorName: "quality-eval", + } + name := evaluatorFromJob(job) + assert.Equal(t, "quality-eval", name) + }) + + t.Run("extracts name from result", func(t *testing.T) { + t.Parallel() + job := &eval_api.GenerationJob{ + Result: json.RawMessage(`{"name":"smoke-core","display_name":"smoke-core"}`), + } + name := evaluatorFromJob(job) + assert.Equal(t, "smoke-core", name) + }) + + t.Run("returns empty when no name", func(t *testing.T) { + t.Parallel() + job := &eval_api.GenerationJob{} + name := evaluatorFromJob(job) + assert.Empty(t, name) + }) +} + +// --------------------------------------------------------------------------- +// eval_api.BuildGenerationSources +// --------------------------------------------------------------------------- + +func TestBuildGenerationSources(t *testing.T) { + t.Parallel() + + t.Run("hosted agent includes prompt and agent sources", func(t 
*testing.T) { + t.Parallel() + sources := eval_api.BuildGenerationSources( + string(agent_yaml.AgentKindHosted), "my-agent", "v2", + "Test customer service interactions", nil, + ) + require.Len(t, sources, 2) + + // First source: prompt + assert.Equal(t, "prompt", sources[0].Type) + assert.Equal(t, "Test customer service interactions", sources[0].Prompt) + + // Second source: agent + assert.Equal(t, "agent", sources[1].Type) + assert.Equal(t, "my-agent", sources[1].AgentName) + assert.Equal(t, "v2", sources[1].AgentVersion) + assert.Empty(t, sources[1].Prompt) + }) + + t.Run("prompt agent includes only agent source", func(t *testing.T) { + t.Parallel() + sources := eval_api.BuildGenerationSources( + string(agent_yaml.AgentKindPrompt), "prompt-agent", "v1", "", nil, + ) + require.Len(t, sources, 1) + + assert.Equal(t, "agent", sources[0].Type) + assert.Equal(t, "prompt-agent", sources[0].AgentName) + assert.Equal(t, "v1", sources[0].AgentVersion) + assert.Empty(t, sources[0].Prompt, "prompt agents should not have prompt field") + }) + + t.Run("prompt agent without version omits agent_version", func(t *testing.T) { + t.Parallel() + sources := eval_api.BuildGenerationSources( + string(agent_yaml.AgentKindPrompt), "prompt-agent", "", "", nil, + ) + require.Len(t, sources, 1) + + assert.Equal(t, "agent", sources[0].Type) + assert.Equal(t, "prompt-agent", sources[0].AgentName) + assert.Empty(t, sources[0].AgentVersion, "empty version should be omitted") + }) + + t.Run("hosted agent without instruction omits prompt source", func(t *testing.T) { + t.Parallel() + sources := eval_api.BuildGenerationSources( + string(agent_yaml.AgentKindHosted), "my-agent", "v1", "", nil, + ) + require.Len(t, sources, 1) + assert.Equal(t, "agent", sources[0].Type) + }) +} + +// --------------------------------------------------------------------------- +// evaluatorsFromFlags +// --------------------------------------------------------------------------- + +func TestEvaluatorsFromFlags(t 
*testing.T) { + t.Parallel() + + t.Run("passes through strings", func(t *testing.T) { + t.Parallel() + result := evaluatorsFromFlags([]string{"builtin.task_adherence", "my-custom"}) + require.Len(t, result, 2) + assert.Equal(t, "builtin.task_adherence", result[0]) + assert.Equal(t, "my-custom", result[1]) + }) + + t.Run("nil returns nil", func(t *testing.T) { + t.Parallel() + result := evaluatorsFromFlags(nil) + assert.Nil(t, result) + }) +} + +// --------------------------------------------------------------------------- +// buildOpenAIEvalRequest +// --------------------------------------------------------------------------- + +func TestBuildOpenAIEvalRequest(t *testing.T) { + t.Parallel() + + cfg := &evalConfig{ + Config: opteval.Config{ + Name: "smoke-core", + Agent: evalAgentRef{ + Name: "agent-1", + Version: "v1", + }, + DatasetReference: &evalDatasetRef{Name: "ds", Version: "v1"}, + Evaluators: []string{"builtin.quality"}, + }, + Options: &opteval.Options{EvalModel: "gpt-4o"}, + } + + req := buildOpenAIEvalRequest(cfg) + + assert.Equal(t, "smoke-core", req.Name) + assert.Equal(t, "agent-1", req.Metadata["azd_agent"]) + assert.Equal(t, "v1", req.Metadata["azd_agent_version"]) + require.NotNil(t, req.DataSourceConfig) + assert.Equal(t, "custom", req.DataSourceConfig.Type) + require.Len(t, req.TestingCriteria, 1) + assert.Equal(t, "azure_ai_evaluator", req.TestingCriteria[0].Type) + assert.Equal(t, "builtin.quality", req.TestingCriteria[0].EvaluatorName) + assert.Equal(t, "gpt-4o", req.TestingCriteria[0].InitializationParameters["model"]) + assert.Equal(t, "{{item.messages}}", req.TestingCriteria[0].DataMapping["messages"]) + assert.Equal(t, "{{item.query}}", req.TestingCriteria[0].DataMapping["query"]) + assert.Equal(t, "{{sample.output_text}}", req.TestingCriteria[0].DataMapping["response"]) +} + +func TestBuildOpenAIEvalRequest_WithDatasetFile(t *testing.T) { + t.Parallel() + + cfg := &evalConfig{ + Config: opteval.Config{ + Name: "test-eval", + Agent: 
evalAgentRef{Name: "agent-1"}, + DatasetFile: "tasks.jsonl", + }, + } + + req := buildOpenAIEvalRequest(cfg) + require.NotNil(t, req.DataSourceConfig) + assert.Equal(t, "custom", req.DataSourceConfig.Type) + assert.Empty(t, req.TestingCriteria) +} + +// --------------------------------------------------------------------------- +// resolveLocalDatasetFile +// --------------------------------------------------------------------------- + +func TestResolveLocalDatasetFile_Absolute(t *testing.T) { + t.Parallel() + dir := t.TempDir() + f := filepath.Join(dir, "tasks.jsonl") + require.NoError(t, os.WriteFile(f, []byte(`{"query":"hi"}`+"\n"), 0600)) + + result, err := resolveLocalDatasetFile(f, "/other") + require.NoError(t, err) + assert.Equal(t, f, result) +} + +func TestResolveLocalDatasetFile_Relative(t *testing.T) { + t.Parallel() + dir := t.TempDir() + f := filepath.Join(dir, "data.jsonl") + require.NoError(t, os.WriteFile(f, []byte(`{"query":"hi"}`+"\n"), 0600)) + + result, err := resolveLocalDatasetFile("data.jsonl", dir) + require.NoError(t, err) + assert.Equal(t, f, result) +} + +func TestResolveLocalDatasetFile_NotFound(t *testing.T) { + t.Parallel() + _, err := resolveLocalDatasetFile("missing.jsonl", t.TempDir()) + assert.Error(t, err) + assert.Contains(t, err.Error(), "not accessible") +} + +// --------------------------------------------------------------------------- +// tryLoadExistingEvalConfig +// --------------------------------------------------------------------------- + +func TestTryLoadExistingEvalConfig_Found(t *testing.T) { + t.Parallel() + dir := t.TempDir() + cfgPath := filepath.Join(dir, "eval.yaml") + cfg := &evalConfig{ + Config: opteval.Config{ + Name: "smoke-core", + Agent: evalAgentRef{ + Name: "my-agent", + }, + DatasetFile: "data.jsonl", + Evaluators: []string{"quality"}, + }, + } + require.NoError(t, writeEvalConfig(cfgPath, cfg)) + + loaded, ok := tryLoadExistingEvalConfig(cfgPath) + require.True(t, ok) + assert.Equal(t, "smoke-core", 
loaded.Name) + assert.Equal(t, "my-agent", loaded.Agent.Name) + assert.Equal(t, []string{"quality"}, loaded.Evaluators) +} + +func TestTryLoadExistingEvalConfig_NotFound(t *testing.T) { + t.Parallel() + cfg, ok := tryLoadExistingEvalConfig(filepath.Join(t.TempDir(), "missing.yaml")) + assert.False(t, ok) + assert.Nil(t, cfg) +} + +func TestTryLoadExistingEvalConfig_InvalidYAML(t *testing.T) { + t.Parallel() + dir := t.TempDir() + cfgPath := filepath.Join(dir, "eval.yaml") + require.NoError(t, os.WriteFile(cfgPath, []byte(":\ninvalid: [yaml"), 0600)) + + cfg, ok := tryLoadExistingEvalConfig(cfgPath) + assert.False(t, ok) + assert.Nil(t, cfg) +} + +// --------------------------------------------------------------------------- +// eval_api.SplitEvaluators / eval_api.IsBuiltinEvaluator +// --------------------------------------------------------------------------- + +func TestIsBuiltinEvaluator(t *testing.T) { + t.Parallel() + tests := []struct { + name string + input string + expected bool + }{ + {"builtin prefix", "builtin.task_adherence", true}, + {"builtin prefix dot only", "builtin.", true}, + {"custom evaluator", "my-quality", false}, + {"empty string", "", false}, + {"similar prefix", "builtins.quality", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, eval_api.IsBuiltinEvaluator(tt.input)) + }) + } +} + +func TestSplitEvaluators(t *testing.T) { + t.Parallel() + tests := []struct { + name string + input []string + expectedGenerated []string + expectedBuiltin []string + }{ + { + "mixed list", + []string{"builtin.task_adherence", "my-quality", "builtin.safety"}, + []string{"my-quality"}, + []string{"builtin.task_adherence", "builtin.safety"}, + }, + { + "all builtin", + []string{"builtin.quality", "builtin.safety"}, + nil, + []string{"builtin.quality", "builtin.safety"}, + }, + { + "all generated", + []string{"smoke-core", "custom-1"}, + []string{"smoke-core", "custom-1"}, + nil, + }, + { + "empty list", 
+ nil, + nil, + nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + generated, builtin := eval_api.SplitEvaluators(tt.input) + assert.Equal(t, tt.expectedGenerated, generated) + assert.Equal(t, tt.expectedBuiltin, builtin) + }) + } +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go new file mode 100644 index 00000000000..47687de3d30 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go @@ -0,0 +1,150 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "context" + "fmt" + "os" + "sync" + "text/tabwriter" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +type evalListFlags struct { + limit int +} + +func newEvalListCommand() *cobra.Command { + flags := &evalListFlags{limit: 10} + cmd := &cobra.Command{ + Use: "list", + Short: "List evaluations for the current project.", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := azdext.WithAccessToken(cmd.Context()) + logCleanup := setupDebugLogging(cmd.Flags()) + defer logCleanup() + return runEvalList(ctx, flags) + }, + } + cmd.Flags().IntVar(&flags.limit, "limit", 10, "Maximum number of evals to return") + return cmd +} + +// evalRunSummary holds the fetched run info for a single eval. +type evalRunSummary struct { + runCount int + lastRunStatus string +} + +func runEvalList(ctx context.Context, flags *evalListFlags) error { + resolved, err := resolveEvalContext(ctx, evalContextOptions{}) + if err != nil { + return err + } + defer resolved.azdClient.Close() + + // Load the active eval ID from the azd environment. 
+ var activeEvalID string + if resolved.envName != "" { + state := loadEvalState(ctx, resolved.azdClient, resolved.envName) + activeEvalID = state.EvalID + } + + resp, err := resolved.evalClient.ListOpenAIEvals(ctx, flags.limit, DefaultAgentAPIVersion) + if err != nil { + return fmt.Errorf("failed to list evals: %w", err) + } + + items := resp.Data + + // Fetch run summaries in parallel for each eval. + summaries := make([]evalRunSummary, len(items)) + var wg sync.WaitGroup + for i, item := range items { + wg.Add(1) + go func(idx int, evalID string) { + defer wg.Done() + runs, err := resolved.evalClient.ListOpenAIEvalRuns(ctx, evalID, 10, DefaultAgentAPIVersion) + if err != nil || runs == nil { + return + } + summaries[idx].runCount = len(runs.Data) + if len(runs.Data) > 0 { + summaries[idx].lastRunStatus = runs.Data[0].Status + } + }(i, item.ResolvedID()) + } + wg.Wait() + + w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) + fmt.Fprintln(w, " \tEval ID\tName\tStatus of last run\tRuns\tCreated by\tCreated on") + fmt.Fprintln(w, " \t-------\t----\t------------------\t----\t----------\t----------") + for i, item := range items { + marker := " " + if item.ResolvedID() == activeEvalID { + marker = "*" + } + name := item.Name + if name == "" { + name = item.ResolvedID() + } + status := padColorizedStatus(summaries[i].lastRunStatus) + createdBy := item.CreatedBy + createdOn := formatTimestamp(item.CreatedAt) + + fmt.Fprintf(w, "%s \t%s\t%s\t%s\t%d\t%s\t%s\n", + marker, + item.ResolvedID(), + name, + status, + summaries[i].runCount, + createdBy, + createdOn, + ) + } + if err := w.Flush(); err != nil { + return err + } + if activeEvalID != "" { + fmt.Printf("\n* = active eval in current environment\n") + } + fmt.Printf("(showing %d — use --limit to change)\n", len(items)) + return nil +} + +// padColorizedStatus returns a fixed-width colored status string so that +// tabwriter aligns columns correctly despite ANSI escape sequences. 
+func padColorizedStatus(status string) string { + const statusWidth = 10 // wide enough for "Completed", "Cancelled", etc. + label, colorFn := statusLabelAndColor(status) + padded := fmt.Sprintf("%-*s", statusWidth, label) + return colorFn(padded) +} + +// statusLabelAndColor maps a raw status to a display label and color function. +func statusLabelAndColor(status string) (string, func(string, ...any) string) { + switch status { + case "completed": + return "Completed", color.GreenString + case "succeeded": + return "Succeeded", color.GreenString + case "failed": + return "Failed", color.RedString + case "cancelled", "canceled": + return "Cancelled", color.YellowString + case "running", "in_progress": + return "Running", color.CyanString + case "partial": + return "Partial", color.YellowString + case "": + return "No runs", color.HiBlackString + default: + return status, fmt.Sprintf + } +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list_test.go new file mode 100644 index 00000000000..fbb7d8fabc5 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list_test.go @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// newEvalListCommand — command shape +// --------------------------------------------------------------------------- + +func TestNewEvalListCommand_Flags(t *testing.T) { + t.Parallel() + cmd := newEvalListCommand() + + f := cmd.Flags().Lookup("limit") + require.NotNil(t, f) + assert.Equal(t, "10", f.DefValue) +} + +func TestNewEvalListCommand_NoArgs(t *testing.T) { + t.Parallel() + cmd := newEvalListCommand() + assert.NoError(t, cmd.Args(cmd, nil)) + assert.Error(t, cmd.Args(cmd, []string{"extra"})) +} + +func TestNewEvalListCommand_UseString(t *testing.T) { + t.Parallel() + cmd := newEvalListCommand() + assert.Equal(t, "list", cmd.Use) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go new file mode 100644 index 00000000000..298592fff4f --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go @@ -0,0 +1,210 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "net/url" + "os" + "strings" + + "azureaiagent/internal/pkg/agents/eval_api" + "azureaiagent/internal/pkg/agents/opteval" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +type evalRunFlags struct { + config string +} + +func newEvalRunCommand() *cobra.Command { + flags := &evalRunFlags{config: defaultEvalConfigName} + cmd := &cobra.Command{ + Use: "run", + Short: "Execute an evaluation run from eval.yaml.", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := azdext.WithAccessToken(cmd.Context()) + logCleanup := setupDebugLogging(cmd.Flags()) + defer logCleanup() + return runEvalRun(ctx, flags) + }, + } + cmd.Flags().StringVar(&flags.config, "config", defaultEvalConfigName, "Local eval config YAML") + return cmd +} + +func runEvalRun(ctx context.Context, flags *evalRunFlags) error { + resolved, err := resolveEvalContext(ctx, evalContextOptions{}) + if err != nil { + return err + } + defer resolved.azdClient.Close() + + configPath := resolveEvalConfigPath(flags.config, resolved.agentProject) + evalCfg, err := readEvalConfig(configPath) + if err != nil { + return err + } + if resolved.agentName == "" { + resolved.agentName = evalCfg.Agent.Name + } + if resolved.version == "" { + resolved.version = evalCfg.Agent.Version + } + + state := loadEvalState(ctx, resolved.azdClient, resolved.envName) + + if state.InitStatus == "pending" { + if err := resumeEvalInit(ctx, resolved, configPath, evalCfg, state); err != nil { + return err + } + } + + evalID := state.EvalID + if evalID == "" { + created, err := resolved.evalClient.CreateOpenAIEval( + ctx, buildOpenAIEvalRequest(evalCfg), DefaultAgentAPIVersion, + ) + if err != nil { + return fmt.Errorf("failed to create eval: %w", err) + } + evalID = created.ResolvedID() + if evalID == "" { + evalID = evalCfg.Name + } + state.EvalID = evalID + if err := 
saveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { + return err + } + } + + runReq := &eval_api.CreateOpenAIEvalRunRequest{ + Name: evalCfg.Name, + Metadata: map[string]string{"azd_agent": evalCfg.Agent.Name}, + } + + // Build agent target data source. + dataSource := eval_api.NewAgentTargetDataSource( + resolved.agentName, agentVersionPtr(resolved.version), + ) + + // Set source from local dataset file or remote dataset reference. + if evalCfg.DatasetFile != "" { + items, err := loadEvalDatasetFile(evalCfg.DatasetFile) + if err != nil { + return err + } + dataSource.SetFileContent(items) + } else if evalCfg.DatasetReference != nil { + fileID := buildDatasetFileID(resolved.projectEndpoint, evalCfg.DatasetReference) + dataSource.SetFileID(fileID) + } else { + return fmt.Errorf("no dataset configured; run 'azd ai agent eval init' or specify dataset_file / dataset_reference in the eval config") + } + + runReq.DataSource = dataSource + + run, err := resolved.evalClient.CreateOpenAIEvalRun( + ctx, + evalID, + runReq, + DefaultAgentAPIVersion, + ) + if err != nil { + return fmt.Errorf("failed to start eval run: %w", err) + } + + fmt.Println(color.GreenString("Eval run started")) + fmt.Printf(" Eval: %s\n", evalID) + if run.ID != "" { + fmt.Printf(" Run: %s\n", run.ID) + } + if run.ReportURL != "" { + fmt.Printf(" Report: %s\n", run.ReportURL) + } + return nil +} + +// loadEvalDatasetFile reads a JSONL file and returns each line as a map. 
+func loadEvalDatasetFile(path string) ([]map[string]any, error) { + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("failed to open dataset file %s: %w", path, err) + } + defer f.Close() + + var items []map[string]any + scanner := bufio.NewScanner(f) + lineNum := 0 + for scanner.Scan() { + lineNum++ + line := scanner.Text() + if line == "" { + continue + } + var item map[string]any + if err := json.Unmarshal([]byte(line), &item); err != nil { + return nil, fmt.Errorf("failed to parse dataset line %d: %w", lineNum, err) + } + items = append(items, item) + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("error reading dataset file %s: %w", path, err) + } + if len(items) == 0 { + return nil, fmt.Errorf("dataset file %s contains no items", path) + } + return items, nil +} + +// buildDatasetFileID constructs an azureai:// URI for a remote dataset reference. +// Format: azureai://accounts//projects//data//versions/ +// The account and project are extracted from the project endpoint URL +// (https://.services.ai.azure.com/api/projects/). +func buildDatasetFileID(projectEndpoint string, ref *opteval.DatasetRef) string { + account, project := parseProjectEndpoint(projectEndpoint) + version := ref.Version + if version == "" { + version = "1" + } + return fmt.Sprintf("azureai://accounts/%s/projects/%s/data/%s/versions/%s", + account, project, ref.Name, version) +} + +// parseProjectEndpoint extracts account and project names from a Foundry project endpoint URL. 
+func parseProjectEndpoint(endpoint string) (account, project string) { + u, err := url.Parse(endpoint) + if err != nil { + return "", "" + } + // Host format: .services.ai.azure.com + host := u.Hostname() + if idx := strings.Index(host, "."); idx > 0 { + account = host[:idx] + } + // Path format: /api/projects/ + parts := strings.Split(strings.Trim(u.Path, "/"), "/") + for i, p := range parts { + if p == "projects" && i+1 < len(parts) { + project = parts[i+1] + break + } + } + return account, project +} + +// agentVersionPtr returns a pointer to the version string, or nil if empty. +func agentVersionPtr(version string) *string { + if version == "" { + return nil + } + return &version +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go new file mode 100644 index 00000000000..e50479be7f7 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go @@ -0,0 +1,149 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "os" + "path/filepath" + "testing" + + "azureaiagent/internal/pkg/agents/opteval" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// newEvalRunCommand — command shape +// --------------------------------------------------------------------------- + +func TestNewEvalRunCommand_Flags(t *testing.T) { + t.Parallel() + cmd := newEvalRunCommand() + + f := cmd.Flags().Lookup("config") + require.NotNil(t, f) + assert.Equal(t, defaultEvalConfigName, f.DefValue) +} + +func TestNewEvalRunCommand_NoArgs(t *testing.T) { + t.Parallel() + cmd := newEvalRunCommand() + assert.NoError(t, cmd.Args(cmd, nil)) + assert.Error(t, cmd.Args(cmd, []string{"extra"})) +} + +func TestNewEvalRunCommand_UseString(t *testing.T) { + t.Parallel() + cmd := newEvalRunCommand() + assert.Equal(t, "run", cmd.Use) +} + +// --------------------------------------------------------------------------- +// loadEvalDatasetFile +// --------------------------------------------------------------------------- + +func TestLoadEvalDatasetFile(t *testing.T) { + t.Parallel() + dir := t.TempDir() + f := filepath.Join(dir, "data.jsonl") + content := "{\"query\":\"hello\",\"id\":\"1\"}\n{\"query\":\"world\",\"id\":\"2\"}\n" + require.NoError(t, os.WriteFile(f, []byte(content), 0600)) + + items, err := loadEvalDatasetFile(f) + require.NoError(t, err) + require.Len(t, items, 2) + assert.Equal(t, "hello", items[0]["query"]) + assert.Equal(t, "2", items[1]["id"]) +} + +func TestLoadEvalDatasetFile_Empty(t *testing.T) { + t.Parallel() + dir := t.TempDir() + f := filepath.Join(dir, "empty.jsonl") + require.NoError(t, os.WriteFile(f, []byte(""), 0600)) + + _, err := loadEvalDatasetFile(f) + assert.Error(t, err) + assert.Contains(t, err.Error(), "contains no items") +} + +func TestLoadEvalDatasetFile_NotFound(t *testing.T) { + t.Parallel() + _, err := 
loadEvalDatasetFile("/nonexistent/data.jsonl") + assert.Error(t, err) +} + +// --------------------------------------------------------------------------- +// parseProjectEndpoint +// --------------------------------------------------------------------------- + +func TestParseProjectEndpoint(t *testing.T) { + t.Parallel() + tests := []struct { + name string + endpoint string + expectedAccount string + expectedProject string + }{ + { + "standard endpoint", + "https://foundryljm7.services.ai.azure.com/api/projects/projectljm7", + "foundryljm7", + "projectljm7", + }, + { + "endpoint with trailing slash", + "https://myaccount.services.ai.azure.com/api/projects/myproject/", + "myaccount", + "myproject", + }, + { + "empty string", + "", + "", + "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + account, project := parseProjectEndpoint(tt.endpoint) + assert.Equal(t, tt.expectedAccount, account) + assert.Equal(t, tt.expectedProject, project) + }) + } +} + +// --------------------------------------------------------------------------- +// buildDatasetFileID +// --------------------------------------------------------------------------- + +func TestBuildDatasetFileID(t *testing.T) { + t.Parallel() + tests := []struct { + name string + endpoint string + ref *opteval.DatasetRef + expected string + }{ + { + "with version", + "https://foundryljm7.services.ai.azure.com/api/projects/projectljm7", + &opteval.DatasetRef{Name: "bugbash-mt-sim-scenarios", Version: "1"}, + "azureai://accounts/foundryljm7/projects/projectljm7/data/bugbash-mt-sim-scenarios/versions/1", + }, + { + "default version", + "https://myaccount.services.ai.azure.com/api/projects/myproject", + &opteval.DatasetRef{Name: "my-dataset"}, + "azureai://accounts/myaccount/projects/myproject/data/my-dataset/versions/1", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := buildDatasetFileID(tt.endpoint, tt.ref) + assert.Equal(t, tt.expected, result) + }) + 
} +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go new file mode 100644 index 00000000000..cf59343b8d0 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go @@ -0,0 +1,182 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "context" + "fmt" + "os" + "text/tabwriter" + + "azureaiagent/internal/pkg/agents/eval_api" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +type evalShowFlags struct { + evalRunID string + limit int + output string +} + +func newEvalShowCommand() *cobra.Command { + flags := &evalShowFlags{limit: 20} + cmd := &cobra.Command{ + Use: "show [eval-id]", + Short: "Show an eval definition, run history, or run details.", + Long: `Show an eval definition, run history, or run details. + +If eval-id is omitted, the most recent eval from the current environment is used.`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + ctx := azdext.WithAccessToken(cmd.Context()) + logCleanup := setupDebugLogging(cmd.Flags()) + defer logCleanup() + + var evalID string + if len(args) > 0 { + evalID = args[0] + } + return runEvalShow(ctx, evalID, flags) + }, + } + cmd.Flags().StringVar(&flags.evalRunID, "eval-run-id", "", "Show details for a specific eval run") + cmd.Flags().IntVar(&flags.limit, "limit", 20, "Maximum number of runs to show") + cmd.Flags().StringVarP(&flags.output, "out-file", "O", "", "Export full run results to a JSON file") + return cmd +} + +func runEvalShow(ctx context.Context, evalID string, flags *evalShowFlags) error { + resolved, err := resolveEvalContext(ctx, evalContextOptions{}) + if err != nil { + return err + } + defer resolved.azdClient.Close() + + // Fall back to the eval ID stored in the azd environment. 
+ if evalID == "" && resolved.envName != "" { + state := loadEvalState(ctx, resolved.azdClient, resolved.envName) + evalID = state.EvalID + } + if evalID == "" { + return fmt.Errorf("no eval-id provided and none found in the current environment; run 'azd ai agent eval init' first or pass an eval-id") + } + + if flags.evalRunID != "" { + run, err := resolved.evalClient.GetOpenAIEvalRun(ctx, evalID, flags.evalRunID, DefaultAgentAPIVersion) + if err != nil { + return fmt.Errorf("failed to get eval run: %w", err) + } + if flags.output != "" { + return writeJSONFile(flags.output, run) + } + return printEvalRunSummary(evalID, run) + } + + evalObj, err := resolved.evalClient.GetOpenAIEval(ctx, evalID, DefaultAgentAPIVersion) + if err != nil { + return fmt.Errorf("failed to get eval: %w", err) + } + runs, err := resolved.evalClient.ListOpenAIEvalRuns(ctx, evalID, flags.limit, DefaultAgentAPIVersion) + if err != nil { + return fmt.Errorf("failed to list eval runs: %w", err) + } + if flags.output != "" { + return writeJSONFile(flags.output, map[string]any{ + "eval": evalObj, + "runs": runs.Data, + }) + } + return printEvalSummary(evalObj, runs.Data, flags.limit) +} + +func printEvalSummary(evalObj *eval_api.OpenAIEval, runs []eval_api.OpenAIEvalRun, limit int) error { + fmt.Printf("Eval: %s\n", evalObj.ResolvedID()) + if evalObj.Name != "" { + fmt.Printf("Name: %s\n", evalObj.Name) + } + if agent := evalObj.Metadata["azd_agent"]; agent != "" { + fmt.Printf("Agent: %s\n", agent) + } + fmt.Printf("Created: %s\n", formatTimestamp(evalObj.CreatedAt)) + if evalObj.CreatedBy != "" { + fmt.Printf("Created by: %s\n", evalObj.CreatedBy) + } + fmt.Printf("Runs: %d\n\n", len(runs)) + fmt.Println("Recent runs:") + w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) + fmt.Fprintln(w, " Run ID\tStatus\tPassed\tFailed\tCreated") + fmt.Fprintln(w, " ------\t------\t------\t------\t-------") + for _, run := range runs { + passed, failed := "", "" + if run.ResultCounts != nil { + passed = 
fmt.Sprintf("%d/%d", run.ResultCounts.Passed, run.ResultCounts.Total) + failed = fmt.Sprintf("%d", run.ResultCounts.Failed) + } + fmt.Fprintf(w, " %s\t%s\t%s\t%s\t%s\n", + run.ID, + run.Status, + passed, + failed, + formatTimestamp(run.CreatedAt), + ) + } + if err := w.Flush(); err != nil { + return err + } + fmt.Printf("\n(showing %d of %d runs — use --limit to change)\n", min(limit, len(runs)), len(runs)) + return nil +} + +func printEvalRunSummary(evalID string, run *eval_api.OpenAIEvalRun) error { + fmt.Printf("Eval: %s\n", evalID) + fmt.Printf("Run: %s\n", run.ID) + if run.Name != "" { + fmt.Printf("Name: %s\n", run.Name) + } + fmt.Printf("Status: %s\n", run.Status) + fmt.Printf("Created: %s\n", formatTimestamp(run.CreatedAt)) + if run.CreatedBy != "" { + fmt.Printf("Created by: %s\n", run.CreatedBy) + } + + // Agent target info from data source. + if run.DataSource != nil && run.DataSource.Target != nil { + agent := run.DataSource.Target.Name + if run.DataSource.Target.Version != nil { + agent += " v" + *run.DataSource.Target.Version + } + fmt.Printf("Agent: %s\n", agent) + } + + // Result counts. + if rc := run.ResultCounts; rc != nil { + fmt.Printf("\nResults: %d total, %s passed, %s failed, %s errored\n", + rc.Total, + color.GreenString("%d", rc.Passed), + color.RedString("%d", rc.Failed), + color.YellowString("%d", rc.Errored), + ) + } + + // Per-criteria breakdown. 
+ if len(run.PerTestingCriteria) > 0 { + fmt.Println("\nPer-criteria results:") + for _, c := range run.PerTestingCriteria { + fmt.Printf(" %s: %s passed, %s failed, %s errored\n", + c.TestingCriteria, + color.GreenString("%d", c.Passed), + color.RedString("%d", c.Failed), + color.YellowString("%d", c.Errored), + ) + } + } + + if run.ReportURL != "" { + fmt.Printf("\nReport: %s\n", run.ReportURL) + } + return nil +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go new file mode 100644 index 00000000000..ce8211963d7 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go @@ -0,0 +1,531 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "context" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + "azureaiagent/internal/pkg/agents/agent_yaml" + "azureaiagent/internal/pkg/agents/dataset_api" + "azureaiagent/internal/pkg/agents/eval_api" + "azureaiagent/internal/pkg/agents/opteval" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// fakeTokenCredential satisfies azcore.TokenCredential for tests. 
+type fakeTokenCredential struct{} + +func (f *fakeTokenCredential) GetToken( + _ context.Context, + _ policy.TokenRequestOptions, +) (azcore.AccessToken, error) { + return azcore.AccessToken{Token: "fake-token"}, nil +} + +// --------------------------------------------------------------------------- +// newEvalCommand — command tree shape +// --------------------------------------------------------------------------- + +func TestNewEvalCommand_HasExpectedSubcommands(t *testing.T) { + t.Parallel() + + cmd := newEvalCommand(&azdext.ExtensionContext{}) + names := make([]string, 0, len(cmd.Commands())) + for _, sub := range cmd.Commands() { + names = append(names, sub.Name()) + } + + assert.Contains(t, names, "init") + assert.Contains(t, names, "run") + assert.Contains(t, names, "list") + assert.Contains(t, names, "show") +} + +func TestNewEvalCommand_UseString(t *testing.T) { + t.Parallel() + cmd := newEvalCommand(&azdext.ExtensionContext{}) + assert.Equal(t, "eval ", cmd.Use) +} + +// --------------------------------------------------------------------------- +// GenerationJob methods +// --------------------------------------------------------------------------- + +func TestGenerationJob_OperationID(t *testing.T) { + t.Parallel() + assert.Equal(t, "op-123", (&eval_api.GenerationJob{ID: "op-123"}).OperationID()) + assert.Equal(t, "", (&eval_api.GenerationJob{}).OperationID()) +} + +func TestGenerationJob_NormalizedStatus(t *testing.T) { + t.Parallel() + assert.Equal(t, "completed", (&eval_api.GenerationJob{Status: "completed"}).NormalizedStatus()) + assert.Equal(t, "running", (&eval_api.GenerationJob{}).NormalizedStatus()) +} + +func TestGenerationJob_ResolvedDatasetName(t *testing.T) { + t.Parallel() + assert.Equal(t, "ds-1", (&eval_api.GenerationJob{DatasetName: "ds-1", Name: "fallback"}).ResolvedDatasetName()) + assert.Equal(t, "fallback", (&eval_api.GenerationJob{Name: "fallback"}).ResolvedDatasetName()) + assert.Equal(t, "", 
(&eval_api.GenerationJob{}).ResolvedDatasetName()) + + // Extracts name from the result JSON when dataset_name and name are empty. + job := &eval_api.GenerationJob{ + Result: json.RawMessage(`{"name":"generated-ds","version":"v2"}`), + } + assert.Equal(t, "generated-ds", job.ResolvedDatasetName()) +} + +func TestGenerationJob_ResolvedDatasetVersion(t *testing.T) { + t.Parallel() + assert.Equal(t, "v2", (&eval_api.GenerationJob{DatasetVersion: "v2"}).ResolvedDatasetVersion()) + assert.Equal(t, "v3", (&eval_api.GenerationJob{Version: "v3"}).ResolvedDatasetVersion()) + assert.Equal(t, "v1", (&eval_api.GenerationJob{}).ResolvedDatasetVersion()) + + // Extracts version from the result JSON when dataset_version and version are empty. + job := &eval_api.GenerationJob{ + Result: json.RawMessage(`{"name":"ds","version":"v5"}`), + } + assert.Equal(t, "v5", job.ResolvedDatasetVersion()) +} + +func TestGenerationJob_ResolvedEvaluatorName(t *testing.T) { + t.Parallel() + assert.Equal(t, "quality", (&eval_api.GenerationJob{EvaluatorName: "quality", Name: "fb"}).ResolvedEvaluatorName()) + assert.Equal(t, "fb", (&eval_api.GenerationJob{Name: "fb"}).ResolvedEvaluatorName()) + assert.Equal(t, "", (&eval_api.GenerationJob{}).ResolvedEvaluatorName()) + + // Extracts name from the result JSON when evaluator_name and name are empty. 
+ job := &eval_api.GenerationJob{ + Result: json.RawMessage(`{"name":"smoke-core","display_name":"smoke-core"}`), + } + assert.Equal(t, "smoke-core", job.ResolvedEvaluatorName()) +} + +func TestOpenAIEval_ResolvedID(t *testing.T) { + t.Parallel() + assert.Equal(t, "eval-1", (&eval_api.OpenAIEval{ID: "eval-1", Name: "n"}).ResolvedID()) + assert.Equal(t, "n", (&eval_api.OpenAIEval{Name: "n"}).ResolvedID()) + assert.Equal(t, "", (&eval_api.OpenAIEval{}).ResolvedID()) +} + +// --------------------------------------------------------------------------- +// formatAny / formatTimestamp +// --------------------------------------------------------------------------- + +func TestFormatAny(t *testing.T) { + t.Parallel() + + assert.Equal(t, "", formatAny(nil)) + assert.Equal(t, "hello", formatAny("hello")) + assert.Equal(t, "42", formatAny(float64(42))) + assert.Equal(t, "true", formatAny(true)) +} + +func TestFormatTimestamp(t *testing.T) { + t.Parallel() + + assert.Equal(t, "2024-01-15 10:30 UTC", formatTimestamp("2024-01-15 10:30 UTC")) + assert.Contains(t, formatTimestamp(float64(1705312200)), "2024-01-15") + assert.Contains(t, formatTimestamp(int64(1705312200)), "2024-01-15") + assert.Equal(t, "", formatTimestamp(nil)) + assert.Equal(t, "", formatTimestamp(true)) +} + +// --------------------------------------------------------------------------- +// resolveEvalOutputPath / resolveEvalConfigPath +// --------------------------------------------------------------------------- + +func TestResolveEvalOutputPath(t *testing.T) { + t.Parallel() + + t.Run("absolute path returned as-is", func(t *testing.T) { + t.Parallel() + abs := filepath.Join(os.TempDir(), "eval.yaml") + assert.Equal(t, abs, resolveEvalOutputPath(abs, "/project")) + }) + + t.Run("relative path joined with agent project", func(t *testing.T) { + t.Parallel() + result := resolveEvalOutputPath("eval.yaml", "/project/agent") + assert.Equal(t, filepath.Join("/project/agent", "eval.yaml"), result) + }) +} + +func 
TestResolveEvalConfigPath(t *testing.T) { + t.Parallel() + + t.Run("absolute path returned as-is", func(t *testing.T) { + t.Parallel() + abs := filepath.Join(os.TempDir(), "eval.yaml") + assert.Equal(t, abs, resolveEvalConfigPath(abs, "/project")) + }) + + t.Run("relative path joined with agent project when file does not exist", func(t *testing.T) { + t.Parallel() + result := resolveEvalConfigPath("nonexistent.yaml", "/project/agent") + assert.Equal(t, filepath.Join("/project/agent", "nonexistent.yaml"), result) + }) +} + +// --------------------------------------------------------------------------- +// detectEvalAgentKind +// --------------------------------------------------------------------------- + +func TestDetectEvalAgentKind(t *testing.T) { + t.Parallel() + + t.Run("detects hosted kind from agent.yaml", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + writeTestFile(t, dir, "agent.yaml", "kind: hosted\nname: test-agent\n") + kind, path := detectEvalAgentKind(dir) + assert.Equal(t, agent_yaml.AgentKindHosted, kind) + assert.Equal(t, filepath.Join(dir, "agent.yaml"), path) + }) + + t.Run("detects prompt kind from agent.yml", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + writeTestFile(t, dir, "agent.yml", "kind: prompt\nname: test-agent\n") + kind, path := detectEvalAgentKind(dir) + assert.Equal(t, agent_yaml.AgentKindPrompt, kind) + assert.Equal(t, filepath.Join(dir, "agent.yml"), path) + }) + + t.Run("returns empty for missing manifest", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + kind, path := detectEvalAgentKind(dir) + assert.Empty(t, kind) + assert.Empty(t, path) + }) + + t.Run("returns empty for invalid kind", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + writeTestFile(t, dir, "agent.yaml", "kind: invalid_kind_xyz\nname: test-agent\n") + kind, path := detectEvalAgentKind(dir) + assert.Empty(t, kind) + assert.Empty(t, path) + }) + + t.Run("returns empty for malformed YAML", func(t *testing.T) { + 
t.Parallel() + dir := t.TempDir() + writeTestFile(t, dir, "agent.yaml", "{{invalid yaml}}") + kind, path := detectEvalAgentKind(dir) + assert.Empty(t, kind) + assert.Empty(t, path) + }) +} + +// --------------------------------------------------------------------------- +// ensureFoundryDirs +// --------------------------------------------------------------------------- + +func TestEnsureFoundryDirs(t *testing.T) { + t.Parallel() + dir := t.TempDir() + + err := ensureFoundryDirs(dir) + require.NoError(t, err) + + for _, sub := range []string{"datasets", "evaluators", "results"} { + path := filepath.Join(dir, ".azure", ".foundry", sub) + info, err := os.Stat(path) + require.NoError(t, err, "expected %s to exist", sub) + assert.True(t, info.IsDir()) + } +} + +func TestEnsureFoundryDirs_Idempotent(t *testing.T) { + t.Parallel() + dir := t.TempDir() + + require.NoError(t, ensureFoundryDirs(dir)) + require.NoError(t, ensureFoundryDirs(dir)) +} + +// --------------------------------------------------------------------------- +// evalState — stored in azd environment (integration-tested via eval init/run) +// --------------------------------------------------------------------------- + +// --------------------------------------------------------------------------- +// writeEvalReviewArtifacts +// --------------------------------------------------------------------------- + +func TestWriteEvalReviewArtifacts(t *testing.T) { + t.Parallel() + dir := t.TempDir() + require.NoError(t, ensureFoundryDirs(dir)) + + cfg := &evalConfig{} + cfg.DatasetReference = &evalDatasetRef{Name: "test-data", Version: "v1"} + cfg.Evaluators = []string{"quality"} + + writeEvalReviewArtifacts(dir, cfg) + + // writeEvalReviewArtifacts only writes evaluator stubs; dataset download + // is handled separately by downloadDatasetArtifact. 
+ dsPath := filepath.Join(dir, ".azure", ".foundry", "datasets", "test-data-v1.jsonl") + assert.NoFileExists(t, dsPath) + + evPath := filepath.Join(dir, ".azure", ".foundry", "evaluators", "quality.yaml") + assert.FileExists(t, evPath) +} + +func TestWriteEvalReviewArtifacts_NilDataset(t *testing.T) { + t.Parallel() + dir := t.TempDir() + require.NoError(t, ensureFoundryDirs(dir)) + + cfg := &evalConfig{} + // No dataset reference — should not panic. + writeEvalReviewArtifacts(dir, cfg) +} + +// --------------------------------------------------------------------------- +// saveEvaluatorResult +// --------------------------------------------------------------------------- + +func TestSaveEvaluatorResult(t *testing.T) { + t.Parallel() + dir := t.TempDir() + require.NoError(t, ensureFoundryDirs(dir)) + + result := json.RawMessage(`{"name":"smoke-core","description":"An evaluator"}`) + saveEvaluatorResult(dir, "smoke-core", result) + + path := filepath.Join(dir, ".azure", ".foundry", "evaluators", "smoke-core.json") + assert.FileExists(t, path) + data, err := os.ReadFile(path) + require.NoError(t, err) + assert.Contains(t, string(data), `"name": "smoke-core"`) + assert.Contains(t, string(data), `"description": "An evaluator"`) +} + +func TestSaveEvaluatorResult_NilResult(t *testing.T) { + t.Parallel() + dir := t.TempDir() + require.NoError(t, ensureFoundryDirs(dir)) + + saveEvaluatorResult(dir, "test", nil) + path := filepath.Join(dir, ".azure", ".foundry", "evaluators", "test.json") + assert.NoFileExists(t, path) +} + +func TestSaveEvaluatorResult_EmptyName(t *testing.T) { + t.Parallel() + dir := t.TempDir() + require.NoError(t, ensureFoundryDirs(dir)) + + saveEvaluatorResult(dir, "", json.RawMessage(`{"name":"x"}`)) + // Should not create any file. 
+ matches, _ := filepath.Glob(filepath.Join(dir, ".azure", ".foundry", "evaluators", "*.json")) + assert.Empty(t, matches) +} + +func TestSaveDatasetGenerationResult(t *testing.T) { + t.Parallel() + dir := t.TempDir() + require.NoError(t, ensureFoundryDirs(dir)) + + result := json.RawMessage(`{"name":"my-dataset","version":"v2"}`) + saveDatasetGenerationResult(dir, "my-dataset", result) + + path := filepath.Join(dir, ".azure", ".foundry", "datasets", "my-dataset.json") + assert.FileExists(t, path) + data, err := os.ReadFile(path) + require.NoError(t, err) + assert.Contains(t, string(data), `"name": "my-dataset"`) + assert.Contains(t, string(data), `"version": "v2"`) +} + +func TestWriteEvalReviewArtifacts_SkipsWhenResultExists(t *testing.T) { + t.Parallel() + dir := t.TempDir() + require.NoError(t, ensureFoundryDirs(dir)) + + // Pre-save a result file. + saveEvaluatorResult(dir, "quality", json.RawMessage(`{"name":"quality"}`)) + + cfg := &evalConfig{} + cfg.Evaluators = []string{"quality"} + writeEvalReviewArtifacts(dir, cfg) + + // Should NOT create a .yaml stub since .json result already exists. + yamlPath := filepath.Join(dir, ".azure", ".foundry", "evaluators", "quality.yaml") + assert.NoFileExists(t, yamlPath) +} + +// --------------------------------------------------------------------------- +// downloadDatasetArtifact +// --------------------------------------------------------------------------- + +func TestDownloadDatasetArtifact_NilDataset(t *testing.T) { + t.Parallel() + err := downloadDatasetArtifact(t.Context(), nil, t.TempDir(), nil, "2025-11-15-preview") + require.NoError(t, err) +} + +func TestDownloadDatasetArtifact_WritesBlob(t *testing.T) { + t.Parallel() + + // The Azure SDK bearer token policy rejects non-TLS test servers, so the + // credential call will fail. downloadDatasetArtifact gracefully writes a + // placeholder in that case — verify the placeholder is created. 
+ apiServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"sas_uri":"http://blob.example/data?sig=abc"}`)) + })) + t.Cleanup(apiServer.Close) + + client := dataset_api.NewDatasetClient(apiServer.URL, &fakeTokenCredential{}) + dir := t.TempDir() + require.NoError(t, ensureFoundryDirs(dir)) + + ref := &evalDatasetRef{Name: "test-ds", Version: "v1"} + err := downloadDatasetArtifact(t.Context(), client, dir, ref, "2025-11-15-preview") + require.NoError(t, err) + + // Placeholder is written when credential fetch fails (non-TLS test server). + dest := datasetArtifactPath(dir, ref) + assert.FileExists(t, dest) + data, err := os.ReadFile(dest) + require.NoError(t, err) + assert.Equal(t, "{}\n", string(data)) +} + +// --------------------------------------------------------------------------- +// datasetArtifactPath +// --------------------------------------------------------------------------- + +func TestDatasetArtifactPath(t *testing.T) { + t.Parallel() + ref := &evalDatasetRef{Name: "golden", Version: "v2"} + result := datasetArtifactPath("/project", ref) + assert.Equal(t, filepath.Join("/project", ".azure", ".foundry", "datasets", "golden-v2.jsonl"), result) +} + +// --------------------------------------------------------------------------- +// writeJSONFile +// --------------------------------------------------------------------------- + +func TestWriteJSONFile(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "result.json") + + err := writeJSONFile(path, map[string]string{"hello": "world"}) + require.NoError(t, err) + + data, err := os.ReadFile(path) + require.NoError(t, err) + assert.Contains(t, string(data), `"hello": "world"`) +} + +// --------------------------------------------------------------------------- +// evalAgentContextError +// 
--------------------------------------------------------------------------- + +func TestEvalAgentContextError(t *testing.T) { + t.Parallel() + + t.Run("without cause", func(t *testing.T) { + t.Parallel() + err := evalAgentContextError(nil) + assert.Contains(t, err.Error(), "agent context could not be resolved") + var localErr *azdext.LocalError + require.True(t, errors.As(err, &localErr)) + assert.Contains(t, localErr.Suggestion, "azd ai agent init") + }) + + t.Run("with cause", func(t *testing.T) { + t.Parallel() + cause := assert.AnError + err := evalAgentContextError(cause) + assert.Contains(t, err.Error(), cause.Error()) + var localErr *azdext.LocalError + require.True(t, errors.As(err, &localErr)) + assert.Contains(t, localErr.Suggestion, "--agent") + assert.Contains(t, localErr.Suggestion, "--project-endpoint") + }) +} + +// --------------------------------------------------------------------------- +// relPathForYaml +// --------------------------------------------------------------------------- + +func TestRelPathForYaml(t *testing.T) { + t.Parallel() + + result := relPathForYaml("/project", filepath.Join("/project", "src", "agent.yaml")) + assert.Equal(t, "src/agent.yaml", result) +} + +// --------------------------------------------------------------------------- +// writeEvalConfig / readEvalConfig round-trip +// --------------------------------------------------------------------------- + +func TestEvalConfigRoundTrip(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "eval.yaml") + + original := &evalConfig{ + Config: opteval.Config{ + Name: "smoke-core", + Agent: evalAgentRef{ + Name: "my-agent", + Kind: agent_yaml.AgentKindHosted, + Version: "v1", + }, + DatasetReference: &evalDatasetRef{Name: "ds", Version: "v1"}, + Evaluators: []string{"builtin.task_adherence"}, + }, + Options: &opteval.Options{ + EvalModel: "gpt-4o", + }, + GenerationInstruction: "Test this agent", + MaxSamples: 50, + } + + err := 
writeEvalConfig(path, original) + require.NoError(t, err) + + loaded, err := readEvalConfig(path) + require.NoError(t, err) + + assert.Equal(t, original.Name, loaded.Name) + assert.Equal(t, original.Agent.Name, loaded.Agent.Name) + assert.Equal(t, original.Agent.Kind, loaded.Agent.Kind) + assert.Equal(t, original.Agent.Version, loaded.Agent.Version) + assert.Equal(t, "gpt-4o", loaded.Options.EvalModel) + assert.Equal(t, original.GenerationInstruction, loaded.GenerationInstruction) + assert.Equal(t, original.MaxSamples, loaded.MaxSamples) + require.NotNil(t, loaded.DatasetReference) + assert.Equal(t, "ds", loaded.DatasetReference.Name) + require.Len(t, loaded.Evaluators, 1) + assert.Equal(t, "builtin.task_adherence", loaded.Evaluators[0]) +} + +func TestReadEvalConfig_MissingFile(t *testing.T) { + t.Parallel() + _, err := readEvalConfig("/nonexistent/path/eval.yaml") + assert.Error(t, err) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go new file mode 100644 index 00000000000..9a8c88b49e6 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -0,0 +1,393 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "time" + + "azureaiagent/internal/pkg/agents/optimize_api" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +// optimizeAgentContext holds the resolved agent name and project directory. +type optimizeAgentContext struct { + agentName string + agentProject string // project directory path (empty if not resolved from azd project) +} + +// resolveOptimizeAgent resolves the agent name and project directory using: +// 1. Explicit --agent flag +// 2. azd project context (resolveAgentService + environment variables) +// 3. 
Error with guidance +func resolveOptimizeAgent(ctx context.Context, flagValue string, noPrompt bool) (*optimizeAgentContext, error) { + if flagValue != "" { + return &optimizeAgentContext{agentName: flagValue}, nil + } + + // Try resolving from azd project — single resolveAgentService call + // to get both project path and agent info from environment. + azdClient, err := azdext.NewAzdClient() + if err == nil { + defer azdClient.Close() + + svc, project, svcErr := resolveAgentService(ctx, azdClient, "", noPrompt) + if svcErr == nil && svc != nil && project != nil { + agentProject := filepath.Join(project.Path, svc.RelativePath) + serviceKey := toServiceKey(svc.Name) + + // Read agent name from azd environment + envResp, envErr := azdClient.Environment().GetCurrent(ctx, &azdext.EmptyRequest{}) + if envErr == nil && envResp.Environment != nil { + nameKey := fmt.Sprintf("AGENT_%s_NAME", serviceKey) + if v, e := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envResp.Environment.Name, + Key: nameKey, + }); e == nil && v.Value != "" { + return &optimizeAgentContext{ + agentName: v.Value, + agentProject: agentProject, + }, nil + } + } + } + } + + return nil, fmt.Errorf("agent name is required: use --agent , or run from an azd project after 'azd deploy'") +} + +type optimizeFlags struct { + configFile string + agent string + evalModel string + strategies []string + noWait bool + watch bool + pollInterval int + optimizeConnectionFlags +} + +func newOptimizeCommand(extCtx *azdext.ExtensionContext) *cobra.Command { + flags := &optimizeFlags{} + action := &OptimizeAction{flags: flags, noPrompt: extCtx.NoPrompt} + + cmd := &cobra.Command{ + Use: "optimize [agent-name]", + Short: "Evaluate and optimize AI agents.", + Long: `Evaluate and optimize AI agents — baseline scoring and iterative improvement. + +When run without a subcommand, submits an optimization job. 
+Use --config for a custom YAML spec, or just provide the agent name to use sensible defaults.`, + Example: ` # Optimize (auto-detect agent from azd project) + azd ai agent optimize + + # Optimize a specific agent + azd ai agent optimize my-agent + + # Optimize with skill strategy + azd ai agent optimize --strategy skill + + # Optimize with both strategies + azd ai agent optimize --strategy instruction --strategy skill + + # Full control via config file + azd ai agent optimize --config spec.yaml + + # Subcommands + azd ai agent optimize status --watch + azd ai agent optimize list + azd ai agent optimize cancel + azd ai agent optimize deploy --candidate `, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + ctx := azdext.WithAccessToken(cmd.Context()) + setupDebugLogging(cmd.Flags()) + + // Positional arg fills in agent name + if len(args) > 0 && flags.agent == "" { + flags.agent = args[0] + } + + return action.Run(ctx, cmd) + }, + } + + cmd.Flags().StringVarP(&flags.configFile, "config", "c", "", "Path to YAML config file (optional — uses defaults if omitted)") + cmd.Flags().StringVarP(&flags.agent, "agent", "a", "", "Agent name (auto-detected from azd project if omitted)") + cmd.Flags().StringVarP(&flags.evalModel, "eval-model", "m", "gpt-4.1-mini", "Model for evaluation") + cmd.Flags().StringArrayVarP(&flags.strategies, "strategy", "s", nil, "Optimization strategy: instruction, skill (repeatable)") + cmd.Flags().BoolVar(&flags.noWait, "no-wait", false, "Submit job and return immediately without waiting for completion") + cmd.Flags().BoolVar(&flags.watch, "watch", true, "Watch for job completion (opposite of --no-wait)") + cmd.Flags().IntVar(&flags.pollInterval, "poll-interval", 5, "Polling interval in seconds") + flags.optimizeConnectionFlags.register(cmd) + + cmd.AddCommand(newOptimizeStatusCommand()) + cmd.AddCommand(newOptimizeListCommand()) + cmd.AddCommand(newOptimizeCancelCommand()) + 
cmd.AddCommand(newOptimizeApplyCommand(extCtx)) + cmd.AddCommand(newOptimizeDeployCommand()) + + return cmd +} + +// OptimizeAction implements the optimize (submit job) command. +type OptimizeAction struct { + flags *optimizeFlags + noPrompt bool +} + +func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { + endpoint, err := a.flags.resolve(ctx) + if err != nil { + return err + } + + var cfg *OptimizeConfig + configSource := "" // tracks where the config came from for user messaging + + if a.flags.configFile != "" { + cfg, err = LoadOptimizeConfig(a.flags.configFile) + if err != nil { + return fmt.Errorf("%w\n\nCheck that the file path is correct and contains valid YAML", err) + } + configSource = a.flags.configFile + } else { + resolved, err := resolveOptimizeAgent(ctx, a.flags.agent, a.noPrompt) + if err != nil { + return err + } + + // Check if eval.yaml exists in the agent project and offer to use it + if resolved.agentProject != "" { + evalPath := filepath.Join(resolved.agentProject, defaultEvalConfigName) + if _, statErr := os.Stat(evalPath); statErr == nil && !a.noPrompt { + azdClient, clientErr := azdext.NewAzdClient() + if clientErr == nil { + defer azdClient.Close() + resp, promptErr := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: fmt.Sprintf("Found %s in project. 
Use it for optimization?", defaultEvalConfigName), + DefaultValue: new(true), + }, + }) + if promptErr == nil && resp.Value != nil && *resp.Value { + cfg, err = LoadOptimizeConfig(evalPath) + if err != nil { + return fmt.Errorf("failed to load %s: %w", evalPath, err) + } + configSource = evalPath + } + } + } + } + + if cfg == nil { + cfg = defaultOptimizeConfig(resolved.agentName) + } + } + + if err := cfg.Validate(); err != nil { + return fmt.Errorf("invalid config: %w", err) + } + + // CLI flags override config values + if a.flags.evalModel != "" { + cfg.Options.EvalModel = a.flags.evalModel + } + if len(a.flags.strategies) > 0 { + cfg.Options.Strategies = a.flags.strategies + } + + out := cmd.OutOrStdout() + bold := color.New(color.Bold) + + bold.Fprintf(out, "Optimizing agent %q...\n", cfg.Agent.Name) + if configSource == "" { + fmt.Fprintf(out, " Dataset: built-in (3 tasks, 12 criteria)\n") + } else { + fmt.Fprintf(out, " Config: %s\n", configSource) + } + + credential, err := newAgentCredential() + if err != nil { + return err + } + + client := optimize_api.NewOptimizeClient(endpoint, credential) + + optimizeReq, err := cfg.ToRequest(endpoint) + if err != nil { + return fmt.Errorf("failed to build optimization request: %w", err) + } + + resp, err := client.StartOptimize(ctx, optimizeReq) + if err != nil { + return fmt.Errorf("failed to submit optimization job: %w\n\nCheck that the endpoint %q is reachable", err, endpoint) + } + + fmt.Fprintf(out, " Job ID: %s\n", color.CyanString(resp.OperationID)) + fmt.Fprintf(out, " Status: %s\n\n", resp.Status) + + // Store last operation ID in azd environment for use by status/deploy + saveLastOptimizeJobID(ctx, resp.OperationID) + + if !a.flags.noWait && !optimize_api.IsTerminal(resp.Status) { + finalStatus, err := pollOptimizeJob(cmd, client, a.flags.pollInterval, resp.OperationID) + if err != nil { + return err + } + printOptimizeResults(out, finalStatus) + } + + return nil +} + +func pollOptimizeJob( + cmd 
*cobra.Command, + client *optimize_api.OptimizeClient, + pollInterval int, + operationID string, +) (*optimize_api.OptimizeJobStatus, error) { + out := cmd.OutOrStdout() + spinFrames := []string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"} + frameIdx := 0 + startTime := time.Now() + + poller := &optimize_api.Poller{ + Client: client, + OperationID: operationID, + Interval: time.Duration(pollInterval) * time.Second, + OnProgress: func(status *optimize_api.OptimizeJobStatus) { + elapsed := time.Since(startTime).Truncate(time.Second) + spin := spinFrames[frameIdx%len(spinFrames)] + frameIdx++ + + progress := fmt.Sprintf("\r %s %s", spin, status.Status) + if status.Progress != nil { + p := status.Progress + if p.CurrentStrategy != "" { + progress += fmt.Sprintf(" · strategy: %s", p.CurrentStrategy) + } + if p.CurrentIteration > 0 { + progress += fmt.Sprintf(" · iteration %d", p.CurrentIteration) + } + if p.BestScore > 0 { + progress += fmt.Sprintf(" · score: %.2f", p.BestScore) + } + } + progress += fmt.Sprintf(" · %s", elapsed) + fmt.Fprintf(out, "%-80s", progress) + }, + } + + finalStatus, err := poller.PollUntilDone(cmd.Context()) + fmt.Fprintln(out) + if err != nil { + return nil, fmt.Errorf("failed while polling optimization job: %w", err) + } + + return finalStatus, nil +} + +func printOptimizeResults(out io.Writer, status *optimize_api.OptimizeJobStatus) { + if status.Error != nil { + fmt.Fprintf(out, "\n %s %s\n", color.RedString("Error:"), status.Error.Message) + } + + if len(status.Candidates) == 0 { + return + } + + bold := color.New(color.Bold) + green := color.New(color.FgGreen) + + bold.Fprintln(out, "\nResults:") + fmt.Fprintf(out, " %-20s %7s %7s %8s\n", "Candidate", "Score", "Pass", "Tokens") + fmt.Fprintf(out, " %-20s %7s %7s %8s\n", + strings.Repeat("─", 20), strings.Repeat("─", 7), + strings.Repeat("─", 7), strings.Repeat("─", 8)) + + bestName := "" + if status.Best != nil { + bestName = status.Best.Name + } + + for _, c := range 
// truncateString shortens s to at most maxLen characters. When text is cut off
// and maxLen is greater than 3, the result ends in "..." (the ellipsis counts
// toward maxLen); for maxLen of 3 or less the string is cut without an ellipsis.
//
// Length is measured in runes rather than bytes so that a multi-byte UTF-8
// character is never split in half. A maxLen of zero or less yields the empty
// string instead of panicking on a negative slice bound.
func truncateString(s string, maxLen int) string {
	if maxLen <= 0 {
		return ""
	}
	// The byte length is an upper bound on the rune count, so this cheap check
	// settles the common short/ASCII case without allocating.
	if len(s) <= maxLen {
		return s
	}

	runes := []rune(s)
	if len(runes) <= maxLen {
		return s
	}
	if maxLen <= 3 {
		return string(runes[:maxLen])
	}
	return string(runes[:maxLen-3]) + "..."
}
All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "context" + "encoding/json" + "fmt" + "io" + "path/filepath" + + "azureaiagent/internal/pkg/agents/optimize_api" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +type optimizeApplyFlags struct { + candidate string + agent string + optimizeConnectionFlags +} + +func newOptimizeApplyCommand(extCtx *azdext.ExtensionContext) *cobra.Command { + flags := &optimizeApplyFlags{} + action := &OptimizeApplyAction{flags: flags, noPrompt: extCtx.NoPrompt} + + cmd := &cobra.Command{ + Use: "apply", + Short: "Apply optimized candidate configuration locally to your azd project.", + Long: `Download the optimized configuration and skill files from an optimization +candidate and write them into your local azd project. + +After applying, run 'azd deploy' to deploy the optimized agent version.`, + Example: ` # Apply candidate config locally, then deploy + azd ai agent optimize apply --candidate cand_abc123 + azd deploy`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + ctx := azdext.WithAccessToken(cmd.Context()) + setupDebugLogging(cmd.Flags()) + return action.Run(ctx, cmd) + }, + } + + cmd.Flags().StringVar(&flags.candidate, "candidate", "", "Candidate ID from optimization results (required)") + cmd.Flags().StringVar(&flags.agent, "agent", "", "Agent service name (auto-detected from azure.yaml)") + _ = cmd.MarkFlagRequired("candidate") + flags.optimizeConnectionFlags.register(cmd) + + return cmd +} + +// OptimizeApplyAction implements the optimize apply command. 
+type OptimizeApplyAction struct { + flags *optimizeApplyFlags + noPrompt bool +} + +func (a *OptimizeApplyAction) Run(ctx context.Context, cmd *cobra.Command) error { + out := cmd.OutOrStdout() + bold := color.New(color.Bold) + + azdClient, err := azdext.NewAzdClient() + if err != nil { + return fmt.Errorf("failed to create azd client: %w\n\n"+ + "'optimize apply' requires an azd project. Use 'optimize deploy' for standalone API deployment", err) + } + defer azdClient.Close() + + svc, project, err := resolveAgentService(ctx, azdClient, a.flags.agent, a.noPrompt) + if err != nil || project == nil || svc == nil { + return fmt.Errorf("could not resolve agent service in azd project: %w\n\n"+ + "Run 'azd ai agent init' first, or use 'optimize deploy' for standalone API deployment", err) + } + + return a.apply(ctx, azdClient, svc, project, out, bold) +} + +func (a *OptimizeApplyAction) apply( + ctx context.Context, + azdClient *azdext.AzdClient, + svc *azdext.ServiceConfig, + project *azdext.ProjectConfig, + out io.Writer, + bold *color.Color, +) error { + projectEndpoint, err := resolveProjectEndpointForDeploy(ctx, &a.flags.optimizeConnectionFlags) + if err != nil { + return err + } + agentYamlPath := filepath.Join(project.Path, svc.RelativePath, "agent.yaml") + + bold.Fprintf(out, "Applying optimization candidate %s...\n\n", a.flags.candidate) + + credential, err := newAgentCredential() + if err != nil { + return err + } + optClient := optimize_api.NewOptimizeClient(projectEndpoint, credential) + + // Step 1: Fetch candidate config. 
+ fmt.Fprintf(out, " Fetching candidate config...\n") + candidateConfig, err := optClient.GetCandidateConfig(ctx, a.flags.candidate) + if err != nil { + return fmt.Errorf("failed to fetch candidate config: %w", err) + } + + configJSON, err := json.Marshal(candidateConfig) + if err != nil { + return fmt.Errorf("failed to serialize candidate config: %w", err) + } + + // Step 2: Write OPTIMIZATION_CONFIG and OPTIMIZATION_CANDIDATE_ID into agent.yaml. + fmt.Fprintf(out, " Updating %s...\n", agentYamlPath) + if err := upsertAgentYamlEnvVar(agentYamlPath, "OPTIMIZATION_CONFIG", string(configJSON)); err != nil { + return fmt.Errorf("failed to update agent.yaml: %w", err) + } + if err := upsertAgentYamlEnvVar(agentYamlPath, "OPTIMIZATION_CANDIDATE_ID", a.flags.candidate); err != nil { + return fmt.Errorf("failed to update agent.yaml: %w", err) + } + + // Step 3: Download skill files from the candidate manifest. + serviceDir := filepath.Join(project.Path, svc.RelativePath) + if n, dlErr := downloadSkillFiles(ctx, optClient, a.flags.candidate, serviceDir, out); dlErr != nil { + fmt.Fprintf(out, " warning: failed to download skill files: %s\n", dlErr) + } else if n > 0 { + fmt.Fprintf(out, " Downloaded %d skill file(s)\n", n) + } + + // Step 4: Store candidate ID in the azd environment for postdeploy tracking. + serviceKey := toServiceKey(svc.Name) + candidateKey := fmt.Sprintf("AGENT_%s_OPTIMIZATION_CANDIDATE_ID", serviceKey) + + envResp, err := azdClient.Environment().GetCurrent(ctx, &azdext.EmptyRequest{}) + if err != nil { + return fmt.Errorf("failed to get current environment: %w", err) + } + if _, err := azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ + EnvName: envResp.Environment.Name, + Key: candidateKey, + Value: a.flags.candidate, + }); err != nil { + return fmt.Errorf("failed to store candidate ID in azd environment: %w", err) + } + + // Done — prompt the user to deploy. 
+ fmt.Fprintln(out) + color.New(color.FgGreen, color.Bold).Fprintf(out, + " ✓ Candidate %s applied successfully\n\n", a.flags.candidate) + fmt.Fprintf(out, " Run %s to deploy the optimized agent.\n", + color.CyanString("azd deploy --service %s", svc.Name)) + + return nil +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_cancel.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_cancel.go new file mode 100644 index 00000000000..7908627eede --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_cancel.go @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "fmt" + + "azureaiagent/internal/pkg/agents/optimize_api" + + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +type optimizeCancelFlags struct { + optimizeConnectionFlags +} + +func newOptimizeCancelCommand() *cobra.Command { + flags := &optimizeCancelFlags{} + + cmd := &cobra.Command{ + Use: "cancel ", + Short: "Cancel a running optimization job.", + Long: `Cancel a running optimization or evaluation job by its operation ID. 
+ +Only jobs in a non-terminal state (pending, running) can be cancelled.`, + Example: ` # Cancel a running job + azd ai agent optimize cancel opt_abc123`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runOptimizeCancel(cmd, flags, args[0]) + }, + } + + flags.optimizeConnectionFlags.register(cmd) + + return cmd +} + +func runOptimizeCancel(cmd *cobra.Command, flags *optimizeCancelFlags, operationID string) error { + endpoint, err := flags.resolve(cmd.Context()) + if err != nil { + return err + } + + credential, err := newAgentCredential() + if err != nil { + return err + } + + client := optimize_api.NewOptimizeClient(endpoint, credential) + + cancelResp, err := client.CancelOptimize(cmd.Context(), operationID) + if err != nil { + return fmt.Errorf("failed to cancel job: %w\n\nCheck that the operation ID %q is correct and the job is still running", err, operationID) + } + + out := cmd.OutOrStdout() + fmt.Fprintf(out, " %s Job %s has been cancelled (status: %s).\n", + color.YellowString("⚠"), operationID, cancelResp.Status) + fmt.Fprintf(out, "\n Check status with:\n azd ai agent optimize status %s\n", operationID) + + return nil +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_cancel_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_cancel_test.go new file mode 100644 index 00000000000..af815c2128f --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_cancel_test.go @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestOptimizeCancelCommand_RequiresPositionalArg(t *testing.T) { + cmd := newOptimizeCancelCommand() + + err := cmd.Args(cmd, []string{}) + assert.Error(t, err) + + err = cmd.Args(cmd, []string{"opt_abc123"}) + assert.NoError(t, err) + + err = cmd.Args(cmd, []string{"opt_abc123", "extra"}) + assert.Error(t, err) +} + +func TestOptimizeCancelCommand_HasConnectionFlags(t *testing.T) { + cmd := newOptimizeCancelCommand() + + assert.NotNil(t, cmd.Flags().Lookup("endpoint")) + assert.NotNil(t, cmd.Flags().Lookup("project-endpoint")) + + assert.Nil(t, cmd.Flags().Lookup("subscription")) + assert.Nil(t, cmd.Flags().Lookup("resource-group")) + assert.Nil(t, cmd.Flags().Lookup("workspace")) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go new file mode 100644 index 00000000000..64f3178854c --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -0,0 +1,220 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "bufio" + "encoding/json" + "fmt" + "os" + + "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/optimize_api" + + "go.yaml.in/yaml/v3" +) + +// OptimizeConfig extends the shared Config with optimize-specific fields. +type OptimizeConfig struct { + opteval.Config `yaml:",inline"` + + // Optimize-specific YAML fields. + ValidationReference *opteval.DatasetRef `yaml:"validation_reference,omitempty"` + Criteria []OptimizeConfigCriterion `yaml:"criteria,omitempty"` + Options *opteval.Options `yaml:"options"` + InlineDataset []optimize_api.DatasetTask `yaml:"-"` // populated by defaultOptimizeConfig, not from YAML +} + +// OptimizeConfigCriterion is a named evaluation criterion with a natural-language instruction. 
+type OptimizeConfigCriterion struct { + Name string `yaml:"name"` + Instruction string `yaml:"instruction"` +} + +// LoadOptimizeConfig reads and parses a YAML optimization config file. +func LoadOptimizeConfig(path string) (*OptimizeConfig, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("failed to read config file %s: %w", path, err) + } + + var cfg OptimizeConfig + if err := yaml.Unmarshal(data, &cfg); err != nil { + return nil, fmt.Errorf("failed to parse config file %s: %w", path, err) + } + + return &cfg, nil +} + +// Validate checks required fields and mutual exclusivity constraints. +func (c *OptimizeConfig) Validate() error { + if c.Agent.Name == "" { + return fmt.Errorf("agent.name is required") + } + + if c.Options == nil || c.Options.EvalModel == "" { + return fmt.Errorf("options.eval_model is required") + } + + hasFile := c.DatasetFile != "" + hasRef := c.DatasetReference != nil + hasInline := len(c.InlineDataset) > 0 + + if hasFile && hasRef { + return fmt.Errorf("dataset_file and dataset_reference are mutually exclusive; specify one, not both") + } + + if !hasFile && !hasRef && !hasInline { + return fmt.Errorf("one of dataset_file or dataset_reference is required") + } + + return nil +} + +// defaultOptimizeConfig returns a config with sensible defaults and a built-in +// evaluation dataset. +func defaultOptimizeConfig(agentName string) *OptimizeConfig { + return &OptimizeConfig{ + Config: opteval.Config{ + Agent: opteval.AgentRef{Name: agentName}, + Evaluators: []string{"task_adherence"}, + }, + InlineDataset: defaultDataset, + Options: &opteval.Options{ + EvalModel: "gpt-4o", + Mode: "optimize", + Strategies: []string{"instruction", "skill", "agents-optimization-job"}, + Budget: 5, + }, + } +} + +var defaultDataset = []optimize_api.DatasetTask{ + { + Name: "calculator_module", + Prompt: "Create a Python module calc.py with four functions: add, subtract, multiply, divide. 
Each takes two numbers and returns the result. Include a brief test at the bottom (if __name__ == '__main__') that exercises each function and prints the results. Then run it.", + Criteria: []optimize_api.Criterion{ + {Name: "decimal_types", Instruction: "ALL functions MUST use and return Python's decimal.Decimal type, NOT float."}, + {Name: "error_code_prefix", Instruction: "ALL error messages raised by any function MUST include a bracketed error code prefix [CALC-NNN]."}, + {Name: "version_constant", Instruction: "The module MUST define VERSION = '0.1.0' and __version__ = VERSION near the top."}, + {Name: "module_exports", Instruction: "The module MUST define __all__ = ['add', 'subtract', 'multiply', 'divide'] at the top."}, + }, + }, + { + Name: "csv_report", + Prompt: "Create a Python script report.py that generates a CSV file 'sales_report.csv' with 10 rows of sample sales data. Columns: date, product, quantity, unit_price, total. Then read the CSV back and print a summary: total revenue and the top-selling product by quantity. Run the script.", + Criteria: []optimize_api.Criterion{ + {Name: "pipe_delimiter", Instruction: "The CSV file MUST use pipe '|' as the delimiter, NOT comma."}, + {Name: "zero_padded_quantity", Instruction: "ALL quantity values MUST be zero-padded to exactly 4 digits (e.g. '0042' not '42')."}, + {Name: "logging_not_print", Instruction: "The script MUST use Python's logging module for progress messages, NOT print()."}, + {Name: "summary_footer", Instruction: "The LAST line of the CSV file MUST be a comment starting with '# SUMMARY:' including total revenue."}, + }, + }, + { + Name: "api_response_builder", + Prompt: "Create a Python module api_utils.py with a function build_response(data, status_code=200) that builds a JSON-ready dictionary representing an API response. Also create a function validate_email(email: str) -> bool that checks if an email is roughly valid. 
Write a test block that demonstrates both functions with a few examples and prints the JSON output. Run it.", + Criteria: []optimize_api.Criterion{ + {Name: "named_tuple_validation", Instruction: "validate_email() MUST return a typing.NamedTuple with fields (is_valid: bool, reason: str), NOT a bare bool."}, + {Name: "request_id", Instruction: "build_response() MUST include a 'requestId' field containing a UUID4 string."}, + {Name: "rfc7807_errors", Instruction: "When status_code >= 400, the response MUST follow RFC 7807 with 'type', 'title', 'detail', 'status' keys."}, + {Name: "camel_case_keys", Instruction: "ALL dictionary keys in the response MUST be camelCase (e.g. 'statusCode', NOT 'status_code')."}, + }, + }, +} + +// ToRequest converts the YAML config into an API OptimizeRequest. +// If DatasetFile is set, each line of the file is read as a JSON-encoded DatasetTask. +func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.OptimizeRequest, error) { + req := &optimize_api.OptimizeRequest{ + Agent: optimize_api.AgentDefinition{ + FoundryProjectURL: projectEndpoint, + AgentName: c.Agent.Name, + AgentVersion: c.Agent.Version, + Model: c.Agent.Model, + }, + Evaluators: c.Evaluators, + Options: optimize_api.OptimizeOptions{ + EvalModel: c.Options.EvalModel, + Budget: c.Options.Budget, + MaxIterations: c.Options.MaxIterations, + MinImprovement: c.Options.MinImprovement, + ImprovementThreshold: c.Options.ImprovementThreshold, + PassThreshold: c.Options.PassThreshold, + Strategies: c.Options.Strategies, + KeepVersions: c.Options.KeepVersions, + TasksPerIteration: c.Options.TasksPerIteration, + ReflectionModel: c.Options.ReflectionModel, + Mode: c.Options.Mode, + }, + } + + // Map criteria from config schema to API schema. 
+ for _, crit := range c.Criteria { + req.Criteria = append(req.Criteria, optimize_api.Criterion{ + Name: crit.Name, + Instruction: crit.Instruction, + }) + } + + if c.DatasetReference != nil { + req.TrainDatasetReference = &optimize_api.DatasetReference{ + Name: c.DatasetReference.Name, + Version: c.DatasetReference.Version, + } + } + + if c.ValidationReference != nil { + req.ValidationDatasetReference = &optimize_api.DatasetReference{ + Name: c.ValidationReference.Name, + Version: c.ValidationReference.Version, + } + } + + if c.DatasetFile != "" { + tasks, err := loadDatasetFile(c.DatasetFile) + if err != nil { + return nil, err + } + req.Dataset = tasks + } else if len(c.InlineDataset) > 0 { + req.Dataset = c.InlineDataset + } + + return req, nil +} + +// loadDatasetFile reads a JSONL file where each line is a JSON DatasetTask. +func loadDatasetFile(path string) ([]optimize_api.DatasetTask, error) { + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("failed to open dataset file %s: %w", path, err) + } + defer f.Close() + + var tasks []optimize_api.DatasetTask + scanner := bufio.NewScanner(f) + lineNum := 0 + for scanner.Scan() { + lineNum++ + line := scanner.Text() + if line == "" { + continue + } + var task optimize_api.DatasetTask + if err := json.Unmarshal([]byte(line), &task); err != nil { + return nil, fmt.Errorf("failed to parse dataset line %d: %w", lineNum, err) + } + tasks = append(tasks, task) + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("error reading dataset file %s: %w", path, err) + } + + if len(tasks) == 0 { + return nil, fmt.Errorf("dataset file %s contains no tasks", path) + } + + return tasks, nil +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go new file mode 100644 index 00000000000..b1b10cab059 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go @@ 
-0,0 +1,295 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "os" + "path/filepath" + "testing" + + "azureaiagent/internal/pkg/agents/opteval" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func writeTestFile(t *testing.T, dir, name, content string) string { + t.Helper() + path := filepath.Join(dir, name) + require.NoError(t, os.WriteFile(path, []byte(content), 0644)) + return path +} + +func TestLoadOptimizeConfig_WithDatasetFile(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + + datasetPath := writeTestFile(t, dir, "tasks.jsonl", + `{"prompt":"What is 2+2?","groundTruth":"4"} +{"prompt":"Capital of France?","groundTruth":"Paris"} +`) + + yamlContent := ` +agent: + name: my-agent + version: "1" + model: gpt-4o +dataset_file: ` + datasetPath + ` +evaluators: + - coherence + - relevance +criteria: + - name: accuracy + instruction: answer must be correct +options: + eval_model: gpt-4o-mini + budget: 100 + max_iterations: 5 + strategies: + - prompt_mutation +` + cfgPath := writeTestFile(t, dir, "optimize.yaml", yamlContent) + + cfg, err := LoadOptimizeConfig(cfgPath) + require.NoError(t, err) + require.NoError(t, cfg.Validate()) + + req, err := cfg.ToRequest("https://example.ai.azure.com/project/p") + require.NoError(t, err) + + assert.Equal(t, "my-agent", req.Agent.AgentName) + assert.Equal(t, "1", req.Agent.AgentVersion) + assert.Equal(t, "https://example.ai.azure.com/project/p", req.Agent.FoundryProjectURL) + assert.Len(t, req.Dataset, 2) + assert.Equal(t, "What is 2+2?", req.Dataset[0].Prompt) + assert.Equal(t, "4", req.Dataset[0].GroundTruth) + assert.Nil(t, req.TrainDatasetReference) + assert.Equal(t, "gpt-4o-mini", req.Options.EvalModel) + assert.Equal(t, 100, req.Options.Budget) + assert.Equal(t, []string{"coherence", "relevance"}, req.Evaluators) + assert.Len(t, req.Criteria, 1) + assert.Equal(t, "accuracy", req.Criteria[0].Name) +} 
+ +func TestLoadOptimizeConfig_WithDatasetReference(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + + yamlContent := ` +agent: + name: ref-agent +dataset_reference: + name: my-dataset + version: "2" +validation_reference: + name: val-dataset + version: "1" +options: + eval_model: gpt-4o-mini +` + cfgPath := writeTestFile(t, dir, "optimize.yaml", yamlContent) + + cfg, err := LoadOptimizeConfig(cfgPath) + require.NoError(t, err) + require.NoError(t, cfg.Validate()) + + req, err := cfg.ToRequest("https://example.com/proj") + require.NoError(t, err) + + assert.Equal(t, "ref-agent", req.Agent.AgentName) + assert.Empty(t, req.Dataset) + require.NotNil(t, req.TrainDatasetReference) + assert.Equal(t, "my-dataset", req.TrainDatasetReference.Name) + assert.Equal(t, "2", req.TrainDatasetReference.Version) + require.NotNil(t, req.ValidationDatasetReference) + assert.Equal(t, "val-dataset", req.ValidationDatasetReference.Name) +} + +func TestValidate_MissingAgentName(t *testing.T) { + t.Parallel() + + cfg := &OptimizeConfig{ + Config: opteval.Config{ + DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "1"}, + }, + Options: &opteval.Options{EvalModel: "gpt-4o-mini"}, + } + + err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "agent.name is required") +} + +func TestValidate_MissingEvalModel(t *testing.T) { + t.Parallel() + + cfg := &OptimizeConfig{ + Config: opteval.Config{ + Agent: opteval.AgentRef{Name: "agent"}, + DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "1"}, + }, + } + + err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "eval_model is required") +} + +func TestValidate_BothDatasetFileAndReference(t *testing.T) { + t.Parallel() + + cfg := &OptimizeConfig{ + Config: opteval.Config{ + Agent: opteval.AgentRef{Name: "agent"}, + DatasetFile: "tasks.jsonl", + DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "1"}, + }, + Options: &opteval.Options{EvalModel: "gpt-4o-mini"}, + } + 
+ err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "mutually exclusive") +} + +func TestValidate_NeitherDatasetFileNorReference(t *testing.T) { + t.Parallel() + + cfg := &OptimizeConfig{ + Config: opteval.Config{Agent: opteval.AgentRef{Name: "agent"}}, + Options: &opteval.Options{EvalModel: "gpt-4o-mini"}, + } + + err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "one of dataset_file or dataset_reference is required") +} + +func TestLoadOptimizeConfig_FileNotFound(t *testing.T) { + t.Parallel() + + _, err := LoadOptimizeConfig("/nonexistent/path/optimize.yaml") + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to read config file") +} + +func TestLoadOptimizeConfig_InvalidYAML(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + cfgPath := writeTestFile(t, dir, "bad.yaml", "{{invalid yaml}}") + + _, err := LoadOptimizeConfig(cfgPath) + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to parse config") +} + +func TestLoadOptimizeConfig_EvalYAMLFormat(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + + // An eval.yaml file should be loadable by the optimize config loader. + // eval_model at the top level won't map to Options, so we verify the + // agent and evaluators parse correctly. 
+ yamlContent := ` +name: smoke-core +agent: + name: my-eval-agent + version: "3" + kind: hosted +dataset_reference: + name: eval-dataset + version: "1" +evaluators: + - task_adherence +options: + eval_model: gpt-4o +` + cfgPath := writeTestFile(t, dir, "eval.yaml", yamlContent) + + cfg, err := LoadOptimizeConfig(cfgPath) + require.NoError(t, err) + + assert.Equal(t, "my-eval-agent", cfg.Agent.Name) + assert.Equal(t, "3", cfg.Agent.Version) + require.NotNil(t, cfg.Options) + assert.Equal(t, "gpt-4o", cfg.Options.EvalModel) + assert.Len(t, cfg.Evaluators, 1) + assert.Equal(t, "task_adherence", cfg.Evaluators[0]) + require.NotNil(t, cfg.DatasetReference) + assert.Equal(t, "eval-dataset", cfg.DatasetReference.Name) +} + +func TestLoadOptimizeConfig_ScalarEvaluatorsWithOptions(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + + yamlContent := ` +agent: + name: my-test-agent + +dataset_file: eval.jsonl + +evaluators: + - task_adherence + +options: + eval_model: gpt-4o + mode: evaluate + strategies: + - instruction + budget: 3 +` + datasetPath := writeTestFile(t, dir, "eval.jsonl", + `{"prompt":"hello","groundTruth":"hi"} +`) + // Rewrite dataset_file to the real temp path so Validate+ToRequest work. + yamlContent = ` +agent: + name: my-test-agent +dataset_file: ` + datasetPath + ` +evaluators: + - task_adherence +options: + eval_model: gpt-4o + mode: evaluate + strategies: + - instruction + budget: 3 +` + cfgPath := writeTestFile(t, dir, "spec.yaml", yamlContent) + + cfg, err := LoadOptimizeConfig(cfgPath) + require.NoError(t, err) + + // Agent + assert.Equal(t, "my-test-agent", cfg.Agent.Name) + + // Dataset + assert.Equal(t, datasetPath, cfg.DatasetFile) + assert.Nil(t, cfg.DatasetReference) + + // Evaluator — scalar string without builtin. prefix resolves as custom. 
+ require.Len(t, cfg.Evaluators, 1) + assert.Equal(t, "task_adherence", cfg.Evaluators[0]) + + // Options + require.NotNil(t, cfg.Options) + assert.Equal(t, "gpt-4o", cfg.Options.EvalModel) + assert.Equal(t, "evaluate", cfg.Options.Mode) + assert.Equal(t, []string{"instruction"}, cfg.Options.Strategies) + assert.Equal(t, 3, cfg.Options.Budget) + + // Validate + ToRequest + require.NoError(t, cfg.Validate()) + req, err := cfg.ToRequest("https://example.ai.azure.com/project/p") + require.NoError(t, err) + assert.Equal(t, "my-test-agent", req.Agent.AgentName) + assert.Len(t, req.Dataset, 1) + assert.Equal(t, []string{"task_adherence"}, req.Evaluators) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go new file mode 100644 index 00000000000..7cd311610a4 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go @@ -0,0 +1,418 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "time" + + "azureaiagent/internal/pkg/agents/agent_api" + "azureaiagent/internal/pkg/agents/agent_yaml" + "azureaiagent/internal/pkg/agents/optimize_api" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/fatih/color" + "github.com/spf13/cobra" + "go.yaml.in/yaml/v3" +) + +type optimizeDeployFlags struct { + candidate string + agent string + optimizeConnectionFlags +} + +func newOptimizeDeployCommand() *cobra.Command { + flags := &optimizeDeployFlags{} + action := &OptimizeDeployAction{flags: flags} + + cmd := &cobra.Command{ + Use: "deploy [agent-name]", + Short: "Deploy a winning optimization candidate as a new agent version via the API.", + Long: `Deploy an optimization candidate directly via the Foundry agent API. + +This creates a new agent version with the optimized configuration applied. 
+Use 'optimize apply' instead if you want to localize the config into your azd project first.`, + Example: ` # Deploy candidate directly + azd ai agent optimize deploy --candidate cand_abc123 --agent my-agent + + # Deploy with explicit endpoint + azd ai agent optimize deploy --candidate cand_abc123 --agent my-agent --project-endpoint https://...`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + ctx := azdext.WithAccessToken(cmd.Context()) + setupDebugLogging(cmd.Flags()) + + if len(args) > 0 && flags.agent == "" { + flags.agent = args[0] + } + + return action.Run(ctx, cmd) + }, + } + + cmd.Flags().StringVar(&flags.candidate, "candidate", "", "Candidate ID from optimization results (required)") + cmd.Flags().StringVar(&flags.agent, "agent", "", "Agent name to deploy to (auto-detected from agent.yaml)") + _ = cmd.MarkFlagRequired("candidate") + flags.optimizeConnectionFlags.register(cmd) + + return cmd +} + +// OptimizeDeployAction implements the optimize deploy command. +type OptimizeDeployAction struct { + flags *optimizeDeployFlags +} + +func (a *OptimizeDeployAction) Run(ctx context.Context, cmd *cobra.Command) error { + out := cmd.OutOrStdout() + bold := color.New(color.Bold) + + return a.runDirect(ctx, out, bold) +} + +// runDirect deploys a candidate directly via the Foundry agent API. +// TODO: Change this to full remote deployment here if not in an azd project +func (a *OptimizeDeployAction) runDirect( + ctx context.Context, + out io.Writer, + bold *color.Color, +) error { + // Resolve agent name from flag or agent.yaml in current directory. + resolved, err := resolveOptimizeAgent(ctx, a.flags.agent, false) + if err != nil { + return err + } + agentName := resolved.agentName + + // Resolve project endpoint (for Foundry agent API). 
+ projectEndpoint, err := resolveProjectEndpointForDeploy(ctx, &a.flags.optimizeConnectionFlags) + if err != nil { + return err + } + + bold.Fprintf(out, "Deploying candidate %s to agent %s...\n\n", a.flags.candidate, agentName) + + // Step 1: Fetch candidate config from optimization service. + fmt.Fprintf(out, " Fetching candidate config...\n") + credential, err := newAgentCredential() + if err != nil { + return err + } + optClient := optimize_api.NewOptimizeClient(projectEndpoint, credential) + candidateConfig, err := optClient.GetCandidateConfig(ctx, a.flags.candidate) + if err != nil { + return fmt.Errorf("failed to fetch candidate config: %w", err) + } + + // JSON-stringify the candidate config for the env var. + configJSON, err := json.Marshal(candidateConfig) + if err != nil { + return fmt.Errorf("failed to serialize candidate config: %w", err) + } + + // Step 2: Fetch current agent from Foundry. + fmt.Fprintf(out, " Fetching current agent definition...\n") + agentClient := agent_api.NewAgentClient(projectEndpoint, credential) + + agentObj, err := agentClient.GetAgent(ctx, agentName, DefaultAgentAPIVersion) + if err != nil { + return fmt.Errorf("failed to get agent %q: %w", agentName, err) + } + + // Extract definition from latest version using map[string]any for flexibility. + latestDef, err := extractLatestDefinition(agentObj) + if err != nil { + return err + } + + // Step 3: Merge env vars and create new version. + // Use OPTIMIZATION_CONFIG (non-reserved) — the agent SDK reads both + // AGENT_OPTIMIZATION_CONFIG (first-party service) and OPTIMIZATION_CONFIG (CLI). 
+ envVars := extractEnvVars(latestDef) + envVars["OPTIMIZATION_CONFIG"] = string(configJSON) + + newDef := buildDeployDefinition(latestDef, envVars) + + description := fmt.Sprintf("Optimized: candidate %s", a.flags.candidate) + createReq := &agent_api.CreateAgentVersionRequest{ + Description: &description, + Metadata: map[string]string{"optimized_from": a.flags.candidate}, + Definition: newDef, + } + + fmt.Fprintf(out, " Creating new agent version...\n") + versionObj, err := agentClient.CreateAgentVersion(ctx, agentName, createReq, DefaultAgentAPIVersion) + if err != nil { + // Check for reserved env var error (AGENT_* and FOUNDRY_* are platform-reserved). + if isReservedEnvVarError(err) { + return fmt.Errorf("the platform reserves AGENT_* environment variables for internal use.\n\n" + + "Deploying optimization candidates for hosted (container) agents requires the\n" + + "optimization service to create versions with elevated privileges.\n\n" + + "Contact the platform team to promote via the optimization service API") + } + return fmt.Errorf("failed to create agent version: %w", err) + } + + // Step 4: Poll until version is active. + fmt.Fprintf(out, " Waiting for version %s to become active...\n", versionObj.Version) + if err := pollVersionActive(ctx, agentClient, agentName, versionObj.Version); err != nil { + return err + } + + // Step 5: Print success. + fmt.Fprintln(out) + color.New(color.FgGreen, color.Bold).Fprintf(out, + " \u2713 Successfully deployed candidate %s as version %s\n", a.flags.candidate, versionObj.Version) + fmt.Fprintf(out, "\n Agent: %s\n", agentName) + fmt.Fprintf(out, " Version: %s\n", versionObj.Version) + + return nil +} + +// upsertAgentYamlEnvVar reads the agent.yaml file, adds or updates the specified +// environment variable in the environment_variables list, and writes back. 
+func upsertAgentYamlEnvVar(agentYamlPath, key, value string) error { + data, err := os.ReadFile(agentYamlPath) //nolint:gosec // G304: path from azd project + if err != nil { + return fmt.Errorf("reading agent.yaml: %w", err) + } + + var agent agent_yaml.ContainerAgent + if err := yaml.Unmarshal(data, &agent); err != nil { + return fmt.Errorf("parsing agent.yaml: %w", err) + } + + // Upsert the environment variable. + if agent.EnvironmentVariables == nil { + agent.EnvironmentVariables = &[]agent_yaml.EnvironmentVariable{} + } + + found := false + envVars := *agent.EnvironmentVariables + for i := range envVars { + if envVars[i].Name == key { + envVars[i].Value = value + found = true + break + } + } + if !found { + envVars = append(envVars, agent_yaml.EnvironmentVariable{Name: key, Value: value}) + } + agent.EnvironmentVariables = &envVars + + // Marshal back to YAML and write. + out, err := yaml.Marshal(&agent) + if err != nil { + return fmt.Errorf("marshaling agent.yaml: %w", err) + } + + //nolint:gosec // G306: agent.yaml should be readable by tooling + if err := os.WriteFile(agentYamlPath, out, 0644); err != nil { + return fmt.Errorf("writing agent.yaml: %w", err) + } + + return nil +} + +// resolveProjectEndpointForDeploy resolves the Foundry project endpoint using +// the same resolution chain as other agent commands. 
+func resolveProjectEndpointForDeploy(ctx context.Context, connFlags *optimizeConnectionFlags) (string, error) { + if connFlags.projectEndpoint != "" { + return strings.TrimRight(connFlags.projectEndpoint, "/"), nil + } + + projectEndpoint, err := resolveAgentEndpoint(ctx, "", "") + if err != nil { + if ep := os.Getenv("AZURE_AI_PROJECT_ENDPOINT"); ep != "" { + return strings.TrimRight(ep, "/"), nil + } + return "", fmt.Errorf("could not resolve project endpoint: %w\n\n"+ + "Provide --project-endpoint (-p), or run 'azd ai agent init'", err) + } + return projectEndpoint, nil +} + +// isReservedEnvVarError checks if a version creation error is due to +// the platform rejecting reserved AGENT_* or FOUNDRY_* environment variables. +func isReservedEnvVarError(err error) bool { + if err == nil { + return false + } + msg := err.Error() + return strings.Contains(msg, "reserved for platform use") || + strings.Contains(msg, "AGENT_* variables are reserved") +} + +// --- Skill file download --- + +// isSkillFile returns true if the manifest entry represents a skill file. +func isSkillFile(f optimize_api.CandidateFile) bool { + return f.Type == "skill" || strings.HasPrefix(f.Path, "skills/") +} + +// downloadSkillFiles fetches the candidate manifest, downloads all skill files, +// and writes them into serviceDir. Returns the number of files written. 
+func downloadSkillFiles( + ctx context.Context, + client *optimize_api.OptimizeClient, + candidateID string, + serviceDir string, + out io.Writer, +) (int, error) { + manifest, err := client.GetCandidate(ctx, candidateID) + if err != nil { + return 0, fmt.Errorf("fetching candidate manifest: %w", err) + } + + var skillFiles []optimize_api.CandidateFile + for _, f := range manifest.Files { + if isSkillFile(f) { + skillFiles = append(skillFiles, f) + } + } + if len(skillFiles) == 0 { + return 0, nil + } + + count := 0 + for _, f := range skillFiles { + if f.Path == "" { + continue + } + + content, err := client.GetCandidateFile(ctx, candidateID, f.Path) + if err != nil { + fmt.Fprintf(out, " warning: failed to download skill file %s: %s\n", f.Path, err) + continue + } + + // Write relative to serviceDir. + // "skills/math/SKILL.md" becomes "/skills/math/SKILL.md". + outPath := filepath.Join(serviceDir, filepath.FromSlash(f.Path)) + + if err := os.MkdirAll(filepath.Dir(outPath), 0755); err != nil { + return count, fmt.Errorf("creating directory for %s: %w", f.Path, err) + } + + //nolint:gosec // G306: skill files should be readable + if err := os.WriteFile(outPath, []byte(content), 0644); err != nil { + return count, fmt.Errorf("writing skill file %s: %w", f.Path, err) + } + + fmt.Fprintf(out, " → %s (%d bytes)\n", outPath, len(content)) + count++ + } + + return count, nil +} + +// extractLatestDefinition gets the latest version's definition as a map for flexible field access. 
+func extractLatestDefinition(agent *agent_api.AgentObject) (map[string]any, error) { + defBytes, err := json.Marshal(agent.Versions.Latest.Definition) + if err != nil { + return nil, fmt.Errorf("failed to read agent definition: %w", err) + } + + var defMap map[string]any + if err := json.Unmarshal(defBytes, &defMap); err != nil { + return nil, fmt.Errorf("failed to parse agent definition: %w", err) + } + return defMap, nil +} + +// extractEnvVars extracts existing environment variables from a definition map. +func extractEnvVars(def map[string]any) map[string]string { + result := make(map[string]string) + if envRaw, ok := def["environment_variables"]; ok { + if envMap, ok := envRaw.(map[string]any); ok { + for k, v := range envMap { + if s, ok := v.(string); ok { + result[k] = s + } + } + } + } + return result +} + +// buildDeployDefinition creates the definition map for the new version, +// preserving all fields from the current version but overriding env vars. +func buildDeployDefinition(currentDef map[string]any, envVars map[string]string) map[string]any { + newDef := make(map[string]any) + for k, v := range currentDef { + if k != "environment_variables" { + newDef[k] = v + } + } + newDef["environment_variables"] = envVars + normalizeProtocolVersions(newDef) + return newDef +} + +// normalizeProtocolVersions ensures container_protocol_versions use the +// canonical "1.0.0" format instead of the legacy "v1" format that the +// platform no longer accepts for new versions. +func normalizeProtocolVersions(def map[string]any) { + raw, ok := def["container_protocol_versions"] + if !ok { + return + } + protocols, ok := raw.([]any) + if !ok { + return + } + for _, p := range protocols { + pMap, ok := p.(map[string]any) + if !ok { + continue + } + if ver, ok := pMap["version"].(string); ok && ver == "v1" { + pMap["version"] = "1.0.0" + } + } +} + +// pollVersionActive polls the agent version until its status is "active" or a timeout occurs. 
+func pollVersionActive( + ctx context.Context, + client *agent_api.AgentClient, + agentName, versionNum string, +) error { + timeout := 5 * time.Minute + interval := 5 * time.Second + deadline := time.Now().Add(timeout) + + for { + if time.Now().After(deadline) { + return fmt.Errorf("timed out waiting for version %s to become active after %s", versionNum, timeout) + } + + version, err := client.GetAgentVersion(ctx, agentName, versionNum, DefaultAgentAPIVersion) + if err != nil { + return fmt.Errorf("failed to poll version status: %w", err) + } + + if version.Status == "active" { + return nil + } + + if version.Status == "failed" { + return fmt.Errorf("version %s failed to activate", versionNum) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(interval): + } + } +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy_test.go new file mode 100644 index 00000000000..bc4f0e408a7 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy_test.go @@ -0,0 +1,156 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "testing" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestOptimizeDeployCommand_HasRequiredFlags(t *testing.T) { + cmd := newOptimizeDeployCommand() + + candidateFlag := cmd.Flags().Lookup("candidate") + require.NotNil(t, candidateFlag, "--candidate flag should be registered") + + agentFlag := cmd.Flags().Lookup("agent") + require.NotNil(t, agentFlag, "--agent flag should be registered") +} + +func TestOptimizeDeployCommand_CandidateIsRequired(t *testing.T) { + cmd := newOptimizeDeployCommand() + + // Set only --agent, omit --candidate + cmd.SetArgs([]string{"--agent", "my-agent"}) + err := cmd.Execute() + assert.Error(t, err) + assert.Contains(t, err.Error(), "candidate") +} + +func TestOptimizeDeployCommand_AgentResolvedFromFlagOrYaml(t *testing.T) { + cmd := newOptimizeDeployCommand() + + // --agent is no longer MarkFlagRequired; it falls back to agent.yaml + agentFlag := cmd.Flags().Lookup("agent") + require.NotNil(t, agentFlag) + // Without --agent and without agent.yaml, should error about agent name + cmd.SetArgs([]string{"--candidate", "cand_123"}) + err := cmd.Execute() + assert.Error(t, err) + assert.Contains(t, err.Error(), "agent") +} + +func TestOptimizeDeployCommand_HasConnectionFlags(t *testing.T) { + cmd := newOptimizeDeployCommand() + + assert.NotNil(t, cmd.Flags().Lookup("endpoint")) + assert.NotNil(t, cmd.Flags().Lookup("project-endpoint")) + + // Should NOT have subscription/resource-group/workspace + assert.Nil(t, cmd.Flags().Lookup("subscription")) + assert.Nil(t, cmd.Flags().Lookup("resource-group")) + assert.Nil(t, cmd.Flags().Lookup("workspace")) +} + +func TestOptimizeCommand_HasDeploySubCommand(t *testing.T) { + cmd := newOptimizeCommand(&azdext.ExtensionContext{}) + + var actual []string + for _, sub := range cmd.Commands() { + actual = append(actual, sub.Name()) + } + + assert.Contains(t, actual, "deploy", 
"optimize should have 'deploy' sub-command") +} + +func TestExtractEnvVars_EmptyDef(t *testing.T) { + def := map[string]any{"kind": "hosted"} + result := extractEnvVars(def) + assert.Empty(t, result) +} + +func TestExtractEnvVars_WithVars(t *testing.T) { + def := map[string]any{ + "kind": "hosted", + "environment_variables": map[string]any{ + "FOO": "bar", + "BAZ": "qux", + }, + } + result := extractEnvVars(def) + assert.Equal(t, "bar", result["FOO"]) + assert.Equal(t, "qux", result["BAZ"]) + assert.Len(t, result, 2) +} + +func TestBuildDeployDefinition_PreservesFieldsAndOverridesEnvVars(t *testing.T) { + currentDef := map[string]any{ + "kind": "hosted", + "image": "myimage:latest", + "cpu": "1.0", + "memory": "2Gi", + "environment_variables": map[string]any{ + "EXISTING_VAR": "keep_me", + }, + } + + envVars := map[string]string{ + "EXISTING_VAR": "keep_me", + "OPTIMIZATION_CONFIG": `{"key":"value"}`, + } + + newDef := buildDeployDefinition(currentDef, envVars) + + assert.Equal(t, "hosted", newDef["kind"]) + assert.Equal(t, "myimage:latest", newDef["image"]) + assert.Equal(t, "1.0", newDef["cpu"]) + assert.Equal(t, "2Gi", newDef["memory"]) + + newEnvVars, ok := newDef["environment_variables"].(map[string]string) + require.True(t, ok) + assert.Equal(t, "keep_me", newEnvVars["EXISTING_VAR"]) + assert.Equal(t, `{"key":"value"}`, newEnvVars["OPTIMIZATION_CONFIG"]) +} + +func TestBuildDeployDefinition_NormalizesProtocolVersion(t *testing.T) { + currentDef := map[string]any{ + "kind": "hosted", + "image": "myimage:latest", + "cpu": "1.0", + "memory": "2Gi", + "container_protocol_versions": []any{ + map[string]any{"protocol": "responses", "version": "v1"}, + }, + "environment_variables": map[string]any{}, + } + + newDef := buildDeployDefinition(currentDef, map[string]string{"FOO": "bar"}) + + protocols := newDef["container_protocol_versions"].([]any) + p := protocols[0].(map[string]any) + assert.Equal(t, "1.0.0", p["version"], "v1 should be normalized to 1.0.0") + 
assert.Equal(t, "responses", p["protocol"]) +} + +func TestNormalizeProtocolVersions_NoOp(t *testing.T) { + // Already 1.0.0 — should not change + def := map[string]any{ + "container_protocol_versions": []any{ + map[string]any{"protocol": "responses", "version": "1.0.0"}, + }, + } + normalizeProtocolVersions(def) + + protocols := def["container_protocol_versions"].([]any) + p := protocols[0].(map[string]any) + assert.Equal(t, "1.0.0", p["version"]) +} + +func TestNormalizeProtocolVersions_MissingField(t *testing.T) { + def := map[string]any{"kind": "hosted"} + normalizeProtocolVersions(def) // should not panic +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go new file mode 100644 index 00000000000..a6d0006c80d --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go @@ -0,0 +1,115 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "context" + "fmt" + "os" + "strings" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + azdext "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/spf13/cobra" +) + +// optimizeConnectionFlags holds connection settings shared across all optimize sub-commands. +type optimizeConnectionFlags struct { + projectEndpoint string + endpoint string // override: direct optimization service URL (for local dev only) +} + +// register adds the connection flags to the given cobra command. +func (f *optimizeConnectionFlags) register(cmd *cobra.Command) { + cmd.Flags().StringVarP(&f.projectEndpoint, "project-endpoint", "p", "", "Foundry project endpoint URL") + cmd.Flags().StringVar(&f.endpoint, "endpoint", "", "Optimization service endpoint (for local dev)") +} + +// resolve returns the project endpoint for optimize API calls. 
+// Priority: --endpoint flag → AZURE_AI_OPTIMIZE_ENDPOINT → --project-endpoint → azd environment → AZURE_AI_PROJECT_ENDPOINT env var. +func (f *optimizeConnectionFlags) resolve(ctx context.Context) (string, error) { + if f.endpoint != "" { + return strings.TrimRight(f.endpoint, "/"), nil + } + if ep := os.Getenv("AZURE_AI_OPTIMIZE_ENDPOINT"); ep != "" { + return strings.TrimRight(ep, "/"), nil + } + + // Explicit --project-endpoint flag + if f.projectEndpoint != "" { + return strings.TrimRight(f.projectEndpoint, "/"), nil + } + + // Try azd environment (works when running under azd) + projectEndpoint, err := resolveAgentEndpoint(ctx, "", "") + if err != nil { + // Fall back to AZURE_AI_PROJECT_ENDPOINT env var (works standalone) + if ep := os.Getenv("AZURE_AI_PROJECT_ENDPOINT"); ep != "" { + return strings.TrimRight(ep, "/"), nil + } + return "", fmt.Errorf("could not resolve project endpoint\n\n" + + "Set AZURE_AI_PROJECT_ENDPOINT, provide --project-endpoint (-p),\n" + + "or run 'azd ai agent init'") + } + + return projectEndpoint, nil +} + +// optimizeAPIVersion is the API version used for optimization service calls. +const optimizeAPIVersion = "v1" + +// optimizeLastJobIDKey is the azd environment key for the last optimization job ID. +const optimizeLastJobIDKey = "OPTIMIZE_LAST_OPERATION_ID" + +// tokenRequestOptions returns the token request options for Azure AI scope. +func tokenRequestOptions() policy.TokenRequestOptions { + return policy.TokenRequestOptions{ + Scopes: []string{"https://ai.azure.com/.default"}, + } +} + +// saveLastOptimizeJobID stores the operation ID in the azd environment. +// Best-effort — silently ignores errors (e.g., when running outside azd). 
+func saveLastOptimizeJobID(ctx context.Context, operationID string) { + azdClient, err := azdext.NewAzdClient() + if err != nil { + return + } + defer azdClient.Close() + + envResp, err := azdClient.Environment().GetCurrent(ctx, &azdext.EmptyRequest{}) + if err != nil || envResp == nil { + return + } + + _, _ = azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ + EnvName: envResp.Environment.Name, + Key: optimizeLastJobIDKey, + Value: operationID, + }) +} + +// loadLastOptimizeJobID retrieves the last operation ID from the azd environment. +// Returns empty string if not available. +func loadLastOptimizeJobID(ctx context.Context) string { + azdClient, err := azdext.NewAzdClient() + if err != nil { + return "" + } + defer azdClient.Close() + + envResp, err := azdClient.Environment().GetCurrent(ctx, &azdext.EmptyRequest{}) + if err != nil || envResp == nil { + return "" + } + + resp, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envResp.Environment.Name, + Key: optimizeLastJobIDKey, + }) + if err != nil || resp == nil { + return "" + } + return resp.Value +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go new file mode 100644 index 00000000000..c7944074181 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestOptimizeConnectionFlags_Resolve_AllEmpty(t *testing.T) { + t.Setenv("AZURE_AI_OPTIMIZE_ENDPOINT", "") + + f := &optimizeConnectionFlags{} + _, err := f.resolve(context.Background()) + assert.Error(t, err) + assert.Contains(t, err.Error(), "endpoint") +} + +func TestOptimizeConnectionFlags_Resolve_FromEnv(t *testing.T) { + t.Setenv("AZURE_AI_OPTIMIZE_ENDPOINT", "https://example.com") + + f := &optimizeConnectionFlags{} + endpoint, err := f.resolve(context.Background()) + assert.NoError(t, err) + assert.Equal(t, "https://example.com", endpoint) +} + +func TestOptimizeConnectionFlags_Resolve_FlagsOverrideEnv(t *testing.T) { + t.Setenv("AZURE_AI_OPTIMIZE_ENDPOINT", "https://from-env.com") + + f := &optimizeConnectionFlags{ + endpoint: "https://from-flag.com", + } + endpoint, err := f.resolve(context.Background()) + assert.NoError(t, err) + assert.Equal(t, "https://from-flag.com", endpoint) +} + +func TestOptimizeConnectionFlags_Resolve_TrimsTrailingSlash(t *testing.T) { + t.Setenv("AZURE_AI_OPTIMIZE_ENDPOINT", "https://example.com/") + + f := &optimizeConnectionFlags{} + endpoint, err := f.resolve(context.Background()) + assert.NoError(t, err) + assert.Equal(t, "https://example.com", endpoint) +} + +func TestOptimizeConnectionFlags_Resolve_ProjectEndpointFlag(t *testing.T) { + f := &optimizeConnectionFlags{ + projectEndpoint: "https://my-project.services.ai.azure.com/", + } + endpoint, err := f.resolve(context.Background()) + assert.NoError(t, err) + assert.Equal(t, "https://my-project.services.ai.azure.com", endpoint) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go new file mode 100644 index 00000000000..36de799fd3b --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go @@ -0,0 +1,126 @@ +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "fmt" + "io" + "strings" + + "azureaiagent/internal/pkg/agents/optimize_api" + + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +type optimizeListFlags struct { + limit int + status string + optimizeConnectionFlags +} + +func newOptimizeListCommand() *cobra.Command { + flags := &optimizeListFlags{} + + cmd := &cobra.Command{ + Use: "list", + Short: "List recent optimization runs.", + Long: `List recent optimization and evaluation runs. + +Use --status to filter by job status and --limit to control page size.`, + Example: ` # List all recent runs + azd ai agent optimize list + + # List only completed runs + azd ai agent optimize list --status completed + + # Show last 5 runs + azd ai agent optimize list --limit 5`, + RunE: func(cmd *cobra.Command, args []string) error { + return runOptimizeList(cmd, flags) + }, + } + + cmd.Flags().IntVar(&flags.limit, "limit", 20, "Maximum number of results") + cmd.Flags().StringVar(&flags.status, "status", "", "Filter by status (pending/running/completed/failed/cancelled)") + flags.optimizeConnectionFlags.register(cmd) + + return cmd +} + +func runOptimizeList(cmd *cobra.Command, flags *optimizeListFlags) error { + // Validate --status flag before making API call + if flags.status != "" { + valid := map[string]bool{"pending": true, "running": true, "completed": true, "failed": true, "cancelled": true} + if !valid[flags.status] { + return fmt.Errorf("invalid --status %q: must be one of pending, running, completed, failed, cancelled", flags.status) + } + } + + endpoint, err := flags.resolve(cmd.Context()) + if err != nil { + return err + } + + credential, err := newAgentCredential() + if err != nil { + return err + } + + client := optimize_api.NewOptimizeClient(endpoint, credential) + + listResp, err := client.ListOptimizeJobs(cmd.Context(), flags.limit, flags.status) + if err != nil { + return fmt.Errorf("failed to list optimization jobs: 
%w\n\nCheck that the endpoint %q is reachable", err, endpoint) + } + + out := cmd.OutOrStdout() + + if len(listResp.Data) == 0 { + fmt.Fprintln(out, " No optimization jobs found.") + if flags.status != "" { + fmt.Fprintf(out, "\n Try removing the --status filter or run a new job with:\n") + fmt.Fprintf(out, " azd ai agent optimize run --config spec.yaml\n") + } + return nil + } + + printOptimizeListTable(out, listResp.Data) + return nil +} + +func printOptimizeListTable(out io.Writer, jobs []optimize_api.OptimizeJobStatus) { + bold := color.New(color.Bold) + + bold.Fprintf(out, " %-38s %-12s %-14s %7s %s\n", "ID", "Status", "Agent", "Score", "Created") + fmt.Fprintf(out, " %-38s %-12s %-14s %7s %s\n", + strings.Repeat("─", 38), strings.Repeat("─", 12), + strings.Repeat("─", 14), strings.Repeat("─", 7), strings.Repeat("─", 19)) + + for _, job := range jobs { + scoreStr := "—" + if job.Best != nil { + scoreStr = fmt.Sprintf("%.2f", job.Best.AvgScore) + } + + agentName := "—" + if job.Agent != nil && job.Agent.AgentName != "" { + agentName = job.Agent.AgentName + } + + created := job.CreatedAt + if created == "" { + created = "—" + } + + fmt.Fprintf(out, " %-38s %-12s %-14s %7s %s\n", + job.OperationID, + formatOptimizeStatus(job.Status), + truncateString(agentName, 14), + scoreStr, + truncateString(created, 19), + ) + } + fmt.Fprintln(out) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list_test.go new file mode 100644 index 00000000000..4aa5390a9f9 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list_test.go @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestOptimizeListCommand_AcceptsLimitAndStatusFlags(t *testing.T) { + cmd := newOptimizeListCommand() + + limitFlag := cmd.Flags().Lookup("limit") + require.NotNil(t, limitFlag, "--limit flag should be registered") + + limitVal, err := cmd.Flags().GetInt("limit") + require.NoError(t, err) + assert.Equal(t, 20, limitVal, "--limit should default to 20") + + statusFlag := cmd.Flags().Lookup("status") + require.NotNil(t, statusFlag, "--status flag should be registered") + + statusVal, err := cmd.Flags().GetString("status") + require.NoError(t, err) + assert.Equal(t, "", statusVal, "--status should default to empty") +} + +func TestOptimizeListCommand_HasConnectionFlags(t *testing.T) { + cmd := newOptimizeListCommand() + + assert.NotNil(t, cmd.Flags().Lookup("endpoint")) + assert.NotNil(t, cmd.Flags().Lookup("project-endpoint")) + + assert.Nil(t, cmd.Flags().Lookup("subscription")) + assert.Nil(t, cmd.Flags().Lookup("resource-group")) + assert.Nil(t, cmd.Flags().Lookup("workspace")) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go new file mode 100644 index 00000000000..680aa7b95d3 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go @@ -0,0 +1,130 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "fmt" + "io" + + "azureaiagent/internal/pkg/agents/optimize_api" + + azdext "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +type optimizeStatusFlags struct { + watch bool + pollInterval int + optimizeConnectionFlags +} + +func newOptimizeStatusCommand() *cobra.Command { + flags := &optimizeStatusFlags{} + + cmd := &cobra.Command{ + Use: "status [operation-id]", + Short: "Check the status of an optimization job.", + Long: `Check the status of an optimization job by its operation ID. + +If no operation ID is provided, uses the last optimization job from this project. +Use --watch to poll until the job completes.`, + Example: ` # Check last job status (auto-resolved) + azd ai agent optimize status + + # Check specific job status + azd ai agent optimize status opt_abc123 + + # Watch until complete + azd ai agent optimize status opt_abc123 --watch`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + ctx := azdext.WithAccessToken(cmd.Context()) + operationID := "" + if len(args) > 0 { + operationID = args[0] + } else { + operationID = loadLastOptimizeJobID(ctx) + if operationID == "" { + return fmt.Errorf("operation ID is required: provide it as an argument, or run 'azd ai agent optimize' first") + } + fmt.Fprintf(cmd.OutOrStdout(), " Using last job: %s\n\n", operationID) + } + return runOptimizeStatus(cmd, flags, operationID) + }, + } + + cmd.Flags().BoolVar(&flags.watch, "watch", false, "Poll until job completes") + cmd.Flags().IntVar(&flags.pollInterval, "poll-interval", 5, "Polling interval in seconds") + flags.optimizeConnectionFlags.register(cmd) + + return cmd +} + +func runOptimizeStatus(cmd *cobra.Command, flags *optimizeStatusFlags, operationID string) error { + endpoint, err := flags.resolve(cmd.Context()) + if err != nil { + return err + } + + credential, err := newAgentCredential() + if err != nil { + return err + } + + client := 
optimize_api.NewOptimizeClient(endpoint, credential) + out := cmd.OutOrStdout() + + status, err := client.GetOptimizeStatus(cmd.Context(), operationID) + if err != nil { + return fmt.Errorf("failed to get job status: %w\n\nCheck that the operation ID %q is correct", err, operationID) + } + + printOptimizeJobSummary(out, status) + + if flags.watch && !optimize_api.IsTerminal(status.Status) { + finalStatus, err := pollOptimizeJob(cmd, client, flags.pollInterval, operationID) + if err != nil { + return err + } + printOptimizeResults(out, finalStatus) + } else if len(status.Candidates) > 0 { + printOptimizeResults(out, status) + } + + if status.Error != nil { + return fmt.Errorf("optimization job failed: %s", status.Error.Message) + } + + return nil +} + +// printOptimizeJobSummary prints a brief summary of an optimization job's state. +func printOptimizeJobSummary(out io.Writer, status *optimize_api.OptimizeJobStatus) { + fmt.Fprintf(out, " Job ID: %s\n", color.CyanString(status.OperationID)) + fmt.Fprintf(out, " Status: %s\n", formatOptimizeStatus(status.Status)) + if status.Agent != nil && status.Agent.AgentName != "" { + fmt.Fprintf(out, " Agent: %s\n", status.Agent.AgentName) + } + if status.AllStrategiesFailed { + fmt.Fprintf(out, " Strategy: %s\n", color.YellowString("failed (baseline only — no candidates generated)")) + } else if status.Progress != nil && status.Progress.CurrentStrategy != "" { + fmt.Fprintf(out, " Strategy: %s\n", status.Progress.CurrentStrategy) + } + if status.Best != nil { + fmt.Fprintf(out, " Best: %.2f\n", status.Best.AvgScore) + } + if status.CreatedAt != "" { + fmt.Fprintf(out, " Created: %s\n", status.CreatedAt) + } + if status.Error != nil { + fmt.Fprintf(out, " Error: %s\n", color.RedString(status.Error.Message)) + } + if len(status.Warnings) > 0 { + for _, w := range status.Warnings { + fmt.Fprintf(out, " Warning: %s\n", color.YellowString(w)) + } + } + fmt.Fprintln(out) +} diff --git 
a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status_test.go new file mode 100644 index 00000000000..7996b6dc7ee --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status_test.go @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestOptimizeStatusCommand_AcceptsOptionalPositionalArg(t *testing.T) { + cmd := newOptimizeStatusCommand() + + // Zero args is now OK (uses last job ID) + err := cmd.Args(cmd, []string{}) + assert.NoError(t, err) + + // One arg is OK + err = cmd.Args(cmd, []string{"opt_abc123"}) + assert.NoError(t, err) + + // Two args is rejected + err = cmd.Args(cmd, []string{"opt_abc123", "extra"}) + assert.Error(t, err) +} + +func TestOptimizeStatusCommand_HasWatchFlag(t *testing.T) { + cmd := newOptimizeStatusCommand() + + f := cmd.Flags().Lookup("watch") + require.NotNil(t, f, "--watch flag should be registered") + + watchVal, err := cmd.Flags().GetBool("watch") + require.NoError(t, err) + assert.False(t, watchVal, "--watch should default to false for status") +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go new file mode 100644 index 00000000000..21f3c8ef33d --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go @@ -0,0 +1,91 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "testing" + + "azureaiagent/internal/pkg/agents/optimize_api" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestOptimizeCommand_HasExpectedSubCommands(t *testing.T) { + cmd := newOptimizeCommand(&azdext.ExtensionContext{}) + + expected := []string{"status", "list", "cancel", "deploy"} + var actual []string + for _, sub := range cmd.Commands() { + actual = append(actual, sub.Name()) + } + + for _, name := range expected { + assert.Contains(t, actual, name, "optimize should have sub-command %q", name) + } + assert.NotContains(t, actual, "run", "optimize should not have 'run' sub-command (merged into root)") +} + +func TestOptimizeCommand_AcceptsPositionalArg(t *testing.T) { + cmd := newOptimizeCommand(&azdext.ExtensionContext{}) + + err := cmd.Args(cmd, []string{"my-agent"}) + assert.NoError(t, err) + + err = cmd.Args(cmd, []string{}) + assert.NoError(t, err) + + err = cmd.Args(cmd, []string{"my-agent", "extra"}) + assert.Error(t, err) +} + +func TestOptimizeCommand_AcceptsConfigFlag(t *testing.T) { + cmd := newOptimizeCommand(&azdext.ExtensionContext{}) + + f := cmd.Flags().Lookup("config") + require.NotNil(t, f, "--config flag should be registered") + assert.Equal(t, "c", f.Shorthand, "--config should have -c shorthand") + + assert.NotNil(t, cmd.Flags().Lookup("watch")) + assert.NotNil(t, cmd.Flags().Lookup("poll-interval")) + assert.NotNil(t, cmd.Flags().Lookup("endpoint")) + assert.NotNil(t, cmd.Flags().Lookup("agent")) + assert.NotNil(t, cmd.Flags().Lookup("strategy")) +} + +func TestOptimizeCommand_DefaultFlags(t *testing.T) { + cmd := newOptimizeCommand(&azdext.ExtensionContext{}) + + watchVal, err := cmd.Flags().GetBool("watch") + require.NoError(t, err) + assert.True(t, watchVal, "--watch should default to true") + + pollVal, err := cmd.Flags().GetInt("poll-interval") + require.NoError(t, err) + assert.Equal(t, 5, pollVal, 
"--poll-interval should default to 5") +} + +func TestIsTerminal_ViaOptimizeAPI(t *testing.T) { + assert.True(t, optimize_api.IsTerminal(optimize_api.StatusCompleted)) + assert.True(t, optimize_api.IsTerminal(optimize_api.StatusFailed)) + assert.True(t, optimize_api.IsTerminal(optimize_api.StatusCancelled)) + assert.False(t, optimize_api.IsTerminal(optimize_api.StatusRunning)) + assert.False(t, optimize_api.IsTerminal(optimize_api.StatusPending)) + assert.False(t, optimize_api.IsTerminal("")) +} + +func TestTruncateString(t *testing.T) { + assert.Equal(t, "abc", truncateString("abc", 10)) + assert.Equal(t, "abcdefg...", truncateString("abcdefghijk", 10)) + assert.Equal(t, "ab", truncateString("abcdef", 2)) +} + +func TestFormatOptimizeStatus(t *testing.T) { + assert.NotEmpty(t, formatOptimizeStatus(optimize_api.StatusCompleted)) + assert.NotEmpty(t, formatOptimizeStatus(optimize_api.StatusFailed)) + assert.NotEmpty(t, formatOptimizeStatus(optimize_api.StatusCancelled)) + assert.NotEmpty(t, formatOptimizeStatus(optimize_api.StatusRunning)) + assert.NotEmpty(t, formatOptimizeStatus("unknown")) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/root.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/root.go index d65d1c0b8e5..712bcc92fa7 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/root.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/root.go @@ -60,6 +60,8 @@ func NewRootCommand() *cobra.Command { rootCmd.AddCommand(newMonitorCommand(extCtx)) rootCmd.AddCommand(newFilesCommand(extCtx)) rootCmd.AddCommand(newSessionCommand(extCtx)) + rootCmd.AddCommand(newEvalCommand(extCtx)) + rootCmd.AddCommand(newOptimizeCommand(extCtx)) return rootCmd } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/agent_yaml/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/agent_yaml/yaml.go index b7122d17d7e..931b9d86730 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/agent_yaml/yaml.go +++ 
// Dataset is the response for dataset operations.
type Dataset struct {
	Name       string `json:"name"`
	Version    string `json:"version"`
	BlobURI    string `json:"blob_uri,omitempty"`
	Format     string `json:"format,omitempty"`
	DataURI    string `json:"data_uri,omitempty"`
	ContentURI string `json:"content_uri,omitempty"`
}

// ResolvedBlobURI picks the first non-empty location in priority order:
// blob_uri, then data_uri, then content_uri. It returns "" when none is set.
func (d *Dataset) ResolvedBlobURI() string {
	switch {
	case d.BlobURI != "":
		return d.BlobURI
	case d.DataURI != "":
		return d.DataURI
	default:
		return d.ContentURI
	}
}
// DatasetCredential is the response for dataset credential (SAS token) requests.
type DatasetCredential struct {
	// BlobURI is the blob location; combined with SAS when SASUri is absent.
	BlobURI string `json:"blob_uri,omitempty"`
	// SAS is the SAS token query string, with or without a leading "?".
	SAS string `json:"sas,omitempty"`
	// SASUri is the full URI with SAS token appended, ready for download.
	SASUri string `json:"sas_uri,omitempty"`
}

// ResolvedDownloadURI returns the URL to download the dataset.
//
// It prefers sas_uri (already complete). Otherwise it appends the SAS token to
// blob_uri, tolerating a token that carries its own leading "?" or "&" and a
// blob_uri that already has a query string. When no SAS token is available the
// bare blob_uri (possibly empty) is returned.
func (c *DatasetCredential) ResolvedDownloadURI() string {
	if c.SASUri != "" {
		return c.SASUri
	}

	// Strip a delimiter the service may have included with the token so the
	// result never contains "??" or "?&".
	sas := c.SAS
	for len(sas) > 0 && (sas[0] == '?' || sas[0] == '&') {
		sas = sas[1:]
	}
	if c.BlobURI == "" || sas == "" {
		return c.BlobURI
	}

	// Pick the separator from what the blob URI already ends with: a second
	// "?" would fold the token into the previous parameter's value.
	sep := "?"
	for i := 0; i < len(c.BlobURI); i++ {
		if c.BlobURI[i] == '?' {
			sep = "&"
			break
		}
	}
	if last := c.BlobURI[len(c.BlobURI)-1]; last == '?' || last == '&' {
		sep = ""
	}

	return c.BlobURI + sep + sas
}
+ +package dataset_api + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// --------------------------------------------------------------------------- +// Dataset +// --------------------------------------------------------------------------- + +func TestDataset_ResolvedBlobURI(t *testing.T) { + t.Parallel() + tests := []struct { + name string + dataset Dataset + expected string + }{ + { + name: "prefers blob_uri", + dataset: Dataset{BlobURI: "https://blob.example", DataURI: "https://data.example"}, + expected: "https://blob.example", + }, + { + name: "falls back to data_uri", + dataset: Dataset{DataURI: "https://data.example", ContentURI: "https://content.example"}, + expected: "https://data.example", + }, + { + name: "falls back to content_uri", + dataset: Dataset{ContentURI: "https://content.example"}, + expected: "https://content.example", + }, + { + name: "empty when no URI", + dataset: Dataset{Name: "test"}, + expected: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tc.expected, tc.dataset.ResolvedBlobURI()) + }) + } +} + +// --------------------------------------------------------------------------- +// DatasetCredential +// --------------------------------------------------------------------------- + +func TestDatasetCredential_ResolvedDownloadURI(t *testing.T) { + t.Parallel() + tests := []struct { + name string + cred DatasetCredential + expected string + }{ + { + name: "prefers sas_uri", + cred: DatasetCredential{SASUri: "https://blob.example/data?sig=abc", BlobURI: "https://blob.example/data"}, + expected: "https://blob.example/data?sig=abc", + }, + { + name: "combines blob_uri and sas", + cred: DatasetCredential{BlobURI: "https://blob.example/data", SAS: "sig=abc&se=2025"}, + expected: "https://blob.example/data?sig=abc&se=2025", + }, + { + name: "blob_uri only", + cred: DatasetCredential{BlobURI: "https://blob.example/data"}, + expected: 
// DatasetClient provides methods for dataset upload, download, and metadata retrieval.
type DatasetClient struct {
	// endpoint is the base URL that dataset API request paths are appended to.
	endpoint string
	// pipeline is the azcore HTTP pipeline used to send dataset API requests.
	pipeline runtime.Pipeline
}
+func NewDatasetClient(endpoint string, cred azcore.TokenCredential) *DatasetClient { + userAgent := fmt.Sprintf("azd-ext-azure-ai-agents/%s", version.Version) + + clientOptions := &policy.ClientOptions{ + Logging: policy.LogOptions{ + AllowedHeaders: []string{"X-Ms-Correlation-Request-Id", "X-Request-Id"}, + IncludeBody: true, + }, + PerCallPolicies: []policy.Policy{ + runtime.NewBearerTokenPolicy(cred, []string{"https://ai.azure.com/.default"}, nil), + azsdk.NewMsCorrelationPolicy(), + azsdk.NewUserAgentPolicy(userAgent), + }, + } + + pipeline := runtime.NewPipeline( + "azure-ai-datasets", + "v1.0.0", + runtime.PipelineOptions{}, + clientOptions, + ) + + return &DatasetClient{ + endpoint: endpoint, + pipeline: pipeline, + } +} + +// CreateDataset registers a dataset with inline content (upload). +func (c *DatasetClient) CreateDataset( + ctx context.Context, + request *CreateDatasetRequest, + apiVersion string, +) (*Dataset, error) { + return doRequestTyped[Dataset](c, ctx, http.MethodPost, pathDatasets, nil, request, apiVersion) +} + +// GetDataset retrieves metadata for a dataset by name and version. +func (c *DatasetClient) GetDataset( + ctx context.Context, + name string, + version string, + apiVersion string, +) (*Dataset, error) { + path := fmt.Sprintf("%s/%s/versions/%s", pathDatasets, url.PathEscape(name), url.PathEscape(version)) + return doRequestTyped[Dataset](c, ctx, http.MethodGet, path, nil, nil, apiVersion) +} + +// GetDatasetCredential retrieves a SAS credential for downloading a dataset from blob storage. 
+func (c *DatasetClient) GetDatasetCredential( + ctx context.Context, + name string, + version string, + apiVersion string, +) (*DatasetCredential, error) { + path := fmt.Sprintf( + "%s/%s/versions/%s/credentials", + pathDatasets, url.PathEscape(name), url.PathEscape(version), + ) + return doRequestTyped[DatasetCredential](c, ctx, http.MethodPost, path, nil, nil, apiVersion) +} + +// DownloadDataset downloads dataset content from blob storage using a SAS-authenticated URL. +// Returns the raw content as bytes. The downloadURL should be the full URL with SAS token +// (e.g., from DatasetCredential.ResolvedDownloadURI()). +func (c *DatasetClient) DownloadDataset(ctx context.Context, downloadURL string) ([]byte, error) { + log.Printf("[dataset_api] downloading dataset from blob: %s", downloadURL) + + req, err := runtime.NewRequest(ctx, http.MethodGet, downloadURL) + if err != nil { + return nil, fmt.Errorf("failed to create download request: %w", err) + } + + // Use a plain HTTP client for blob downloads — the SAS token in the URL provides + // authentication, and Azure SDK pipeline policies (bearer token, correlation ID) + // should not be sent to Azure Blob Storage endpoints. + httpClient := &http.Client{} + resp, err := httpClient.Do(req.Raw()) + if err != nil { + return nil, fmt.Errorf("failed to download dataset from blob: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("blob download failed with status %d", resp.StatusCode) + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read dataset content: %w", err) + } + + log.Printf("[dataset_api] downloaded %d bytes", len(data)) + return data, nil +} + +// doRequest performs an HTTP request against the dataset API and returns the raw response body. 
+func (c *DatasetClient) doRequest( + ctx context.Context, + method string, + path string, + query map[string]string, + body any, + apiVersion string, +) ([]byte, error) { + u, err := url.Parse(c.endpoint) + if err != nil { + return nil, fmt.Errorf("invalid endpoint URL: %w", err) + } + + u.Path += path + q := u.Query() + if apiVersion != "" { + q.Set("api-version", apiVersion) + } + for k, v := range query { + q.Set(k, v) + } + u.RawQuery = q.Encode() + + req, err := runtime.NewRequest(ctx, method, u.String()) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + log.Printf("[dataset_api] %s %s", method, u.String()) + + if body != nil { + payload, err := json.Marshal(body) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + log.Printf("[dataset_api] request body: %s", string(payload)) + if err := req.SetBody(streaming.NopCloser(bytes.NewReader(payload)), "application/json"); err != nil { + return nil, fmt.Errorf("failed to set request body: %w", err) + } + } + + resp, err := c.pipeline.Do(req) + if err != nil { + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + defer resp.Body.Close() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + log.Printf("[dataset_api] response status: %d", resp.StatusCode) + log.Printf("[dataset_api] response body: %s", string(respBody)) + + if !runtime.HasStatusCode(resp, http.StatusOK, http.StatusCreated, http.StatusAccepted) { + resp.Body = io.NopCloser(bytes.NewReader(respBody)) + return nil, runtime.NewResponseError(resp) + } + + return respBody, nil +} + +// doRequestTyped performs an HTTP request and unmarshals the response into T. 
+func doRequestTyped[T any]( + c *DatasetClient, + ctx context.Context, + method string, + path string, + query map[string]string, + body any, + apiVersion string, +) (*T, error) { + respBody, err := c.doRequest(ctx, method, path, query, body, apiVersion) + if err != nil { + return nil, err + } + + if len(respBody) == 0 { + return new(T), nil + } + + var result T + if err := json.Unmarshal(respBody, &result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + return &result, nil +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations_test.go new file mode 100644 index 00000000000..9223597888d --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations_test.go @@ -0,0 +1,207 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package dataset_api + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// test helpers +// --------------------------------------------------------------------------- + +type fakeCredential struct{} + +func (f *fakeCredential) GetToken( + _ context.Context, + _ policy.TokenRequestOptions, +) (azcore.AccessToken, error) { + return azcore.AccessToken{Token: "fake-token"}, nil +} + +func newTestClient(t *testing.T, handler http.Handler) (*DatasetClient, *httptest.Server) { + t.Helper() + server := httptest.NewServer(handler) + t.Cleanup(server.Close) + client := NewDatasetClient(server.URL, &fakeCredential{}) + return client, server +} + +func jsonHandler(status int, body map[string]any) 
http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + data, _ := json.Marshal(body) + _, _ = w.Write(data) + } +} + +// --------------------------------------------------------------------------- +// NewDatasetClient +// --------------------------------------------------------------------------- + +func TestNewDatasetClient(t *testing.T) { + t.Parallel() + + client := NewDatasetClient("https://example.ai.azure.com", &fakeCredential{}) + require.NotNil(t, client) + assert.Equal(t, "https://example.ai.azure.com", client.endpoint) +} + +// --------------------------------------------------------------------------- +// CreateDataset +// --------------------------------------------------------------------------- + +func TestCreateDataset_Success(t *testing.T) { + t.Parallel() + + var capturedPath string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedPath = r.URL.Path + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + data, _ := json.Marshal(map[string]any{"name": "my-ds", "version": "v1"}) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.CreateDataset(t.Context(), &CreateDatasetRequest{ + Name: "my-ds", + Version: "v1", + Format: "jsonl", + Content: `{"input":"hello"}`, + }, "2025-11-15-preview") + + require.NoError(t, err) + assert.Equal(t, "/datasets", capturedPath) + assert.Equal(t, "my-ds", result.Name) + assert.Equal(t, "v1", result.Version) +} + +// --------------------------------------------------------------------------- +// GetDataset +// --------------------------------------------------------------------------- + +func TestGetDataset_Success(t *testing.T) { + t.Parallel() + + var capturedPath string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedPath = r.URL.Path + w.Header().Set("Content-Type", 
"application/json") + w.WriteHeader(http.StatusOK) + data, _ := json.Marshal(map[string]any{ + "name": "golden", + "version": "v2", + "blob_uri": "https://storage.blob.core.windows.net/datasets/golden.jsonl", + }) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.GetDataset(t.Context(), "golden", "v2", "2025-11-15-preview") + + require.NoError(t, err) + assert.Equal(t, "/datasets/golden/versions/v2", capturedPath) + assert.Equal(t, "golden", result.Name) + assert.Equal(t, "v2", result.Version) + assert.Equal(t, "https://storage.blob.core.windows.net/datasets/golden.jsonl", result.BlobURI) +} + +// --------------------------------------------------------------------------- +// GetDatasetCredential +// --------------------------------------------------------------------------- + +func TestGetDatasetCredential_Success(t *testing.T) { + t.Parallel() + + var capturedPath, capturedMethod string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedPath = r.URL.Path + capturedMethod = r.Method + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + data, _ := json.Marshal(map[string]any{ + "blob_uri": "https://storage.blob.core.windows.net/datasets/golden.jsonl", + "sas": "sig=abc&se=2025-12-31", + }) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.GetDatasetCredential(t.Context(), "golden", "v2", "2025-11-15-preview") + + require.NoError(t, err) + assert.Equal(t, "/datasets/golden/versions/v2/credentials", capturedPath) + assert.Equal(t, http.MethodPost, capturedMethod) + assert.Equal(t, "https://storage.blob.core.windows.net/datasets/golden.jsonl", result.BlobURI) + assert.Equal(t, "sig=abc&se=2025-12-31", result.SAS) +} + +// --------------------------------------------------------------------------- +// DownloadDataset +// --------------------------------------------------------------------------- + +func 
TestDownloadDataset_Success(t *testing.T) { + t.Parallel() + + blobContent := `{"input":"hello","expected":"world"}` + "\n" + + `{"input":"foo","expected":"bar"}` + "\n" + + blobServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/octet-stream") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(blobContent)) + })) + t.Cleanup(blobServer.Close) + + client := NewDatasetClient("https://example.ai.azure.com", &fakeCredential{}) + data, err := client.DownloadDataset(t.Context(), blobServer.URL+"/datasets/golden.jsonl?sig=abc") + + require.NoError(t, err) + assert.Equal(t, blobContent, string(data)) +} + +func TestDownloadDataset_Error(t *testing.T) { + t.Parallel() + + blobServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusForbidden) + })) + t.Cleanup(blobServer.Close) + + client := NewDatasetClient("https://example.ai.azure.com", &fakeCredential{}) + _, err := client.DownloadDataset(t.Context(), blobServer.URL+"/datasets/golden.jsonl?sig=expired") + + require.Error(t, err) + assert.Contains(t, err.Error(), "403") +} + +// --------------------------------------------------------------------------- +// Error handling +// --------------------------------------------------------------------------- + +func TestGetDataset_NotFound(t *testing.T) { + t.Parallel() + + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`{"error":"not found"}`)) + }) + + client, _ := newTestClient(t, handler) + _, err := client.GetDataset(t.Context(), "missing", "v1", "2025-11-15-preview") + + require.Error(t, err) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go new file mode 100644 
// ResolveEvalOutputPath returns the location of the eval output config.
// An absolute output path is used unchanged; a relative one is resolved
// against the agent project directory.
func ResolveEvalOutputPath(output, agentProject string) string {
	resolved := output
	if !filepath.IsAbs(output) {
		resolved = filepath.Join(agentProject, output)
	}
	return resolved
}
+func SaveDatasetGenerationResult(projectRoot, datasetName string, result json.RawMessage) { + if datasetName == "" || len(result) == 0 { + return + } + dir := filepath.Join(projectRoot, foundryDir, "datasets") + if err := os.MkdirAll(dir, 0750); err != nil { + log.Printf("[debug] failed to create dataset dir: %v", err) + return + } + path := filepath.Join(dir, datasetName+".json") + if err := os.WriteFile(path, result, 0600); err != nil { + log.Printf("[debug] failed to save dataset result: %v", err) + } +} + +// DownloadDatasetArtifact downloads the dataset referenced by dsRef and saves +// it under .azure/.foundry/datasets/.jsonl. +func DownloadDatasetArtifact( + ctx context.Context, + client *dataset_api.DatasetClient, + projectRoot string, + dsRef *opteval.DatasetRef, + apiVersion string, +) error { + if dsRef == nil || dsRef.Name == "" { + return fmt.Errorf("dataset reference is empty") + } + + ds, err := client.GetDataset(ctx, dsRef.Name, dsRef.Version, apiVersion) + if err != nil { + return fmt.Errorf("failed to get dataset %q: %w", dsRef.Name, err) + } + + cred, err := client.GetDatasetCredential(ctx, dsRef.Name, dsRef.Version, apiVersion) + if err != nil { + return fmt.Errorf("failed to get dataset credential: %w", err) + } + + downloadURL := cred.ResolvedDownloadURI() + if downloadURL == "" { + downloadURL = ds.ResolvedBlobURI() + } + if downloadURL == "" { + return fmt.Errorf("no download URL available for dataset %q", dsRef.Name) + } + + data, err := client.DownloadDataset(ctx, downloadURL) + if err != nil { + return fmt.Errorf("failed to download dataset: %w", err) + } + + dir := filepath.Join(projectRoot, foundryDir, "datasets") + if err := os.MkdirAll(dir, 0750); err != nil { + return fmt.Errorf("failed to create dataset dir: %w", err) + } + + path := filepath.Join(dir, dsRef.Name+".jsonl") + if err := os.WriteFile(path, data, 0600); err != nil { + return fmt.Errorf("failed to write dataset artifact: %w", err) + } + + return nil +} + +// 
DatasetArtifactPath returns the local path where a downloaded dataset +// artifact is stored. +func DatasetArtifactPath(projectRoot string, dsRef *opteval.DatasetRef) string { + if dsRef == nil || dsRef.Name == "" { + return "" + } + return filepath.Join(projectRoot, foundryDir, "datasets", dsRef.Name+".jsonl") +} + +// SaveEvaluatorResult saves the raw JSON result of an evaluator generation job +// under .azure/.foundry/evaluators/.json. +func SaveEvaluatorResult(projectRoot, evaluatorName string, result json.RawMessage) { + if evaluatorName == "" || len(result) == 0 { + return + } + dir := filepath.Join(projectRoot, foundryDir, "evaluators") + if err := os.MkdirAll(dir, 0750); err != nil { + log.Printf("[debug] failed to create evaluator dir: %v", err) + return + } + path := filepath.Join(dir, evaluatorName+".json") + if err := os.WriteFile(path, result, 0600); err != nil { + log.Printf("[debug] failed to save evaluator result: %v", err) + } +} + +// WriteEvalReviewArtifacts writes human-readable review artifacts for the eval +// config under .azure/.foundry/review/. +func WriteEvalReviewArtifacts(projectRoot string, cfg *EvalConfig) { + if cfg == nil { + return + } + dir := filepath.Join(projectRoot, foundryDir, "review") + if err := os.MkdirAll(dir, 0750); err != nil { + log.Printf("[debug] failed to create review dir: %v", err) + return + } + data, err := json.MarshalIndent(cfg, "", " ") + if err != nil { + log.Printf("[debug] failed to marshal eval config for review: %v", err) + return + } + path := filepath.Join(dir, "eval-config.json") + if err := os.WriteFile(path, data, 0600); err != nil { + log.Printf("[debug] failed to write review artifact: %v", err) + } +} + +// WriteJSONFile writes a value as indented JSON to the specified path. 
// FormatTimestamp renders a timestamp as "YYYY-MM-DD HH:MM:SS UTC".
//
// Numeric values (int, int64, float64) are read as Unix seconds, with floats
// truncated toward zero. Strings are parsed as RFC 3339; a string that does
// not parse is returned unchanged. Zero numbers, empty strings, and any other
// type produce "".
func FormatTimestamp(ts any) string {
	const layout = "2006-01-02 15:04:05 UTC"
	fromUnix := func(seconds int64) string {
		return time.Unix(seconds, 0).UTC().Format(layout)
	}

	switch value := ts.(type) {
	case string:
		if value == "" {
			return ""
		}
		parsed, err := time.Parse(time.RFC3339, value)
		if err != nil {
			return value
		}
		return parsed.UTC().Format(layout)
	case int:
		if value != 0 {
			return fromUnix(int64(value))
		}
	case int64:
		if value != 0 {
			return fromUnix(value)
		}
	case float64:
		// The zero check happens before truncation, so 0.5 still formats.
		if value != 0 {
			return fromUnix(int64(value))
		}
	}
	return ""
}
+ GenerationInstruction string `yaml:"generation_instruction,omitempty"` + + // MaxSamples is the maximum number of data samples to generate. + MaxSamples int `yaml:"max_samples,omitempty"` + + // TraceDays is the number of days of agent traces to include (0 = none). + TraceDays int `yaml:"trace_days,omitempty"` +} + +// LoadEvalConfig reads and parses a YAML eval config file. +func LoadEvalConfig(path string) (*EvalConfig, error) { + data, err := os.ReadFile(path) //nolint:gosec // path is provided by user for local config + if err != nil { + return nil, fmt.Errorf("failed to read eval config %q: %w", path, err) + } + + var cfg EvalConfig + if err := yaml.Unmarshal(data, &cfg); err != nil { + return nil, fmt.Errorf("failed to parse eval config %q: %w", path, err) + } + + return &cfg, nil +} + +// WriteEvalConfig writes the eval config to a YAML file. +func WriteEvalConfig(path string, cfg *EvalConfig) error { + if err := os.MkdirAll(filepath.Dir(path), 0750); err != nil { + return fmt.Errorf("creating config directory: %w", err) + } + + data, err := yaml.Marshal(cfg) + if err != nil { + return fmt.Errorf("failed to marshal eval config: %w", err) + } + + if err := os.WriteFile(path, data, 0600); err != nil { + return fmt.Errorf("failed to write eval config %q: %w", path, err) + } + + return nil +} + +// Validate checks required fields for the eval command. +func (c *EvalConfig) Validate() error { + if c.Agent.Name == "" { + return fmt.Errorf("agent.name is required") + } + + hasFile := c.DatasetFile != "" + hasRef := c.DatasetReference != nil + + if hasFile && hasRef { + return fmt.Errorf("dataset_file and dataset_reference are mutually exclusive; specify one, not both") + } + + if !hasFile && !hasRef { + return fmt.Errorf("one of dataset_file or dataset_reference is required") + } + + return nil +} + +// ToAgentTargetAdaptableEvalGroupRequest builds the request body for creating an OpenAI eval +// with agent target completions and adaptable evaluator schema. 
+func (c *EvalConfig) ToAgentTargetAdaptableEvalGroupRequest() *CreateOpenAIEvalRequest { + request := &CreateOpenAIEvalRequest{ + Name: c.Name, + Metadata: map[string]string{ + "azd_agent": c.Agent.Name, + "azd_agent_version": c.Agent.Version, + }, + DataSourceConfig: &DataSourceConfig{ + Type: "custom", + ItemSchema: map[string]any{}, + IncludeSampleSchema: true, + Schema: &DataSourceSchema{ + Item: map[string]any{ + "type": "object", + "properties": map[string]any{ + "query": map[string]any{"type": "string"}, + }, + }, + Sample: map[string]any{ + "type": "object", + "properties": map[string]any{ + "output_text": map[string]any{"type": "string"}, + }, + }, + }, + }, + } + + // Build testing_criteria from evaluators. + evalModel := "" + if c.Options != nil { + evalModel = c.Options.EvalModel + } + for _, evaluator := range c.Evaluators { + criterion := TestingCriterion{ + Type: "azure_ai_evaluator", + Name: evaluator, + EvaluatorName: evaluator, + DataMapping: map[string]string{ + "messages": "{{item.messages}}", + "query": "{{item.query}}", + "response": "{{sample.output_text}}", + }, + } + if evalModel != "" { + criterion.InitializationParameters = map[string]string{ + "model": evalModel, + "deployment_name": evalModel, + } + } + request.TestingCriteria = append(request.TestingCriteria, criterion) + } + + return request +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go new file mode 100644 index 00000000000..6fbe3d43d33 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go @@ -0,0 +1,269 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package eval_api + +import ( + "os" + "path/filepath" + "testing" + + "azureaiagent/internal/pkg/agents/agent_yaml" + "azureaiagent/internal/pkg/agents/opteval" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// Validate +// --------------------------------------------------------------------------- + +func TestValidate_RequiresAgentName(t *testing.T) { + t.Parallel() + + cfg := &EvalConfig{ + Config: opteval.Config{ + Agent: opteval.AgentRef{}, + DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "v1"}, + }, + } + err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "agent.name is required") +} + +func TestValidate_RequiresDataset(t *testing.T) { + t.Parallel() + + cfg := &EvalConfig{ + Config: opteval.Config{ + Agent: opteval.AgentRef{Name: "agent-1"}, + }, + } + err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "dataset_file or dataset_reference is required") +} + +func TestValidate_MutuallyExclusiveDataset(t *testing.T) { + t.Parallel() + + cfg := &EvalConfig{ + Config: opteval.Config{ + Agent: opteval.AgentRef{Name: "agent-1"}, + DatasetFile: "tasks.jsonl", + DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "v1"}, + }, + } + err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "mutually exclusive") +} + +func TestValidate_ValidWithDatasetFile(t *testing.T) { + t.Parallel() + + cfg := &EvalConfig{ + Config: opteval.Config{ + Agent: opteval.AgentRef{Name: "agent-1"}, + DatasetFile: "tasks.jsonl", + }, + } + assert.NoError(t, cfg.Validate()) +} + +func TestValidate_ValidWithDatasetReference(t *testing.T) { + t.Parallel() + + cfg := &EvalConfig{ + Config: opteval.Config{ + Agent: opteval.AgentRef{Name: "agent-1"}, + DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "v1"}, + }, + } + assert.NoError(t, cfg.Validate()) +} + +// 
--------------------------------------------------------------------------- +// LoadEvalConfig / WriteEvalConfig round-trip +// --------------------------------------------------------------------------- + +func TestEvalConfig_RoundTrip_FullFields(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "eval.yaml") + + original := &EvalConfig{ + Config: opteval.Config{ + Name: "full-test", + Agent: opteval.AgentRef{ + Name: "booking-agent", + Kind: "hosted", + Version: "v3", + Model: "gpt-4.1", + }, + DatasetReference: &opteval.DatasetRef{Name: "golden-data", Version: "v2"}, + Evaluators: []string{"builtin.task_adherence", "custom-quality"}, + }, + Options: &opteval.Options{ + EvalModel: "gpt-4o", + }, + GenerationInstruction: "This agent handles restaurant reservations", + MaxSamples: 75, + } + + require.NoError(t, WriteEvalConfig(path, original)) + loaded, err := LoadEvalConfig(path) + require.NoError(t, err) + + assert.Equal(t, "full-test", loaded.Name) + assert.Equal(t, "booking-agent", loaded.Agent.Name) + assert.Equal(t, agent_yaml.AgentKind("hosted"), loaded.Agent.Kind) + assert.Equal(t, "v3", loaded.Agent.Version) + assert.Equal(t, "gpt-4.1", loaded.Agent.Model) + require.NotNil(t, loaded.DatasetReference) + assert.Equal(t, "golden-data", loaded.DatasetReference.Name) + assert.Equal(t, "v2", loaded.DatasetReference.Version) + require.Len(t, loaded.Evaluators, 2) + assert.Equal(t, "builtin.task_adherence", loaded.Evaluators[0]) + assert.Equal(t, "custom-quality", loaded.Evaluators[1]) + assert.Equal(t, "gpt-4o", loaded.Options.EvalModel) + assert.Equal(t, "This agent handles restaurant reservations", loaded.GenerationInstruction) + assert.Equal(t, 75, loaded.MaxSamples) +} + +func TestEvalConfig_RoundTrip_MinimalFields(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "eval.yaml") + + original := &EvalConfig{ + Config: opteval.Config{ + Agent: opteval.AgentRef{Name: "simple-agent"}, + DatasetFile: 
"data.jsonl", + }, + } + + require.NoError(t, WriteEvalConfig(path, original)) + loaded, err := LoadEvalConfig(path) + require.NoError(t, err) + + assert.Equal(t, "simple-agent", loaded.Agent.Name) + assert.Equal(t, "data.jsonl", loaded.DatasetFile) + assert.Nil(t, loaded.DatasetReference) + assert.Empty(t, loaded.Evaluators) + assert.Empty(t, loaded.GenerationInstruction) + assert.Zero(t, loaded.MaxSamples) +} + +func TestLoadEvalConfig_MissingFile(t *testing.T) { + t.Parallel() + _, err := LoadEvalConfig("/nonexistent/path/eval.yaml") + assert.Error(t, err) +} + +func TestLoadEvalConfig_InvalidYAML(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "bad.yaml") + require.NoError(t, os.WriteFile(path, []byte("{{invalid yaml}}"), 0600)) + _, err := LoadEvalConfig(path) + assert.Error(t, err) + assert.Contains(t, err.Error(), "failed to parse") +} + +func TestWriteEvalConfig_CreatesDirectory(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "subdir", "nested", "eval.yaml") + + cfg := &EvalConfig{ + Config: opteval.Config{ + Agent: opteval.AgentRef{Name: "agent-1"}, + }, + } + + require.NoError(t, WriteEvalConfig(path, cfg)) + assert.FileExists(t, path) +} + +// --------------------------------------------------------------------------- +// ToAgentTargetAdaptableEvalGroupRequest +// --------------------------------------------------------------------------- + +func TestToAgentTargetAdaptableEvalGroupRequest_WithEvaluators(t *testing.T) { + t.Parallel() + + cfg := &EvalConfig{ + Config: opteval.Config{ + Name: "test-eval", + Agent: opteval.AgentRef{Name: "agent-1", Version: "v1"}, + Evaluators: []string{"builtin.quality", "custom-1"}, + DatasetFile: "tasks.jsonl", + }, + Options: &opteval.Options{EvalModel: "gpt-4o"}, + } + + req := cfg.ToAgentTargetAdaptableEvalGroupRequest() + + assert.Equal(t, "test-eval", req.Name) + assert.Equal(t, "agent-1", req.Metadata["azd_agent"]) + assert.Equal(t, "v1", 
req.Metadata["azd_agent_version"]) + require.NotNil(t, req.DataSourceConfig) + assert.Equal(t, "custom", req.DataSourceConfig.Type) + require.Len(t, req.TestingCriteria, 2) + assert.Equal(t, "azure_ai_evaluator", req.TestingCriteria[0].Type) + assert.Equal(t, "builtin.quality", req.TestingCriteria[0].EvaluatorName) + assert.Equal(t, "gpt-4o", req.TestingCriteria[0].InitializationParameters["model"]) + assert.Equal(t, "{{item.messages}}", req.TestingCriteria[0].DataMapping["messages"]) + assert.Equal(t, "custom-1", req.TestingCriteria[1].EvaluatorName) +} + +func TestToAgentTargetAdaptableEvalGroupRequest_WithDatasetReference(t *testing.T) { + t.Parallel() + + cfg := &EvalConfig{ + Config: opteval.Config{ + Name: "ref-eval", + Agent: opteval.AgentRef{Name: "agent-1"}, + DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "v1"}, + }, + } + + req := cfg.ToAgentTargetAdaptableEvalGroupRequest() + // DataSourceConfig is always set with the custom schema. + require.NotNil(t, req.DataSourceConfig) + assert.Equal(t, "custom", req.DataSourceConfig.Type) +} + +func TestToAgentTargetAdaptableEvalGroupRequest_NoEvaluators(t *testing.T) { + t.Parallel() + + cfg := &EvalConfig{ + Config: opteval.Config{ + Name: "test-eval", + Agent: opteval.AgentRef{Name: "agent-1"}, + DatasetFile: "tasks.jsonl", + }, + } + + req := cfg.ToAgentTargetAdaptableEvalGroupRequest() + assert.Empty(t, req.TestingCriteria) +} + +func TestToAgentTargetAdaptableEvalGroupRequest_MetadataFields(t *testing.T) { + t.Parallel() + + cfg := &EvalConfig{ + Config: opteval.Config{ + Name: "meta-test", + Agent: opteval.AgentRef{Name: "my-agent", Version: "v5"}, + DatasetFile: "tasks.jsonl", + }, + } + + req := cfg.ToAgentTargetAdaptableEvalGroupRequest() + assert.Equal(t, "my-agent", req.Metadata["azd_agent"]) + assert.Equal(t, "v5", req.Metadata["azd_agent_version"]) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go 
b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go new file mode 100644 index 00000000000..d678cbd614d --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go @@ -0,0 +1,146 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package eval_api + +import ( + "path/filepath" + "strings" + "time" +) + +// --------------------------------------------------------------------------- +// Generation source building +// --------------------------------------------------------------------------- + +// TraceOptions holds optional trace inclusion parameters for generation sources. +type TraceOptions struct { + Days int +} + +// BuildGenerationSources constructs the sources array for generation jobs. +// For prompt agents (agentKind == "prompt"), only the agent source is included. +// For other agent kinds, a prompt source is included when instruction is +// non-empty, along with the agent source. When traces is non-nil and Days > 0, +// a traces source is appended with start_time computed from the current time. 
+func BuildGenerationSources(agentKind, agentName, version, instruction string, traces *TraceOptions) []GenerationSource { + var sources []GenerationSource + + if agentKind != "prompt" && instruction != "" { + sources = append(sources, GenerationSource{ + Type: "prompt", + Prompt: instruction, + }) + } + + agentSource := GenerationSource{ + Type: "agent", + AgentName: agentName, + } + if version != "" { + agentSource.AgentVersion = version + } + sources = append(sources, agentSource) + + if traces != nil && traces.Days > 0 { + startTime := time.Now().AddDate(0, 0, -traces.Days).Unix() + sources = append(sources, GenerationSource{ + Type: "traces", + AgentName: agentName, + StartTime: startTime, + }) + } + + return sources +} + +// --------------------------------------------------------------------------- +// Request builders +// --------------------------------------------------------------------------- + +// NewDataGenerationJobRequest builds a DataGenerationJobRequest from the +// provided parameters. When sources contain a "traces" entry, the generation +// type is set to "traces"; otherwise it defaults to "simple_qna". +func NewDataGenerationJobRequest( + name, evalModel string, + maxSamples int, + sources []GenerationSource, +) *DataGenerationJobRequest { + genType := "simple_qna" + for _, s := range sources { + if s.Type == "traces" { + genType = "traces" + break + } + } + return &DataGenerationJobRequest{ + Inputs: DataGenerationInputs{ + Name: name, + Scenario: "evaluation", + Options: DataGenerationOptions{ + Type: genType, + MaxSamples: maxSamples, + ModelOptions: ModelOptions{ + Model: evalModel, + }, + }, + Sources: sources, + }, + } +} + +// NewEvaluatorGenerationJobRequest builds an EvaluatorGenerationJobRequest +// from the provided parameters. 
+func NewEvaluatorGenerationJobRequest( + name, evalModel string, + sources []GenerationSource, +) *EvaluatorGenerationJobRequest { + return &EvaluatorGenerationJobRequest{ + Name: name, + EvaluatorName: name, + Category: "quality", + Model: evalModel, + Sources: sources, + } +} + +// --------------------------------------------------------------------------- +// Evaluator classification +// --------------------------------------------------------------------------- + +// IsBuiltinEvaluator returns true when the evaluator name has the "builtin." +// prefix. +func IsBuiltinEvaluator(name string) bool { + return strings.HasPrefix(name, "builtin.") +} + +// SplitEvaluators partitions evaluators into generated (non-builtin) and +// built-in lists. +func SplitEvaluators(evaluators []string) (generated, builtin []string) { + for _, e := range evaluators { + if IsBuiltinEvaluator(e) { + builtin = append(builtin, e) + } else { + generated = append(generated, e) + } + } + return generated, builtin +} + +// --------------------------------------------------------------------------- +// Dataset name detection +// --------------------------------------------------------------------------- + +// IsDatasetName returns true when the value looks like a registered dataset +// name rather than a local file path. A name has no path separators and no +// common data-file extension (.jsonl, .json, .csv). 
+func IsDatasetName(value string) bool { + if value == "" { + return false + } + if strings.ContainsAny(value, "/\\") { + return false + } + ext := strings.ToLower(filepath.Ext(value)) + return ext != ".jsonl" && ext != ".json" && ext != ".csv" +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go new file mode 100644 index 00000000000..1fe67341922 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go @@ -0,0 +1,137 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package eval_api + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// BuildGenerationSources +// --------------------------------------------------------------------------- + +func TestBuildGenerationSources_HostedWithInstruction(t *testing.T) { + t.Parallel() + sources := BuildGenerationSources("hosted", "my-agent", "v2", "Test interactions", nil) + require.Len(t, sources, 2) + assert.Equal(t, "prompt", sources[0].Type) + assert.Equal(t, "Test interactions", sources[0].Prompt) + assert.Equal(t, "agent", sources[1].Type) + assert.Equal(t, "my-agent", sources[1].AgentName) + assert.Equal(t, "v2", sources[1].AgentVersion) +} + +func TestBuildGenerationSources_PromptAgentOnly(t *testing.T) { + t.Parallel() + sources := BuildGenerationSources("prompt", "prompt-agent", "v1", "ignored", nil) + require.Len(t, sources, 1) + assert.Equal(t, "agent", sources[0].Type) + assert.Equal(t, "prompt-agent", sources[0].AgentName) + assert.Equal(t, "v1", sources[0].AgentVersion) +} + +func TestBuildGenerationSources_NoVersion(t *testing.T) { + t.Parallel() + sources := BuildGenerationSources("prompt", "agent", "", "", nil) + require.Len(t, sources, 1) + 
assert.Empty(t, sources[0].AgentVersion) +} + +func TestBuildGenerationSources_HostedNoInstruction(t *testing.T) { + t.Parallel() + sources := BuildGenerationSources("hosted", "agent", "v1", "", nil) + require.Len(t, sources, 1) + assert.Equal(t, "agent", sources[0].Type) +} + +// --------------------------------------------------------------------------- +// NewDataGenerationJobRequest +// --------------------------------------------------------------------------- + +func TestNewDataGenerationJobRequest(t *testing.T) { + t.Parallel() + sources := []GenerationSource{{Type: "agent", AgentName: "a1"}} + req := NewDataGenerationJobRequest("eval-suite", "gpt-4o", 50, sources) + assert.Equal(t, "eval-suite", req.Inputs.Name) + assert.Equal(t, "evaluation", req.Inputs.Scenario) + assert.Equal(t, "simple_qna", req.Inputs.Options.Type) + assert.Equal(t, 50, req.Inputs.Options.MaxSamples) + assert.Equal(t, "gpt-4o", req.Inputs.Options.ModelOptions.Model) + require.Len(t, req.Inputs.Sources, 1) +} + +// --------------------------------------------------------------------------- +// NewEvaluatorGenerationJobRequest +// --------------------------------------------------------------------------- + +func TestNewEvaluatorGenerationJobRequest(t *testing.T) { + t.Parallel() + sources := []GenerationSource{{Type: "agent", AgentName: "a1"}} + req := NewEvaluatorGenerationJobRequest("eval-suite", "gpt-4o", sources) + assert.Equal(t, "eval-suite", req.Name) + assert.Equal(t, "eval-suite", req.EvaluatorName) + assert.Equal(t, "quality", req.Category) + assert.Equal(t, "gpt-4o", req.Model) + require.Len(t, req.Sources, 1) +} + +// --------------------------------------------------------------------------- +// IsBuiltinEvaluator +// --------------------------------------------------------------------------- + +func TestIsBuiltinEvaluator(t *testing.T) { + t.Parallel() + assert.True(t, IsBuiltinEvaluator("builtin.task_adherence")) + assert.True(t, IsBuiltinEvaluator("builtin.")) + 
assert.False(t, IsBuiltinEvaluator("my-quality")) + assert.False(t, IsBuiltinEvaluator("")) + assert.False(t, IsBuiltinEvaluator("builtins.quality")) +} + +// --------------------------------------------------------------------------- +// SplitEvaluators +// --------------------------------------------------------------------------- + +func TestSplitEvaluators(t *testing.T) { + t.Parallel() + + t.Run("mixed", func(t *testing.T) { + t.Parallel() + gen, bi := SplitEvaluators([]string{"builtin.task_adherence", "my-quality", "builtin.safety"}) + assert.Equal(t, []string{"my-quality"}, gen) + assert.Equal(t, []string{"builtin.task_adherence", "builtin.safety"}, bi) + }) + + t.Run("all builtin", func(t *testing.T) { + t.Parallel() + gen, bi := SplitEvaluators([]string{"builtin.quality", "builtin.safety"}) + assert.Nil(t, gen) + assert.Equal(t, []string{"builtin.quality", "builtin.safety"}, bi) + }) + + t.Run("nil", func(t *testing.T) { + t.Parallel() + gen, bi := SplitEvaluators(nil) + assert.Nil(t, gen) + assert.Nil(t, bi) + }) +} + +// --------------------------------------------------------------------------- +// IsDatasetName +// --------------------------------------------------------------------------- + +func TestIsDatasetName(t *testing.T) { + t.Parallel() + assert.True(t, IsDatasetName("eval-data-2026")) + assert.True(t, IsDatasetName("my-dataset.v2")) + assert.False(t, IsDatasetName("golden.jsonl")) + assert.False(t, IsDatasetName("data.json")) + assert.False(t, IsDatasetName("results.csv")) + assert.False(t, IsDatasetName("./tests/golden.jsonl")) + assert.False(t, IsDatasetName("")) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go new file mode 100644 index 00000000000..ec2bb142fb0 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go @@ -0,0 +1,368 @@ +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +package eval_api + +import "encoding/json" + +// --------------------------------------------------------------------------- +// Data Generation Jobs +// --------------------------------------------------------------------------- + +// DataGenerationJobRequest is the request body for CreateDataGenerationJob. +type DataGenerationJobRequest struct { + Inputs DataGenerationInputs `json:"inputs"` +} + +// DataGenerationInputs holds the inputs for a data generation job. +type DataGenerationInputs struct { + Name string `json:"name"` + Scenario string `json:"scenario"` + Options DataGenerationOptions `json:"options"` + Sources []GenerationSource `json:"sources"` +} + +// DataGenerationOptions holds configuration for data generation. +type DataGenerationOptions struct { + Type string `json:"type"` + MaxSamples int `json:"max_samples"` + ModelOptions ModelOptions `json:"model_options"` +} + +// ModelOptions holds the model selection for generation. +type ModelOptions struct { + Model string `json:"model"` +} + +// GenerationSource describes a source used for dataset or evaluator generation. +type GenerationSource struct { + Type string `json:"type"` + Prompt string `json:"prompt,omitempty"` + AgentName string `json:"agent_name,omitempty"` + AgentVersion string `json:"agent_version,omitempty"` + StartTime int64 `json:"start_time,omitempty"` +} + +// GenerationJob is the response for data and evaluator generation job operations. +type GenerationJob struct { + ID string `json:"id"` + Status string `json:"status"` + Result json.RawMessage `json:"result,omitempty"` + DatasetName string `json:"dataset_name,omitempty"` + DatasetVersion string `json:"dataset_version,omitempty"` + EvaluatorName string `json:"evaluator_name,omitempty"` + Name string `json:"name,omitempty"` + Version string `json:"version,omitempty"` +} + +// OperationID returns the job's operation identifier. 
+func (j *GenerationJob) OperationID() string { + return j.ID +} + +// NormalizedStatus returns the lowercase status, defaulting to "running". +func (j *GenerationJob) NormalizedStatus() string { + if j.Status == "" { + return "running" + } + return j.Status +} + +// ResolvedDatasetName returns dataset_name falling back to result.name, then name. +func (j *GenerationJob) ResolvedDatasetName() string { + if j.DatasetName != "" { + return j.DatasetName + } + if name := j.resultStringField("name"); name != "" { + return name + } + return j.Name +} + +// ResolvedDatasetVersion returns dataset_version falling back to result.version, then version. +func (j *GenerationJob) ResolvedDatasetVersion() string { + if j.DatasetVersion != "" { + return j.DatasetVersion + } + if v := j.resultStringField("version"); v != "" { + return v + } + if j.Version != "" { + return j.Version + } + return "v1" +} + +// ResolvedEvaluatorName returns evaluator_name falling back to result.name, then name. +func (j *GenerationJob) ResolvedEvaluatorName() string { + if j.EvaluatorName != "" { + return j.EvaluatorName + } + if name := j.resultStringField("name"); name != "" { + return name + } + return j.Name +} + +// resultStringField extracts a string field from the raw Result JSON. +func (j *GenerationJob) resultStringField(key string) string { + if len(j.Result) == 0 { + return "" + } + var m map[string]json.RawMessage + if err := json.Unmarshal(j.Result, &m); err != nil { + return "" + } + raw, ok := m[key] + if !ok { + return "" + } + var s string + if err := json.Unmarshal(raw, &s); err != nil { + return "" + } + return s +} + +// --------------------------------------------------------------------------- +// Evaluator Generation Jobs +// --------------------------------------------------------------------------- + +// EvaluatorGenerationJobRequest is the request body for CreateEvaluatorGenerationJob. 
+type EvaluatorGenerationJobRequest struct { + Name string `json:"name"` + EvaluatorName string `json:"evaluator_name"` + Category string `json:"category"` + Model string `json:"model"` + Sources []GenerationSource `json:"sources"` +} + +// --------------------------------------------------------------------------- +// Datasets +// --------------------------------------------------------------------------- + +// CreateDatasetRequest is the request body for CreateDataset. +type CreateDatasetRequest struct { + Name string `json:"name"` + Version string `json:"version"` + Format string `json:"format"` + Content string `json:"content"` +} + +// Dataset is the response for dataset operations. +type Dataset struct { + Name string `json:"name"` + Version string `json:"version"` +} + +// --------------------------------------------------------------------------- +// OpenAI Evals +// --------------------------------------------------------------------------- + +// DataSourceConfig describes the data source for an OpenAI eval. +type DataSourceConfig struct { + Type string `json:"type"` + ItemSchema map[string]any `json:"item_schema"` + IncludeSampleSchema bool `json:"include_sample_schema"` + Schema *DataSourceSchema `json:"schema,omitempty"` +} + +// DataSourceSchema defines the item and sample schemas for an eval data source. +type DataSourceSchema struct { + Item map[string]any `json:"item,omitempty"` + Sample map[string]any `json:"sample,omitempty"` +} + +// TestingCriterion describes a single evaluator in testing_criteria. +type TestingCriterion struct { + Type string `json:"type"` + Name string `json:"name"` + EvaluatorName string `json:"evaluator_name"` + InitializationParameters map[string]string `json:"initialization_parameters,omitempty"` + DataMapping map[string]string `json:"data_mapping,omitempty"` +} + +// CreateOpenAIEvalRequest is the request body for CreateOpenAIEval. 
+type CreateOpenAIEvalRequest struct { + Name string `json:"name"` + Metadata map[string]string `json:"metadata,omitempty"` + DataSourceConfig *DataSourceConfig `json:"data_source_config,omitempty"` + TestingCriteria []TestingCriterion `json:"testing_criteria,omitempty"` +} + +// OpenAIEval is the response for an OpenAI eval definition. +type OpenAIEval struct { + ID string `json:"id"` + Name string `json:"name,omitempty"` + CreatedAt any `json:"created_at,omitempty"` + ModifiedAt any `json:"modified_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` +} + +// ResolvedID returns the eval's ID, falling back to name. +func (e *OpenAIEval) ResolvedID() string { + if e.ID != "" { + return e.ID + } + return e.Name +} + +// OpenAIEvalList is the response for listing OpenAI eval definitions. +type OpenAIEvalList struct { + Data []OpenAIEval `json:"data"` +} + +// --------------------------------------------------------------------------- +// OpenAI Eval Runs +// --------------------------------------------------------------------------- + +// CreateOpenAIEvalRunRequest is the request body for CreateOpenAIEvalRun. +type CreateOpenAIEvalRunRequest struct { + Name string `json:"name"` + DataSource *EvalRunDataSource `json:"data_source,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` +} + +// EvalRunDataSourceType defines the type for an eval run data source. +type EvalRunDataSourceType = string + +const ( + // EvalRunDataSourceTypeAgentTarget is the data source type for agent target completions. + EvalRunDataSourceTypeAgentTarget EvalRunDataSourceType = "azure_ai_target_completions" +) + +// EvalRunDataContentType defines the source type for eval run data content. 
+type EvalRunDataContentType = string + +const ( + EvalRunDataContentTypeFileContent EvalRunDataContentType = "file_content" + EvalRunDataContentTypeFileID EvalRunDataContentType = "file_id" +) + +// EvalRunDataSource describes the data source for an eval run with agent target completions. +type EvalRunDataSource struct { + Type EvalRunDataSourceType `json:"type"` + InputMessages *EvalRunInputMessages `json:"input_messages,omitempty"` + Source *EvalRunDataContent `json:"source,omitempty"` + Target *EvalRunTarget `json:"target,omitempty"` +} + +// EvalRunInputMessages describes how input messages are constructed from dataset items. +type EvalRunInputMessages struct { + Type string `json:"type"` + Template []EvalRunMessageTemplate `json:"template"` +} + +// EvalRunMessageTemplate describes a single message in the input template. +type EvalRunMessageTemplate struct { + Role string `json:"role"` + Content *EvalRunMessageContent `json:"content"` + Type string `json:"type"` +} + +// EvalRunMessageContent describes the content of a template message. +type EvalRunMessageContent struct { + Type string `json:"type"` + Text string `json:"text"` +} + +// EvalRunTarget describes the agent target for completions. +type EvalRunTarget struct { + Type string `json:"type"` + Name string `json:"name"` + Version *string `json:"version"` + ToolDescriptions []string `json:"tool_descriptions"` +} + +// EvalRunDataContent holds the source reference within an EvalRunDataSource. +type EvalRunDataContent struct { + Type EvalRunDataContentType `json:"type"` + ID string `json:"id,omitempty"` + Content []map[string]any `json:"content,omitempty"` +} + +// NewAgentTargetDataSource builds an EvalRunDataSource configured for agent target completions. +// The source field must be set separately via SetFileContent or SetFileID. 
+func NewAgentTargetDataSource(agentName string, agentVersion *string) *EvalRunDataSource { + return &EvalRunDataSource{ + Type: EvalRunDataSourceTypeAgentTarget, + InputMessages: &EvalRunInputMessages{ + Type: "template", + Template: []EvalRunMessageTemplate{ + { + Role: "user", + Content: &EvalRunMessageContent{ + Type: "input_text", + Text: "{{item.query}}", + }, + Type: "message", + }, + }, + }, + Target: &EvalRunTarget{ + Type: "azure_ai_agent", + Name: agentName, + Version: agentVersion, + ToolDescriptions: []string{}, + }, + } +} + +// SetFileContent sets the data source to use inline file content. +func (ds *EvalRunDataSource) SetFileContent(items []map[string]any) { + ds.Source = &EvalRunDataContent{ + Type: EvalRunDataContentTypeFileContent, + Content: items, + } +} + +// SetFileID sets the data source to reference a remote dataset by ID. +func (ds *EvalRunDataSource) SetFileID(fileID string) { + ds.Source = &EvalRunDataContent{ + Type: EvalRunDataContentTypeFileID, + ID: fileID, + } +} + +// OpenAIEvalRun is the response for an OpenAI eval run. +type OpenAIEvalRun struct { + ID string `json:"id"` + EvalID string `json:"eval_id,omitempty"` + Name string `json:"name,omitempty"` + Status string `json:"status,omitempty"` + CreatedAt any `json:"created_at,omitempty"` + ModifiedAt any `json:"modified_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + DataSource *EvalRunDataSource `json:"data_source,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + ReportURL string `json:"report_url,omitempty"` + + // Result summary + ResultCounts *EvalRunResultCounts `json:"result_counts,omitempty"` + PerTestingCriteria []EvalRunCriteriaResult `json:"per_testing_criteria_results,omitempty"` + Error any `json:"error,omitempty"` +} + +// EvalRunResultCounts holds pass/fail/error/skip counts for a run. 
+type EvalRunResultCounts struct { + Total int `json:"total"` + Passed int `json:"passed"` + Failed int `json:"failed"` + Errored int `json:"errored"` + Skipped int `json:"skipped"` +} + +// EvalRunCriteriaResult holds per-testing-criteria pass/fail counts. +type EvalRunCriteriaResult struct { + TestingCriteria string `json:"testing_criteria"` + Passed int `json:"passed"` + Failed int `json:"failed"` + Errored int `json:"errored"` + Skipped int `json:"skipped"` +} + +// OpenAIEvalRunList is the response for listing OpenAI eval runs. +type OpenAIEvalRunList struct { + Data []OpenAIEvalRun `json:"data"` +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations.go new file mode 100644 index 00000000000..e51ab769177 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations.go @@ -0,0 +1,259 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package eval_api + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "net/url" + "strconv" + + "azureaiagent/internal/version" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/streaming" + "github.com/azure/azure-dev/cli/azd/pkg/azsdk" +) + +// API path prefixes for eval service endpoints. +const ( + pathDataGenerationJobs = "/data_generation_jobs" + pathEvaluatorGenerationJobs = "/evaluator_generation_jobs" + pathDatasets = "/datasets" + pathOpenAIEvals = "/openai/evals" +) + +// EvalClient provides methods for interacting with the Azure AI eval APIs. +type EvalClient struct { + endpoint string + pipeline runtime.Pipeline +} + +// NewEvalClient creates a new EvalClient. 
+func NewEvalClient(endpoint string, cred azcore.TokenCredential) *EvalClient { + userAgent := fmt.Sprintf("azd-ext-azure-ai-agents/%s", version.Version) + + clientOptions := &policy.ClientOptions{ + Logging: policy.LogOptions{ + AllowedHeaders: []string{"X-Ms-Correlation-Request-Id", "X-Request-Id"}, + IncludeBody: true, + }, + PerCallPolicies: []policy.Policy{ + runtime.NewBearerTokenPolicy(cred, []string{"https://ai.azure.com/.default"}, nil), + azsdk.NewMsCorrelationPolicy(), + azsdk.NewUserAgentPolicy(userAgent), + }, + } + + pipeline := runtime.NewPipeline( + "azure-ai-evals", + "v1.0.0", + runtime.PipelineOptions{}, + clientOptions, + ) + + return &EvalClient{ + endpoint: endpoint, + pipeline: pipeline, + } +} + +// CreateDataGenerationJob starts a dataset generation job for eval onboarding. +func (c *EvalClient) CreateDataGenerationJob( + ctx context.Context, + request *DataGenerationJobRequest, + apiVersion string, +) (*GenerationJob, error) { + return doRequestTyped[GenerationJob](c, ctx, http.MethodPost, pathDataGenerationJobs, nil, request, apiVersion) +} + +// GetDataGenerationJob gets the current state of a dataset generation job. +func (c *EvalClient) GetDataGenerationJob( + ctx context.Context, + operationID string, + apiVersion string, +) (*GenerationJob, error) { + path := pathDataGenerationJobs + "/" + url.PathEscape(operationID) + return doRequestTyped[GenerationJob](c, ctx, http.MethodGet, path, nil, nil, apiVersion) +} + +// CreateEvaluatorGenerationJob starts an evaluator generation job for eval onboarding. +func (c *EvalClient) CreateEvaluatorGenerationJob( + ctx context.Context, + request *EvaluatorGenerationJobRequest, + apiVersion string, +) (*GenerationJob, error) { + return doRequestTyped[GenerationJob](c, ctx, http.MethodPost, pathEvaluatorGenerationJobs, nil, request, apiVersion) +} + +// GetEvaluatorGenerationJob gets the current state of an evaluator generation job. 
+func (c *EvalClient) GetEvaluatorGenerationJob(
+	ctx context.Context,
+	operationID string,
+	apiVersion string,
+) (*GenerationJob, error) {
+	// The operation ID is path-escaped before it becomes a URL segment.
+	jobPath := fmt.Sprintf("%s/%s", pathEvaluatorGenerationJobs, url.PathEscape(operationID))
+	return doRequestTyped[GenerationJob](c, ctx, http.MethodGet, jobPath, nil, nil, apiVersion)
+}
+
+// CreateOpenAIEval POSTs request to the OpenAI evals endpoint and returns the
+// eval definition decoded from the response.
+func (c *EvalClient) CreateOpenAIEval(
+	ctx context.Context,
+	request *CreateOpenAIEvalRequest,
+	apiVersion string,
+) (*OpenAIEval, error) {
+	const target = pathOpenAIEvals
+	return doRequestTyped[OpenAIEval](c, ctx, http.MethodPost, target, nil, request, apiVersion)
+}
+
+// ListOpenAIEvals lists OpenAI eval definitions. A "limit" query parameter is
+// sent only when limit is positive.
+func (c *EvalClient) ListOpenAIEvals(ctx context.Context, limit int, apiVersion string) (*OpenAIEvalList, error) {
+	// A nil map is fine: the request builder only ranges over the parameters.
+	var query map[string]string
+	if limit > 0 {
+		query = map[string]string{"limit": strconv.Itoa(limit)}
+	}
+
+	return doRequestTyped[OpenAIEvalList](c, ctx, http.MethodGet, pathOpenAIEvals, query, nil, apiVersion)
+}
+
+// GetOpenAIEval fetches the OpenAI eval definition identified by evalID.
+func (c *EvalClient) GetOpenAIEval(ctx context.Context, evalID string, apiVersion string) (*OpenAIEval, error) {
+	evalPath := fmt.Sprintf("%s/%s", pathOpenAIEvals, url.PathEscape(evalID))
+	return doRequestTyped[OpenAIEval](c, ctx, http.MethodGet, evalPath, nil, nil, apiVersion)
+}
+
+// CreateOpenAIEvalRun POSTs request to the runs collection of the eval
+// identified by evalID and returns the run decoded from the response.
+func (c *EvalClient) CreateOpenAIEvalRun(
+	ctx context.Context,
+	evalID string,
+	request *CreateOpenAIEvalRunRequest,
+	apiVersion string,
+) (*OpenAIEvalRun, error) {
+	runsPath := pathOpenAIEvals + "/" + url.PathEscape(evalID) + "/runs"
+	return doRequestTyped[OpenAIEvalRun](c, ctx, http.MethodPost, runsPath, nil, request, apiVersion)
+}
+
+// ListOpenAIEvalRuns lists runs for an OpenAI eval definition.
+func (c *EvalClient) ListOpenAIEvalRuns(
+	ctx context.Context,
+	evalID string,
+	limit int,
+	apiVersion string,
+) (*OpenAIEvalRunList, error) {
+	// "limit" is only sent when the caller asked for a positive page size.
+	query := map[string]string{}
+	if limit > 0 {
+		query["limit"] = strconv.Itoa(limit)
+	}
+
+	path := fmt.Sprintf("%s/%s/runs", pathOpenAIEvals, url.PathEscape(evalID))
+	return doRequestTyped[OpenAIEvalRunList](c, ctx, http.MethodGet, path, query, nil, apiVersion)
+}
+
+// GetOpenAIEvalRun gets a run for an OpenAI eval definition.
+func (c *EvalClient) GetOpenAIEvalRun(
+	ctx context.Context,
+	evalID string,
+	runID string,
+	apiVersion string,
+) (*OpenAIEvalRun, error) {
+	path := fmt.Sprintf("%s/%s/runs/%s", pathOpenAIEvals, url.PathEscape(evalID), url.PathEscape(runID))
+	return doRequestTyped[OpenAIEvalRun](c, ctx, http.MethodGet, path, nil, nil, apiVersion)
+}
+
+// doRequest sends one HTTP request through the client's pipeline and returns
+// the raw response body.
+//
+// path is joined onto the endpoint's path and must already be percent-encoded
+// (callers escape dynamic segments with url.PathEscape). apiVersion, when
+// non-empty, is sent as the "api-version" query parameter; entries in query
+// are added afterwards. A non-nil body is JSON-encoded and sent as
+// application/json.
+//
+// Responses other than 200, 201 and 202 are returned as an azcore response
+// error built from the response.
+func (c *EvalClient) doRequest(
+	ctx context.Context,
+	method string,
+	path string,
+	query map[string]string,
+	body any,
+	apiVersion string,
+) ([]byte, error) {
+	base, err := url.Parse(c.endpoint)
+	if err != nil {
+		return nil, fmt.Errorf("invalid endpoint URL: %w", err)
+	}
+
+	// path is already percent-encoded. Appending it to URL.Path (the decoded
+	// form) would make String() escape each '%' a second time, turning an ID
+	// such as "a%2Fb" into "a%252Fb". JoinPath treats its elements as encoded
+	// and also collapses the "//" a trailing slash on the endpoint would cause.
+	u := base.JoinPath(path)
+
+	q := u.Query()
+	if apiVersion != "" {
+		q.Set("api-version", apiVersion)
+	}
+	for k, v := range query {
+		q.Set(k, v)
+	}
+	u.RawQuery = q.Encode()
+
+	req, err := runtime.NewRequest(ctx, method, u.String())
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	log.Printf("[eval_api] %s %s", method, u.String())
+
+	if body != nil {
+		payload, err := json.Marshal(body)
+		if err != nil {
+			return nil, fmt.Errorf("failed to marshal request: %w", err)
+		}
+		log.Printf("[eval_api] request body: %s", string(payload))
+		if err := req.SetBody(streaming.NopCloser(bytes.NewReader(payload)), "application/json"); err != nil {
+			return nil, fmt.Errorf("failed to set request body: %w", err)
+		}
+	}
+
+	resp, err := c.pipeline.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("HTTP request failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// The body is read in full up front so it can be logged and, on failure,
+	// handed back to the response-error constructor.
+	respBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response body: %w", err)
+	}
+
+	log.Printf("[eval_api] response status: %d", resp.StatusCode)
+	log.Printf("[eval_api] response body: %s", string(respBody))
+
+	if !runtime.HasStatusCode(resp, http.StatusOK, http.StatusCreated, http.StatusAccepted) {
+		// Restore the body so runtime.NewResponseError can read it.
+		resp.Body = io.NopCloser(bytes.NewReader(respBody))
+		return nil, runtime.NewResponseError(resp)
+	}
+
+	return respBody, nil
+}
+
+// doRequestTyped performs an HTTP request and unmarshals the response into T.
+//
+// An empty response body is not an error: it yields a pointer to the zero
+// value of T. Any other body must be valid JSON for T.
+func doRequestTyped[T any](
+	c *EvalClient,
+	ctx context.Context,
+	method string,
+	path string,
+	query map[string]string,
+	body any,
+	apiVersion string,
+) (*T, error) {
+	respBody, err := c.doRequest(ctx, method, path, query, body, apiVersion)
+	if err != nil {
+		return nil, err
+	}
+
+	if len(respBody) == 0 {
+		return new(T), nil
+	}
+
+	var result T
+	if err := json.Unmarshal(respBody, &result); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err)
+	}
+
+	return &result, nil
+}
diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go
new file mode 100644
index 00000000000..c912f77b7fa
--- /dev/null
+++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go
@@ -0,0 +1,443 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package eval_api
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
+	"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// ---------------------------------------------------------------------------
+// test helpers
+// ---------------------------------------------------------------------------
+
+// fakeCredential is a stand-in azcore.TokenCredential so tests never perform
+// real authentication.
+type fakeCredential struct{}
+
+// GetToken ignores both arguments and always returns the same static token.
+func (f *fakeCredential) GetToken(
+	_ context.Context,
+	_ policy.TokenRequestOptions,
+) (azcore.AccessToken, error) {
+	token := azcore.AccessToken{Token: "fake-token"}
+	return token, nil
+}
+
+// newTestClient starts an httptest server around handler and returns an
+// EvalClient whose endpoint is that server's URL. The server is closed
+// automatically when the test finishes.
+func newTestClient(t *testing.T, handler http.Handler) (*EvalClient, *httptest.Server) {
+	t.Helper()
+
+	srv := httptest.NewServer(handler)
+	t.Cleanup(srv.Close)
+
+	return NewEvalClient(srv.URL, &fakeCredential{}), srv
+}
+
+// jsonHandler returns an http.HandlerFunc that responds with the given body and status.
+func jsonHandler(status int, body map[string]any) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + data, _ := json.Marshal(body) + _, _ = w.Write(data) + } +} + +// --------------------------------------------------------------------------- +// NewEvalClient +// --------------------------------------------------------------------------- + +func TestNewEvalClient(t *testing.T) { + t.Parallel() + + client := NewEvalClient("https://example.ai.azure.com", &fakeCredential{}) + require.NotNil(t, client) + assert.Equal(t, "https://example.ai.azure.com", client.endpoint) +} + +// --------------------------------------------------------------------------- +// CreateDataGenerationJob +// --------------------------------------------------------------------------- + +func TestCreateDataGenerationJob_Success(t *testing.T) { + t.Parallel() + + var capturedPath, capturedAPIVersion string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedPath = r.URL.Path + capturedAPIVersion = r.URL.Query().Get("api-version") + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + resp := map[string]any{"id": "op-123", "status": "running"} + data, _ := json.Marshal(resp) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.CreateDataGenerationJob(t.Context(), &DataGenerationJobRequest{ + Inputs: DataGenerationInputs{ + Name: "test", + Scenario: "evaluation", + }, + }, "v1") + + require.NoError(t, err) + assert.Equal(t, "/data_generation_jobs", capturedPath) + assert.Equal(t, "v1", capturedAPIVersion) + assert.Equal(t, "op-123", result.ID) + assert.Equal(t, "running", result.Status) +} + +// --------------------------------------------------------------------------- +// GetDataGenerationJob +// --------------------------------------------------------------------------- + +func 
TestGetDataGenerationJob_Success(t *testing.T) { + t.Parallel() + + var capturedPath string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedPath = r.URL.Path + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + resp := map[string]any{"id": "op-123", "status": "completed", "dataset_name": "test-ds"} + data, _ := json.Marshal(resp) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.GetDataGenerationJob(t.Context(), "op-123", "v1") + + require.NoError(t, err) + assert.Equal(t, "/data_generation_jobs/op-123", capturedPath) + assert.Equal(t, "completed", result.Status) + assert.Equal(t, "test-ds", result.DatasetName) +} + +// --------------------------------------------------------------------------- +// CreateEvaluatorGenerationJob +// --------------------------------------------------------------------------- + +func TestCreateEvaluatorGenerationJob_Success(t *testing.T) { + t.Parallel() + + var capturedPath string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedPath = r.URL.Path + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + resp := map[string]any{"id": "eval-op-456", "status": "running"} + data, _ := json.Marshal(resp) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.CreateEvaluatorGenerationJob( + t.Context(), &EvaluatorGenerationJobRequest{Name: "my-eval"}, "2025-11-15-preview", + ) + + require.NoError(t, err) + assert.Equal(t, "/evaluator_generation_jobs", capturedPath) + assert.Equal(t, "eval-op-456", result.ID) +} + +// --------------------------------------------------------------------------- +// GetEvaluatorGenerationJob +// --------------------------------------------------------------------------- + +func TestGetEvaluatorGenerationJob_Success(t *testing.T) { + t.Parallel() + + var capturedPath string + handler 
:= http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedPath = r.URL.Path + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + resp := map[string]any{"id": "eval-op-456", "status": "completed", "evaluator_name": "quality"} + data, _ := json.Marshal(resp) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.GetEvaluatorGenerationJob(t.Context(), "eval-op-456", "2025-11-15-preview") + + require.NoError(t, err) + assert.Equal(t, "/evaluator_generation_jobs/eval-op-456", capturedPath) + assert.Equal(t, "completed", result.Status) + assert.Equal(t, "quality", result.EvaluatorName) +} + +// --------------------------------------------------------------------------- +// CreateOpenAIEval +// --------------------------------------------------------------------------- + +func TestCreateOpenAIEval_Success(t *testing.T) { + t.Parallel() + + var capturedPath string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedPath = r.URL.Path + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + resp := map[string]any{"id": "eval-001", "name": "smoke-core"} + data, _ := json.Marshal(resp) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.CreateOpenAIEval( + t.Context(), &CreateOpenAIEvalRequest{Name: "smoke-core"}, "2025-11-15-preview", + ) + + require.NoError(t, err) + assert.Equal(t, "/openai/evals", capturedPath) + assert.Equal(t, "eval-001", result.ID) +} + +// --------------------------------------------------------------------------- +// ListOpenAIEvals +// --------------------------------------------------------------------------- + +func TestListOpenAIEvals_Success(t *testing.T) { + t.Parallel() + + var capturedLimit string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedLimit = r.URL.Query().Get("limit") + + 
w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + resp := map[string]any{ + "data": []any{ + map[string]any{"id": "eval-1"}, + map[string]any{"id": "eval-2"}, + }, + } + data, _ := json.Marshal(resp) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.ListOpenAIEvals(t.Context(), 10, "2025-11-15-preview") + + require.NoError(t, err) + assert.Equal(t, "10", capturedLimit) + assert.Len(t, result.Data, 2) +} + +func TestListOpenAIEvals_ZeroLimit(t *testing.T) { + t.Parallel() + + var hasLimitParam bool + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + hasLimitParam = r.URL.Query().Has("limit") + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"data":[]}`)) + }) + + client, _ := newTestClient(t, handler) + _, err := client.ListOpenAIEvals(t.Context(), 0, "2025-11-15-preview") + + require.NoError(t, err) + assert.False(t, hasLimitParam, "limit should not be set when 0") +} + +// --------------------------------------------------------------------------- +// GetOpenAIEval +// --------------------------------------------------------------------------- + +func TestGetOpenAIEval_Success(t *testing.T) { + t.Parallel() + + var capturedPath string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedPath = r.URL.Path + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + resp := map[string]any{"id": "eval-001", "name": "smoke-core", "metadata": map[string]string{"azd_agent": "agent-1"}} + data, _ := json.Marshal(resp) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.GetOpenAIEval(t.Context(), "eval-001", "2025-11-15-preview") + + require.NoError(t, err) + assert.Equal(t, "/openai/evals/eval-001", capturedPath) + assert.Equal(t, "smoke-core", result.Name) +} + +// 
--------------------------------------------------------------------------- +// CreateOpenAIEvalRun +// --------------------------------------------------------------------------- + +func TestCreateOpenAIEvalRun_Success(t *testing.T) { + t.Parallel() + + var capturedPath string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedPath = r.URL.Path + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + resp := map[string]any{"id": "run-001", "status": "running"} + data, _ := json.Marshal(resp) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.CreateOpenAIEvalRun( + t.Context(), "eval-001", &CreateOpenAIEvalRunRequest{ + Metadata: map[string]string{"agent": "a"}, + }, "2025-11-15-preview", + ) + + require.NoError(t, err) + assert.Equal(t, "/openai/evals/eval-001/runs", capturedPath) + assert.Equal(t, "run-001", result.ID) +} + +// --------------------------------------------------------------------------- +// ListOpenAIEvalRuns +// --------------------------------------------------------------------------- + +func TestListOpenAIEvalRuns_Success(t *testing.T) { + t.Parallel() + + var capturedPath, capturedLimit string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedPath = r.URL.Path + capturedLimit = r.URL.Query().Get("limit") + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + resp := map[string]any{"data": []any{map[string]any{"id": "run-1"}}} + data, _ := json.Marshal(resp) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.ListOpenAIEvalRuns(t.Context(), "eval-001", 5, "2025-11-15-preview") + + require.NoError(t, err) + assert.Equal(t, "/openai/evals/eval-001/runs", capturedPath) + assert.Equal(t, "5", capturedLimit) + assert.Len(t, result.Data, 1) +} + +// --------------------------------------------------------------------------- 
+// GetOpenAIEvalRun +// --------------------------------------------------------------------------- + +func TestGetOpenAIEvalRun_Success(t *testing.T) { + t.Parallel() + + var capturedPath string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedPath = r.URL.Path + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + resp := map[string]any{"id": "run-001", "status": "completed", "score": 0.92} + data, _ := json.Marshal(resp) + _, _ = w.Write(data) + }) + + client, _ := newTestClient(t, handler) + result, err := client.GetOpenAIEvalRun(t.Context(), "eval-001", "run-001", "2025-11-15-preview") + + require.NoError(t, err) + assert.Equal(t, "/openai/evals/eval-001/runs/run-001", capturedPath) + assert.Equal(t, "completed", result.Status) +} + +// --------------------------------------------------------------------------- +// Error handling +// --------------------------------------------------------------------------- + +func TestDoRequest_ServerError(t *testing.T) { + t.Parallel() + + handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + }) + + client, _ := newTestClient(t, handler) + _, err := client.CreateOpenAIEval(t.Context(), &CreateOpenAIEvalRequest{}, "2025-11-15-preview") + assert.Error(t, err) +} + +func TestDoRequest_EmptyBody(t *testing.T) { + t.Parallel() + + handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + }) + + client, _ := newTestClient(t, handler) + result, err := client.ListOpenAIEvals(t.Context(), 0, "2025-11-15-preview") + require.NoError(t, err) + assert.Empty(t, result.Data) +} + +func TestDoRequest_APIVersionInQuery(t *testing.T) { + t.Parallel() + + var capturedAPIVersion string + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedAPIVersion = r.URL.Query().Get("api-version") + + w.Header().Set("Content-Type", 
"application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + }) + + client, _ := newTestClient(t, handler) + _, err := client.GetOpenAIEval(t.Context(), "eval-1", "2025-11-15-preview") + require.NoError(t, err) + assert.Equal(t, "2025-11-15-preview", capturedAPIVersion) +} + +func TestDoRequest_RequestBodySent(t *testing.T) { + t.Parallel() + + var capturedBody map[string]any + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + defer r.Body.Close() + _ = json.NewDecoder(r.Body).Decode(&capturedBody) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + _, _ = w.Write([]byte(`{"id":"ok"}`)) + }) + + client, _ := newTestClient(t, handler) + req := &DataGenerationJobRequest{ + Inputs: DataGenerationInputs{ + Name: "test-eval", + Scenario: "evaluation", + }, + } + _, err := client.CreateDataGenerationJob(t.Context(), req, "v1") + + require.NoError(t, err) + require.NotNil(t, capturedBody) + inputs, ok := capturedBody["inputs"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "test-eval", inputs["name"]) + assert.Equal(t, "evaluation", inputs["scenario"]) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go new file mode 100644 index 00000000000..1c33582ecae --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go @@ -0,0 +1,186 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package eval_api + +import ( + "context" + "fmt" + "log" + "strings" + "time" +) + +// --------------------------------------------------------------------------- +// JobStatus — typed status with terminal/failed semantics +// --------------------------------------------------------------------------- + +// JobStatus represents the normalized status of a generation job. 
+type JobStatus string
+
+// Known status values. Both spellings of the cancelled state are defined.
+const (
+	JobStatusRunning   JobStatus = "running"
+	JobStatusCompleted JobStatus = "completed"
+	JobStatusSucceeded JobStatus = "succeeded"
+	JobStatusFailed    JobStatus = "failed"
+	JobStatusCancelled JobStatus = "cancelled"
+	JobStatusCanceled  JobStatus = "canceled"
+)
+
+// ParseJobStatus lower-cases a raw status string and converts it to a
+// JobStatus. An empty string maps to JobStatusRunning; unrecognised values
+// are passed through in lower case.
+func ParseJobStatus(s string) JobStatus {
+	if normalized := strings.ToLower(s); normalized != "" {
+		return JobStatus(normalized)
+	}
+	return JobStatusRunning
+}
+
+// IsTerminal reports whether the status is a final state: completed,
+// succeeded, or any of the failed states.
+func (s JobStatus) IsTerminal() bool {
+	return s.IsFailed() || s == JobStatusCompleted || s == JobStatusSucceeded
+}
+
+// IsFailed reports whether the status is failed or cancelled (either spelling).
+func (s JobStatus) IsFailed() bool {
+	return s == JobStatusFailed || s == JobStatusCancelled || s == JobStatusCanceled
+}
+
+// String returns the underlying status text unchanged.
+func (s JobStatus) String() string {
+	return string(s)
+}
+
+// ---------------------------------------------------------------------------
+// JobFailedError — returned when a polled job reaches a failed state
+// ---------------------------------------------------------------------------
+
+// JobFailedError is returned when a generation job reaches a failed terminal state.
+type JobFailedError struct {
+	// Job is the job as last fetched, kept so callers can inspect it.
+	Job *GenerationJob
+	// Status is the normalized failed status that ended polling.
+	Status JobStatus
+}
+
+// Error reports the quoted status that caused the failure.
+func (e *JobFailedError) Error() string {
+	return fmt.Sprintf("job failed with status %q", e.Status.String())
+}
+
+// ---------------------------------------------------------------------------
+// PollerTimeoutError — returned when polling exhausts all attempts
+// ---------------------------------------------------------------------------
+
+// PollerTimeoutError reports that a generation job was still non-terminal
+// after the configured number of polling attempts had been used up.
+type PollerTimeoutError struct {
+	// OperationID identifies the job that was being polled.
+	OperationID string
+	// Attempts is the attempt limit that was exhausted.
+	Attempts int
+}
+
+// Error names the operation and the number of attempts that were made.
+func (e *PollerTimeoutError) Error() string {
+	const format = "operation %s did not complete within %d attempts"
+	return fmt.Sprintf(format, e.OperationID, e.Attempts)
+}
+
+// ---------------------------------------------------------------------------
+// GetJobFunc — callback type for fetching job state
+// ---------------------------------------------------------------------------
+
+// GetJobFunc is the callback a poller uses to fetch the current state of a
+// generation job, given its operation ID and the API version to call.
+type GetJobFunc func(ctx context.Context, operationID, apiVersion string) (*GenerationJob, error)
+
+// ---------------------------------------------------------------------------
+// PollerOptions — configurable polling behavior
+// ---------------------------------------------------------------------------
+
+// PollerOptions bundles the two polling knobs: how long to wait between
+// polls and how many polls to make before giving up.
+type PollerOptions struct {
+	Interval    time.Duration
+	MaxAttempts int
+}
+
+// DefaultPollerOptions returns sensible defaults: 2 s interval, 300 attempts (~10 min).
+func DefaultPollerOptions() PollerOptions {
+	var opts PollerOptions
+	opts.Interval = 2 * time.Second
+	opts.MaxAttempts = 300
+	return opts
+}
+
+// ---------------------------------------------------------------------------
+// Poller — polls a generation job until it reaches a terminal state
+// ---------------------------------------------------------------------------
+
+// Poller repeatedly fetches a GenerationJob until its status is terminal.
+type Poller struct {
+	// OperationID and APIVersion are passed to GetJob on every poll.
+	OperationID string
+	APIVersion  string
+	// GetJob fetches the latest job state.
+	GetJob GetJobFunc
+	// Options controls the wait between polls and the attempt limit.
+	Options PollerOptions
+	// OnPoll, when set, receives the latest status after every successful
+	// poll. It is intended for progress reporting (e.g. debug logging).
+	OnPoll func(status JobStatus)
+}
+
+// NewPoller builds a Poller for the given operation that uses
+// DefaultPollerOptions. OnPoll is left unset.
+func NewPoller(operationID, apiVersion string, getJob GetJobFunc) *Poller {
+	p := new(Poller)
+	p.OperationID = operationID
+	p.APIVersion = apiVersion
+	p.GetJob = getJob
+	p.Options = DefaultPollerOptions()
+	return p
+}
+
+// Poll blocks until the job reaches a terminal state, the context is
+// cancelled, or the maximum number of attempts is exhausted.
+//
+// On success it returns the completed GenerationJob.
+// On failure it returns a *JobFailedError (which carries the job for inspection).
+// On timeout it returns a *PollerTimeoutError.
+func (p *Poller) Poll(ctx context.Context) (*GenerationJob, error) {
+	if p.OperationID == "" {
+		return nil, fmt.Errorf("operation ID is empty")
+	}
+	// A Poller built as a struct literal may lack the callback; fail with an
+	// error instead of panicking on the first poll.
+	if p.GetJob == nil {
+		return nil, fmt.Errorf("poller for operation %s has no GetJob callback", p.OperationID)
+	}
+
+	for range p.Options.MaxAttempts {
+		// Wait one interval before every poll (including the first), giving
+		// up early if the context ends while waiting.
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		case <-time.After(p.Options.Interval):
+		}
+
+		job, err := p.GetJob(ctx, p.OperationID, p.APIVersion)
+		if err != nil {
+			return nil, err
+		}
+		// GetJobFunc is a caller-supplied callback and may return (nil, nil);
+		// guard the dereference below.
+		if job == nil {
+			return nil, fmt.Errorf("operation %s: job lookup returned no job", p.OperationID)
+		}
+
+		status := ParseJobStatus(job.Status)
+		log.Printf("[poller] operationID=%s status=%s", p.OperationID, status)
+
+		if p.OnPoll != nil {
+			p.OnPoll(status)
+		}
+
+		if status.IsTerminal() {
+			if status.IsFailed() {
+				return nil, &JobFailedError{Job: job, Status: status}
+			}
+			return job, nil
+		}
+	}
+
+	// Every attempt saw a non-terminal status.
+	return nil, &PollerTimeoutError{
+		OperationID: p.OperationID,
+		Attempts:    p.Options.MaxAttempts,
+	}
+}
diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller_test.go
new file mode 100644
index 00000000000..88afdb0f935
--- /dev/null
+++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller_test.go
@@ -0,0 +1,228 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+ +package eval_api + +import ( + "context" + "errors" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// JobStatus +// --------------------------------------------------------------------------- + +func TestParseJobStatus(t *testing.T) { + t.Parallel() + + assert.Equal(t, JobStatusRunning, ParseJobStatus("")) + assert.Equal(t, JobStatusCompleted, ParseJobStatus("completed")) + assert.Equal(t, JobStatusCompleted, ParseJobStatus("Completed")) + assert.Equal(t, JobStatusFailed, ParseJobStatus("Failed")) + assert.Equal(t, JobStatus("pending"), ParseJobStatus("pending")) +} + +func TestJobStatus_IsTerminal(t *testing.T) { + t.Parallel() + + tests := []struct { + status string + terminal bool + }{ + {"completed", true}, + {"Completed", true}, + {"succeeded", true}, + {"failed", true}, + {"cancelled", true}, + {"canceled", true}, + {"running", false}, + {"pending", false}, + {"", false}, + } + for _, tt := range tests { + t.Run(tt.status, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.terminal, ParseJobStatus(tt.status).IsTerminal()) + }) + } +} + +func TestJobStatus_IsFailed(t *testing.T) { + t.Parallel() + + tests := []struct { + status string + failed bool + }{ + {"failed", true}, + {"Failed", true}, + {"cancelled", true}, + {"canceled", true}, + {"completed", false}, + {"succeeded", false}, + {"running", false}, + } + for _, tt := range tests { + t.Run(tt.status, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.failed, ParseJobStatus(tt.status).IsFailed()) + }) + } +} + +// --------------------------------------------------------------------------- +// Poller +// --------------------------------------------------------------------------- + +func TestPoller_EmptyOperationID(t *testing.T) { + t.Parallel() + + p := NewPoller("", "v1", func(ctx context.Context, id, ver string) (*GenerationJob, error) { + 
return nil, nil + }) + _, err := p.Poll(t.Context()) + require.Error(t, err) + assert.Contains(t, err.Error(), "operation ID is empty") +} + +func TestPoller_CompletedImmediately(t *testing.T) { + t.Parallel() + + calls := 0 + p := NewPoller("op-1", "v1", func(ctx context.Context, id, ver string) (*GenerationJob, error) { + calls++ + return &GenerationJob{ID: id, Status: "completed"}, nil + }) + p.Options.Interval = time.Millisecond + + job, err := p.Poll(t.Context()) + require.NoError(t, err) + assert.Equal(t, "op-1", job.ID) + assert.Equal(t, 1, calls) +} + +func TestPoller_SucceededAfterPending(t *testing.T) { + t.Parallel() + + calls := 0 + p := NewPoller("op-2", "v1", func(ctx context.Context, id, ver string) (*GenerationJob, error) { + calls++ + if calls < 3 { + return &GenerationJob{ID: id, Status: "running"}, nil + } + return &GenerationJob{ID: id, Status: "succeeded"}, nil + }) + p.Options.Interval = time.Millisecond + + job, err := p.Poll(t.Context()) + require.NoError(t, err) + assert.Equal(t, "succeeded", job.Status) + assert.Equal(t, 3, calls) +} + +func TestPoller_FailedReturnsJobFailedError(t *testing.T) { + t.Parallel() + + p := NewPoller("op-3", "v1", func(ctx context.Context, id, ver string) (*GenerationJob, error) { + return &GenerationJob{ID: id, Status: "failed"}, nil + }) + p.Options.Interval = time.Millisecond + + _, err := p.Poll(t.Context()) + require.Error(t, err) + + var jfe *JobFailedError + require.True(t, errors.As(err, &jfe)) + assert.Equal(t, JobStatusFailed, jfe.Status) + assert.Equal(t, "op-3", jfe.Job.ID) +} + +func TestPoller_APIError(t *testing.T) { + t.Parallel() + + p := NewPoller("op-4", "v1", func(ctx context.Context, id, ver string) (*GenerationJob, error) { + return nil, fmt.Errorf("network error") + }) + p.Options.Interval = time.Millisecond + + _, err := p.Poll(t.Context()) + require.Error(t, err) + assert.Contains(t, err.Error(), "network error") +} + +func TestPoller_MaxAttemptsExhausted(t *testing.T) { + t.Parallel() 
+ + p := NewPoller("op-5", "v1", func(ctx context.Context, id, ver string) (*GenerationJob, error) { + return &GenerationJob{ID: id, Status: "running"}, nil + }) + p.Options.Interval = time.Millisecond + p.Options.MaxAttempts = 3 + + _, err := p.Poll(t.Context()) + require.Error(t, err) + assert.Contains(t, err.Error(), "did not complete") + timeoutErr, ok := errors.AsType[*PollerTimeoutError](err) + require.True(t, ok) + assert.Equal(t, "op-5", timeoutErr.OperationID) + assert.Equal(t, 3, timeoutErr.Attempts) +} + +func TestPoller_ContextCancelled(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithCancel(t.Context()) + cancel() // cancel immediately + + p := NewPoller("op-6", "v1", func(ctx context.Context, id, ver string) (*GenerationJob, error) { + return &GenerationJob{ID: id, Status: "running"}, nil + }) + p.Options.Interval = time.Millisecond + + _, err := p.Poll(ctx) + require.Error(t, err) + assert.ErrorIs(t, err, context.Canceled) +} + +func TestPoller_OnPollCallback(t *testing.T) { + t.Parallel() + + calls := 0 + var observed []JobStatus + + p := NewPoller("op-7", "v1", func(ctx context.Context, id, ver string) (*GenerationJob, error) { + calls++ + if calls < 2 { + return &GenerationJob{ID: id, Status: "running"}, nil + } + return &GenerationJob{ID: id, Status: "completed"}, nil + }) + p.Options.Interval = time.Millisecond + p.OnPoll = func(status JobStatus) { + observed = append(observed, status) + } + + _, err := p.Poll(t.Context()) + require.NoError(t, err) + assert.Equal(t, []JobStatus{JobStatusRunning, JobStatusCompleted}, observed) +} + +// --------------------------------------------------------------------------- +// JobFailedError +// --------------------------------------------------------------------------- + +func TestJobFailedError_Error(t *testing.T) { + t.Parallel() + + e := &JobFailedError{ + Job: &GenerationJob{ID: "op-1"}, + Status: JobStatusFailed, + } + assert.Contains(t, e.Error(), "failed") +} diff --git 
a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go new file mode 100644 index 00000000000..eb59a1268e1 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go @@ -0,0 +1,107 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package opteval + +import ( + "fmt" + "os" + "path/filepath" + "slices" + + "azureaiagent/internal/pkg/agents/agent_yaml" + + "go.yaml.in/yaml/v3" +) + +// Config is the shared YAML configuration for eval and optimize commands. +// +// Contains fields common to both commands. Optimize-specific fields +// (Criteria, ValidationReference, etc) live in +// the OptimizeConfig wrapper in the cmd package. +// +// Runtime state (operation IDs, eval IDs, status) is stored in +// the azd environment rather than in this config file. +type Config struct { + Name string `yaml:"name,omitempty"` + Agent AgentRef `yaml:"agent"` + DatasetFile string `yaml:"dataset_file,omitempty"` + DatasetReference *DatasetRef `yaml:"dataset_reference,omitempty"` + Evaluators []string `yaml:"evaluators,omitempty"` +} + +// AgentRef references the agent under evaluation/optimization. +type AgentRef struct { + Name string `yaml:"name"` + Kind agent_yaml.AgentKind `yaml:"kind,omitempty"` + Version string `yaml:"version,omitempty"` + Model string `yaml:"model,omitempty"` +} + +// DatasetRef references a named/versioned dataset. +type DatasetRef struct { + Name string `yaml:"name"` + Version string `yaml:"version,omitempty"` +} + +// Options holds run-time options for eval and optimize. +// Eval only uses EvalModel; optimize uses all fields. 
+type Options struct { + EvalModel string `yaml:"eval_model,omitempty"` + Mode string `yaml:"mode,omitempty"` + Strategies []string `yaml:"strategies,omitempty"` + Budget int `yaml:"budget,omitempty"` + MaxIterations int `yaml:"max_iterations,omitempty"` + MinImprovement float64 `yaml:"min_improvement,omitempty"` + ImprovementThreshold float64 `yaml:"improvement_threshold,omitempty"` + PassThreshold float64 `yaml:"pass_threshold,omitempty"` + KeepVersions bool `yaml:"keep_versions,omitempty"` + TasksPerIteration int `yaml:"tasks_per_iteration,omitempty"` + ReflectionModel string `yaml:"reflection_model,omitempty"` +} + +// DefaultStrategies are the default optimization strategies. +var DefaultStrategies = []string{"instruction", "skill", "agents-optimization-job"} + +// UnmarshalYAML populates default strategies when the field is absent in YAML. +func (o *Options) UnmarshalYAML(value *yaml.Node) error { + // Alias avoids infinite recursion. + type raw Options + if err := value.Decode((*raw)(o)); err != nil { + return err + } + if len(o.Strategies) == 0 { + o.Strategies = slices.Clone(DefaultStrategies) + o.MaxIterations = 2 + } + return nil +} + +// Read reads a YAML config file (eval or optimize format). +func Read(path string) (*Config, error) { + data, err := os.ReadFile(path) //nolint:gosec // path is provided by user for local config + if err != nil { + return nil, fmt.Errorf("failed to read config %q: %w", path, err) + } + + var cfg Config + if err := yaml.Unmarshal(data, &cfg); err != nil { + return nil, fmt.Errorf("failed to parse config %q: %w", path, err) + } + + return &cfg, nil +} + +// Write writes a YAML config file. 
+func Write(path string, cfg *Config) error { + if err := os.MkdirAll(filepath.Dir(path), 0750); err != nil { + return fmt.Errorf("creating config directory: %w", err) + } + + data, err := yaml.Marshal(cfg) + if err != nil { + return fmt.Errorf("failed to marshal config: %w", err) + } + + return os.WriteFile(path, data, 0600) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go new file mode 100644 index 00000000000..a97f1dfd0d0 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go @@ -0,0 +1,165 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package opteval + +import ( + "path/filepath" + "testing" + + "azureaiagent/internal/pkg/agents/agent_yaml" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.yaml.in/yaml/v3" +) + +// --------------------------------------------------------------------------- +// Config Read / Write round-trip +// --------------------------------------------------------------------------- + +func TestConfig_RoundTrip(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + + original := &Config{ + Name: "test-config", + Agent: AgentRef{ + Name: "my-agent", + Kind: agent_yaml.AgentKindHosted, + Version: "v1", + Model: "gpt-4o", + }, + DatasetFile: "tasks.jsonl", + Evaluators: []string{"builtin.quality", "custom-1"}, + } + + require.NoError(t, Write(path, original)) + loaded, err := Read(path) + require.NoError(t, err) + + assert.Equal(t, "test-config", loaded.Name) + assert.Equal(t, "my-agent", loaded.Agent.Name) + assert.Equal(t, agent_yaml.AgentKindHosted, loaded.Agent.Kind) + assert.Equal(t, "v1", loaded.Agent.Version) + assert.Equal(t, "gpt-4o", loaded.Agent.Model) + assert.Equal(t, "tasks.jsonl", loaded.DatasetFile) + require.Len(t, loaded.Evaluators, 
2) + assert.Equal(t, "builtin.quality", loaded.Evaluators[0]) + assert.Equal(t, "custom-1", loaded.Evaluators[1]) +} + +func TestConfig_RoundTrip_DatasetReference(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + + original := &Config{ + Agent: AgentRef{Name: "a1"}, + DatasetReference: &DatasetRef{Name: "golden", Version: "v2"}, + } + + require.NoError(t, Write(path, original)) + loaded, err := Read(path) + require.NoError(t, err) + + require.NotNil(t, loaded.DatasetReference) + assert.Equal(t, "golden", loaded.DatasetReference.Name) + assert.Equal(t, "v2", loaded.DatasetReference.Version) + assert.Empty(t, loaded.DatasetFile) +} + +func TestRead_MissingFile(t *testing.T) { + t.Parallel() + _, err := Read("/nonexistent/config.yaml") + assert.Error(t, err) +} + +func TestWrite_CreatesDirectory(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "sub", "nested", "config.yaml") + + cfg := &Config{Agent: AgentRef{Name: "a1"}} + require.NoError(t, Write(path, cfg)) + assert.FileExists(t, path) +} + +// --------------------------------------------------------------------------- +// AgentRef fields +// --------------------------------------------------------------------------- + +func TestAgentRef_YAMLFields(t *testing.T) { + t.Parallel() + + input := ` +name: test-agent +kind: prompt +version: v5 +model: gpt-4.1 +` + var ref AgentRef + require.NoError(t, yaml.Unmarshal([]byte(input), &ref)) + + assert.Equal(t, "test-agent", ref.Name) + assert.Equal(t, agent_yaml.AgentKindPrompt, ref.Kind) + assert.Equal(t, "v5", ref.Version) + assert.Equal(t, "gpt-4.1", ref.Model) +} + +// --------------------------------------------------------------------------- +// DatasetRef fields +// --------------------------------------------------------------------------- + +func TestDatasetRef_YAMLFields(t *testing.T) { + t.Parallel() + + input := ` +name: golden-data +version: v3 +` + var ref DatasetRef + 
require.NoError(t, yaml.Unmarshal([]byte(input), &ref)) + + assert.Equal(t, "golden-data", ref.Name) + assert.Equal(t, "v3", ref.Version) +} + +// --------------------------------------------------------------------------- +// Options fields +// --------------------------------------------------------------------------- + +func TestOptions_YAMLFields(t *testing.T) { + t.Parallel() + + input := ` +eval_model: gpt-4.1 +mode: full +strategies: + - prompt + - tool +budget: 500 +max_iterations: 10 +min_improvement: 0.05 +improvement_threshold: 0.1 +pass_threshold: 0.8 +keep_versions: true +tasks_per_iteration: 20 +reflection_model: gpt-4o +` + var opts Options + require.NoError(t, yaml.Unmarshal([]byte(input), &opts)) + + assert.Equal(t, "gpt-4.1", opts.EvalModel) + assert.Equal(t, "full", opts.Mode) + assert.Equal(t, []string{"prompt", "tool"}, opts.Strategies) + assert.Equal(t, 500, opts.Budget) + assert.Equal(t, 10, opts.MaxIterations) + assert.InDelta(t, 0.05, opts.MinImprovement, 0.001) + assert.InDelta(t, 0.1, opts.ImprovementThreshold, 0.001) + assert.InDelta(t, 0.8, opts.PassThreshold, 0.001) + assert.True(t, opts.KeepVersions) + assert.Equal(t, 20, opts.TasksPerIteration) + assert.Equal(t, "gpt-4o", opts.ReflectionModel) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go new file mode 100644 index 00000000000..14f1b67a1e6 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go @@ -0,0 +1,362 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package optimize_api + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + netURL "net/url" + + "azureaiagent/internal/version" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/streaming" + "github.com/azure/azure-dev/cli/azd/pkg/azsdk" +) + +// OptimizeClient provides methods for interacting with the Agents Optimization API. +type OptimizeClient struct { + endpoint string + pipeline runtime.Pipeline +} + +// NewOptimizeClient creates a new OptimizeClient with the given endpoint and credential. +func NewOptimizeClient(endpoint string, cred azcore.TokenCredential) *OptimizeClient { + userAgent := fmt.Sprintf("azd-ext-azure-ai-agents/%s", version.Version) + + clientOptions := &policy.ClientOptions{ + Logging: policy.LogOptions{ + AllowedHeaders: []string{"X-Ms-Correlation-Request-Id", "X-Request-Id"}, + IncludeBody: true, + }, + PerCallPolicies: []policy.Policy{ + runtime.NewBearerTokenPolicy(cred, []string{"https://ai.azure.com/.default"}, nil), + azsdk.NewMsCorrelationPolicy(), + azsdk.NewUserAgentPolicy(userAgent), + }, + } + + pipeline := runtime.NewPipeline( + "agents-optimization", + "v1.0.0", + runtime.PipelineOptions{}, + clientOptions, + ) + + return &OptimizeClient{ + endpoint: endpoint, + pipeline: pipeline, + } +} + +// NewOptimizeClientFromPipeline creates an OptimizeClient with a pre-built pipeline. +// This is intended for tests that need to bypass auth policies. +func NewOptimizeClientFromPipeline(endpoint string, pipeline runtime.Pipeline) *OptimizeClient { + return &OptimizeClient{ + endpoint: endpoint, + pipeline: pipeline, + } +} + +// StartOptimize submits a new optimization job. 
+func (c *OptimizeClient) StartOptimize( + ctx context.Context, + optimizeReq *OptimizeRequest, +) (*OptimizeResponse, error) { + url := fmt.Sprintf("%s/optimize?api-version=v1", c.endpoint) + + payload, err := json.Marshal(optimizeReq) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := runtime.NewRequest(ctx, http.MethodPost, url) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + if err := req.SetBody(streaming.NopCloser(bytes.NewReader(payload)), "application/json"); err != nil { + return nil, fmt.Errorf("failed to set request body: %w", err) + } + + resp, err := c.pipeline.Do(req) + if err != nil { + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + defer resp.Body.Close() + + if !runtime.HasStatusCode(resp, http.StatusOK, http.StatusAccepted) { + return nil, runtime.NewResponseError(resp) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + var result OptimizeResponse + if err := json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + return &result, nil +} + +// GetOptimizeStatus retrieves the status of an optimization job. 
+func (c *OptimizeClient) GetOptimizeStatus( + ctx context.Context, + operationID string, +) (*OptimizeJobStatus, error) { + url := fmt.Sprintf("%s/optimize/%s?api-version=v1", c.endpoint, operationID) + + req, err := runtime.NewRequest(ctx, http.MethodGet, url) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + resp, err := c.pipeline.Do(req) + if err != nil { + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + defer resp.Body.Close() + + if !runtime.HasStatusCode(resp, http.StatusOK) { + return nil, runtime.NewResponseError(resp) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + var result OptimizeJobStatus + if err := json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + return &result, nil +} + +// ListOptimizeJobs lists optimization jobs with optional filtering. +func (c *OptimizeClient) ListOptimizeJobs( + ctx context.Context, + limit int, + status string, +) (*OptimizeListResponse, error) { + url := fmt.Sprintf("%s/optimize?api-version=v1&limit=%d", c.endpoint, limit) + if status != "" { + url += "&status=" + status + } + + req, err := runtime.NewRequest(ctx, http.MethodGet, url) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + resp, err := c.pipeline.Do(req) + if err != nil { + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + defer resp.Body.Close() + + if !runtime.HasStatusCode(resp, http.StatusOK) { + return nil, runtime.NewResponseError(resp) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + var result OptimizeListResponse + if err := json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + return &result, nil +} + +// CancelOptimize cancels a running optimization job. 
+func (c *OptimizeClient) CancelOptimize( + ctx context.Context, + operationID string, +) (*OptimizeCancelResponse, error) { + url := fmt.Sprintf("%s/optimize/%s/cancel?api-version=v1", c.endpoint, operationID) + + req, err := runtime.NewRequest(ctx, http.MethodPost, url) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + resp, err := c.pipeline.Do(req) + if err != nil { + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + defer resp.Body.Close() + + if !runtime.HasStatusCode(resp, http.StatusOK) { + return nil, runtime.NewResponseError(resp) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + var result OptimizeCancelResponse + if err := json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + return &result, nil +} + +// ReportDeployment notifies the optimization service that a candidate has been +// deployed. This allows FAOS to track which candidates have been deployed. 
+func (c *OptimizeClient) ReportDeployment( + ctx context.Context, + report *DeploymentReport, +) error { + url := fmt.Sprintf( + "%s/optimize/candidates/%s/deployments?api-version=v1", + c.endpoint, report.CandidateID, + ) + + payload, err := json.Marshal(report) + if err != nil { + return fmt.Errorf("failed to marshal deployment report: %w", err) + } + + req, err := runtime.NewRequest(ctx, http.MethodPost, url) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + if err := req.SetBody( + streaming.NopCloser(bytes.NewReader(payload)), "application/json", + ); err != nil { + return fmt.Errorf("failed to set request body: %w", err) + } + + resp, err := c.pipeline.Do(req) + if err != nil { + return fmt.Errorf("HTTP request failed: %w", err) + } + defer resp.Body.Close() + + if !runtime.HasStatusCode(resp, http.StatusOK, http.StatusCreated, http.StatusAccepted, http.StatusNoContent) { + return runtime.NewResponseError(resp) + } + + return nil +} + +// GetCandidateConfig fetches the candidate configuration from the optimization service. 
+// GET /optimize/candidates/{id}/config +func (c *OptimizeClient) GetCandidateConfig( + ctx context.Context, + candidateID string, +) (any, error) { + url := fmt.Sprintf("%s/optimize/candidates/%s/config?api-version=v1", c.endpoint, candidateID) + + req, err := runtime.NewRequest(ctx, http.MethodGet, url) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + resp, err := c.pipeline.Do(req) + if err != nil { + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + defer resp.Body.Close() + + if !runtime.HasStatusCode(resp, http.StatusOK) { + return nil, runtime.NewResponseError(resp) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + var config any + if err := json.Unmarshal(body, &config); err != nil { + return nil, fmt.Errorf("failed to parse candidate config: %w", err) + } + return config, nil +} + +// GetCandidate fetches the candidate manifest (metadata + file list) from FAOS. 
+// GET /optimize/candidates/{id} +func (c *OptimizeClient) GetCandidate( + ctx context.Context, + candidateID string, +) (*CandidateManifest, error) { + url := fmt.Sprintf("%s/optimize/candidates/%s?api-version=v1", c.endpoint, candidateID) + + req, err := runtime.NewRequest(ctx, http.MethodGet, url) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + resp, err := c.pipeline.Do(req) + if err != nil { + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + defer resp.Body.Close() + + if !runtime.HasStatusCode(resp, http.StatusOK) { + return nil, runtime.NewResponseError(resp) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + var manifest CandidateManifest + if err := json.Unmarshal(body, &manifest); err != nil { + return nil, fmt.Errorf("failed to parse candidate manifest: %w", err) + } + return &manifest, nil +} + +// GetCandidateFile downloads a single file from a candidate. 
+// GET /optimize/candidates/{id}/files?path={path} +func (c *OptimizeClient) GetCandidateFile( + ctx context.Context, + candidateID string, + filePath string, +) (string, error) { + url := fmt.Sprintf("%s/optimize/candidates/%s/files?api-version=v1&path=%s", + c.endpoint, candidateID, netURL.QueryEscape(filePath)) + + req, err := runtime.NewRequest(ctx, http.MethodGet, url) + if err != nil { + return "", fmt.Errorf("failed to create request: %w", err) + } + + resp, err := c.pipeline.Do(req) + if err != nil { + return "", fmt.Errorf("HTTP request failed: %w", err) + } + defer resp.Body.Close() + + if !runtime.HasStatusCode(resp, http.StatusOK) { + return "", runtime.NewResponseError(resp) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("failed to read response body: %w", err) + } + + return string(body), nil +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client_test.go new file mode 100644 index 00000000000..2396468a4bf --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client_test.go @@ -0,0 +1,267 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package optimize_api + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// newTestClient builds an OptimizeClient that talks to the given httptest server +// with no auth (bare pipeline). 
+func newTestClient(serverURL string) *OptimizeClient { + pipeline := runtime.NewPipeline( + "test", + "v0.0.0", + runtime.PipelineOptions{}, + &policy.ClientOptions{}, + ) + return &OptimizeClient{ + endpoint: serverURL, + pipeline: pipeline, + } +} + +// stubCredential satisfies azcore.TokenCredential for constructor tests. +type stubCredential struct{} + +func (stubCredential) GetToken(_ context.Context, _ policy.TokenRequestOptions) (azcore.AccessToken, error) { + return azcore.AccessToken{Token: "stub"}, nil +} + +func TestNewOptimizeClient(t *testing.T) { + t.Parallel() + client := NewOptimizeClient("https://example.com", stubCredential{}) + require.NotNil(t, client) + assert.Equal(t, "https://example.com", client.endpoint) +} + +func TestStartOptimize(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, http.MethodPost, r.Method) + assert.True(t, strings.HasSuffix(r.URL.Path, "/optimize")) + assert.Contains(t, r.URL.RawQuery, "api-version=v1") + + w.WriteHeader(http.StatusAccepted) + _ = json.NewEncoder(w).Encode(OptimizeResponse{ + OperationID: "op-abc", + Status: StatusQueued, + }) + })) + defer server.Close() + + client := newTestClient(server.URL) + resp, err := client.StartOptimize(context.Background(), &OptimizeRequest{ + Agent: AgentDefinition{ + FoundryProjectURL: "https://example.com/proj", + AgentName: "agent-1", + }, + Options: OptimizeOptions{EvalModel: "gpt-4o-mini"}, + }) + + require.NoError(t, err) + assert.Equal(t, "op-abc", resp.OperationID) + assert.Equal(t, StatusQueued, resp.Status) +} + +func TestGetOptimizeStatus(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, http.MethodGet, r.Method) + assert.Contains(t, r.URL.Path, "/optimize/op-123") + assert.Contains(t, r.URL.RawQuery, "api-version=v1") + + _ = json.NewEncoder(w).Encode(OptimizeJobStatus{ + 
OperationID: "op-123", + Status: StatusCompleted, + CreatedAt: "2024-01-01T00:00:00Z", + UpdatedAt: "2024-01-01T01:00:00Z", + Best: &CandidateResult{ + Name: "candidate-1", + AvgScore: 0.92, + PassRate: 0.95, + }, + Baseline: &CandidateResult{ + Name: "baseline", + AvgScore: 0.6, + }, + }) + })) + defer server.Close() + + client := newTestClient(server.URL) + status, err := client.GetOptimizeStatus(context.Background(), "op-123") + + require.NoError(t, err) + assert.Equal(t, "op-123", status.OperationID) + assert.Equal(t, StatusCompleted, status.Status) + require.NotNil(t, status.Best) + assert.InDelta(t, 0.92, status.Best.AvgScore, 0.001) + require.NotNil(t, status.Baseline) + assert.InDelta(t, 0.6, status.Baseline.AvgScore, 0.001) +} + +func TestListOptimizeJobs(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, http.MethodGet, r.Method) + assert.Contains(t, r.URL.RawQuery, "limit=10") + assert.Contains(t, r.URL.RawQuery, "status=running") + assert.Contains(t, r.URL.RawQuery, "api-version=v1") + + _ = json.NewEncoder(w).Encode(OptimizeListResponse{ + Data: []OptimizeJobStatus{ + {OperationID: "op-1", Status: StatusRunning}, + {OperationID: "op-2", Status: StatusRunning}, + }, + FirstID: "op-1", + LastID: "op-2", + HasMore: false, + }) + })) + defer server.Close() + + client := newTestClient(server.URL) + resp, err := client.ListOptimizeJobs(context.Background(), 10, "running") + + require.NoError(t, err) + assert.Len(t, resp.Data, 2) + assert.Equal(t, "op-1", resp.FirstID) + assert.Equal(t, "op-2", resp.LastID) + assert.False(t, resp.HasMore) +} + +func TestCancelOptimize(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, http.MethodPost, r.Method) + assert.Contains(t, r.URL.Path, "/optimize/op-xyz/cancel") + assert.Contains(t, r.URL.RawQuery, "api-version=v1") + + _ = 
json.NewEncoder(w).Encode(OptimizeCancelResponse{ + OperationID: "op-xyz", + Status: StatusCancelled, + }) + })) + defer server.Close() + + client := newTestClient(server.URL) + resp, err := client.CancelOptimize(context.Background(), "op-xyz") + + require.NoError(t, err) + assert.Equal(t, "op-xyz", resp.OperationID) + assert.Equal(t, StatusCancelled, resp.Status) +} + +func TestStartOptimize_HTTPError(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error": {"code": "BadRequest", "message": "invalid payload"}}`)) + })) + defer server.Close() + + client := newTestClient(server.URL) + resp, err := client.StartOptimize(context.Background(), &OptimizeRequest{}) + + assert.Nil(t, resp) + require.Error(t, err) + assert.Contains(t, err.Error(), "400") +} + +func TestGetOptimizeStatus_HTTPError(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`{"error": {"code": "NotFound", "message": "job not found"}}`)) + })) + defer server.Close() + + client := newTestClient(server.URL) + resp, err := client.GetOptimizeStatus(context.Background(), "nonexistent") + + assert.Nil(t, resp) + require.Error(t, err) + assert.Contains(t, err.Error(), "404") +} + +func TestListOptimizeJobs_NoStatusFilter(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.NotContains(t, r.URL.RawQuery, "status=") + _ = json.NewEncoder(w).Encode(OptimizeListResponse{ + Data: []OptimizeJobStatus{}, + }) + })) + defer server.Close() + + client := newTestClient(server.URL) + resp, err := client.ListOptimizeJobs(context.Background(), 20, "") + + require.NoError(t, err) + assert.Empty(t, resp.Data) +} + +func TestReportDeployment(t *testing.T) { + 
t.Parallel() + + var capturedBody DeploymentReport + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, http.MethodPost, r.Method) + assert.Contains(t, r.URL.Path, "/optimize/candidates/cand-42/deployments") + assert.Contains(t, r.URL.RawQuery, "api-version=v1") + + err := json.NewDecoder(r.Body).Decode(&capturedBody) + assert.NoError(t, err) + + w.WriteHeader(http.StatusCreated) + })) + defer server.Close() + + client := newTestClient(server.URL) + err := client.ReportDeployment(t.Context(), &DeploymentReport{ + CandidateID: "cand-42", + ProjectEndpoint: "https://proj.endpoint", + }) + + require.NoError(t, err) + assert.Equal(t, "cand-42", capturedBody.CandidateID) + assert.Equal(t, "https://proj.endpoint", capturedBody.ProjectEndpoint) +} + +func TestReportDeployment_HTTPError(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":{"code":"BadRequest","message":"invalid candidate"}}`)) + })) + defer server.Close() + + client := newTestClient(server.URL) + err := client.ReportDeployment(t.Context(), &DeploymentReport{ + CandidateID: "bad-id", + }) + + require.Error(t, err) + assert.Contains(t, err.Error(), "400") +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go new file mode 100644 index 00000000000..9aa22338094 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go @@ -0,0 +1,216 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package optimize_api + +import "encoding/json" + +// Optimization job status constants. 
+const ( + StatusPending = "pending" + StatusRunning = "running" + StatusCompleted = "completed" + StatusFailed = "failed" + StatusCancelled = "cancelled" + + // StatusQueued is a deprecated alias for StatusPending. + // The API returns "pending", not "queued". + StatusQueued = StatusPending +) + +// IsTerminal returns true if the status represents a terminal state. +func IsTerminal(status string) bool { + switch status { + case StatusCompleted, StatusFailed, StatusCancelled: + return true + default: + return false + } +} + +// --- Request models --- + +// OptimizeRequest is the top-level payload sent to POST /optimize. +type OptimizeRequest struct { + Agent AgentDefinition `json:"agent"` + Dataset []DatasetTask `json:"dataset,omitempty"` + TrainDatasetReference *DatasetReference `json:"trainDatasetReference,omitempty"` + ValidationDatasetReference *DatasetReference `json:"validationDatasetReference,omitempty"` + Evaluators []string `json:"evaluators,omitempty"` + Criteria []Criterion `json:"criteria,omitempty"` + Options OptimizeOptions `json:"options"` +} + +// AgentDefinition identifies the agent to optimize. +type AgentDefinition struct { + FoundryProjectURL string `json:"foundryProjectUrl"` + AgentName string `json:"agentName"` + AgentVersion string `json:"agentVersion,omitempty"` + Model string `json:"model,omitempty"` + SystemPrompt string `json:"systemPrompt,omitempty"` + Skills []SkillDefinition `json:"skills,omitempty"` +} + +// SkillDefinition describes a skill attached to an agent. +type SkillDefinition struct { + Name string `json:"name"` + Description string `json:"description"` +} + +// DatasetTask is a single task in an inline dataset. +type DatasetTask struct { + Name string `json:"name,omitempty"` + Query string `json:"query,omitempty"` + Prompt string `json:"prompt"` + GroundTruth string `json:"groundTruth,omitempty"` + Criteria []Criterion `json:"criteria,omitempty"` +} + +// DatasetReference points to a registered dataset by name and version. 
+type DatasetReference struct { + Name string `json:"name"` + Version string `json:"version"` +} + +// Criterion is a named evaluation criterion. +type Criterion struct { + Name string `json:"name"` + Instruction string `json:"instruction"` +} + +// OptimizeOptions controls the optimization run. +type OptimizeOptions struct { + Budget int `json:"budget,omitempty"` + MaxIterations int `json:"maxIterations,omitempty"` + MinImprovement float64 `json:"minImprovement,omitempty"` + ImprovementThreshold float64 `json:"improvementThreshold,omitempty"` + PassThreshold float64 `json:"passThreshold,omitempty"` + EvalModel string `json:"evalModel"` + Strategies []string `json:"strategies,omitempty"` + KeepVersions bool `json:"keepVersions,omitempty"` + TasksPerIteration int `json:"tasksPerIteration,omitempty"` + MaxReflectionTasks int `json:"maxReflectionTasks,omitempty"` + ReflectionModel string `json:"reflectionModel,omitempty"` + Mode string `json:"mode,omitempty"` +} + +// --- Response models --- + +// OptimizeResponse is the immediate response from POST /optimize. +type OptimizeResponse struct { + OperationID string `json:"operationId"` + Status string `json:"status"` +} + +// OptimizeJobStatus is the full status of an optimization job. +type OptimizeJobStatus struct { + OperationID string `json:"operationId"` + Status string `json:"status"` + CreatedAt string `json:"createdAt"` + UpdatedAt string `json:"updatedAt"` + Agent *AgentDefinition `json:"agent,omitempty"` + Progress *JobProgress `json:"progress,omitempty"` + Error *JobError `json:"error,omitempty"` + Baseline *CandidateResult `json:"baseline,omitempty"` + Best *CandidateResult `json:"best,omitempty"` + Candidates []CandidateResult `json:"candidates,omitempty"` + AllStrategiesFailed bool `json:"allStrategiesFailed,omitempty"` + Warnings []string `json:"warnings,omitempty"` +} + +// JobProgress reports iteration-level progress. 
type JobProgress struct {
	CurrentStrategy  string  `json:"currentStrategy"`
	CurrentIteration int     `json:"currentIteration"`
	TasksCompleted   int     `json:"tasksCompleted"`
	TasksTotal       int     `json:"tasksTotal"`
	BestScore        float64 `json:"bestScore"`
	ElapsedSeconds   float64 `json:"elapsedSeconds"`
}

// JobError captures an error from a failed job.
// The API sometimes returns a bare string and sometimes an object with code
// and message; the custom UnmarshalJSON below accepts both shapes.
type JobError struct {
	Code    string `json:"code"`
	Message string `json:"message"`
}

// UnmarshalJSON accepts either a JSON string, which is stored in Message, or
// a JSON object with "code" and "message" fields. Any other payload yields
// the error produced by the object decode.
func (e *JobError) UnmarshalJSON(data []byte) error {
	// String form: only Message is populated.
	var text string
	if json.Unmarshal(data, &text) == nil {
		e.Message = text
		return nil
	}

	// Object form: decode through a method-less copy of the type so that
	// encoding/json does not call this method again.
	type plain JobError
	var decoded plain
	err := json.Unmarshal(data, &decoded)
	if err == nil {
		*e = JobError(decoded)
	}
	return err
}

// CandidateResult holds the evaluation result for a single candidate.
// Mutations, Rationale, CandidateID and TaskScores are omitted from the JSON
// when empty.
type CandidateResult struct {
	Name        string         `json:"name"`
	AvgScore    float64        `json:"avgScore"`
	AvgTokens   float64        `json:"avgTokens"`
	PassRate    float64        `json:"passRate"`
	Mutations   map[string]any `json:"mutations,omitempty"`
	Rationale   string         `json:"rationale,omitempty"`
	CandidateID string         `json:"candidateId,omitempty"`
	TaskScores  []TaskScore    `json:"taskScores,omitempty"`
}

// TaskScore captures per-task evaluation metrics.
// Duration is carried in the JSON field "durationSeconds".
type TaskScore struct {
	TaskName       string             `json:"taskName"`
	Scores         map[string]float64 `json:"scores"`
	CompositeScore float64            `json:"compositeScore"`
	Tokens         int                `json:"tokens"`
	Duration       float64            `json:"durationSeconds"`
	Passed         bool               `json:"passed"`
}

// --- List response ---

// OptimizeListResponse is the paginated list of optimization jobs.
+type OptimizeListResponse struct {
+	Data    []OptimizeJobStatus `json:"data"`    // one entry per optimization job on this page
+	FirstID string              `json:"firstId"` // presumably the ID of the first entry in Data — confirm
+	LastID  string              `json:"lastId"`  // presumably the ID of the last entry in Data — confirm
+	HasMore bool                `json:"hasMore"` // NOTE(review): looks like "more pages exist" — confirm with the API
+}
+
+// --- Cancel response ---
+
+// OptimizeCancelResponse is returned when cancelling an optimization job.
+type OptimizeCancelResponse struct {
+	OperationID string `json:"operationId"`
+	Status      string `json:"status"`
+}
+
+// --- Deployment report ---
+
+// DeploymentReport is sent to FAOS after a candidate is deployed,
+// creating the candidate→deployment mapping.
+type DeploymentReport struct {
+	CandidateID     string `json:"candidateId"`
+	ProjectEndpoint string `json:"projectEndpoint,omitempty"` // dropped from the JSON when empty
+}
+
+// --- Candidate models ---
+
+// CandidateManifest represents the candidate metadata returned by
+// GET /optimize/candidates/{id}.
+type CandidateManifest struct {
+	Files []CandidateFile `json:"files"`
+}
+
+// CandidateFile is a single entry in the candidate manifest's files list.
+type CandidateFile struct {
+	Path string `json:"path"`
+	Type string `json:"type"` // NOTE(review): the set of allowed values is not visible here
+}
diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go
new file mode 100644
index 00000000000..57884b0bbf7
--- /dev/null
+++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go
@@ -0,0 +1,237 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+ +package optimize_api + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestOptimizeRequest_RoundTrip(t *testing.T) { + t.Parallel() + + original := OptimizeRequest{ + Agent: AgentDefinition{ + FoundryProjectURL: "https://example.ai.azure.com/project/my-proj", + AgentName: "my-agent", + AgentVersion: "1", + Model: "gpt-4o", + SystemPrompt: "You are helpful", + Skills: []SkillDefinition{ + {Name: "search", Description: "web search"}, + }, + }, + Dataset: []DatasetTask{ + { + Name: "task1", + Prompt: "What is 2+2?", + GroundTruth: "4", + Criteria: []Criterion{ + {Name: "accuracy", Instruction: "answer must be correct"}, + }, + }, + }, + TrainDatasetReference: &DatasetReference{ + Name: "train-ds", + Version: "1", + }, + Evaluators: []string{"coherence", "relevance"}, + Criteria: []Criterion{ + {Name: "global-crit", Instruction: "be concise"}, + }, + Options: OptimizeOptions{ + Budget: 100, + MaxIterations: 5, + MinImprovement: 0.01, + ImprovementThreshold: 0.05, + PassThreshold: 0.8, + EvalModel: "gpt-4o-mini", + Strategies: []string{"prompt_mutation"}, + KeepVersions: true, + TasksPerIteration: 10, + MaxReflectionTasks: 3, + ReflectionModel: "gpt-4o", + Mode: "full", + }, + } + + data, err := json.Marshal(original) + require.NoError(t, err, "marshal should succeed") + + s := string(data) + // Verify camelCase JSON tags + for _, field := range []string{ + `"agent"`, `"foundryProjectUrl"`, `"agentName"`, `"agentVersion"`, + `"dataset"`, `"trainDatasetReference"`, `"evaluators"`, `"criteria"`, + `"options"`, `"evalModel"`, `"maxIterations"`, `"minImprovement"`, + `"improvementThreshold"`, `"passThreshold"`, `"keepVersions"`, + `"tasksPerIteration"`, `"maxReflectionTasks"`, `"reflectionModel"`, + `"groundTruth"`, `"systemPrompt"`, `"skills"`, + } { + assert.True(t, strings.Contains(s, field), "JSON should contain %s", field) + } + + var got OptimizeRequest + 
require.NoError(t, json.Unmarshal(data, &got), "unmarshal should succeed") + + assert.Equal(t, original.Agent.AgentName, got.Agent.AgentName) + assert.Equal(t, original.Agent.FoundryProjectURL, got.Agent.FoundryProjectURL) + assert.Equal(t, original.Agent.Model, got.Agent.Model) + assert.Len(t, got.Dataset, 1) + assert.Equal(t, "task1", got.Dataset[0].Name) + assert.Equal(t, "4", got.Dataset[0].GroundTruth) + assert.NotNil(t, got.TrainDatasetReference) + assert.Equal(t, "train-ds", got.TrainDatasetReference.Name) + assert.Equal(t, 100, got.Options.Budget) + assert.Equal(t, "gpt-4o-mini", got.Options.EvalModel) + assert.True(t, got.Options.KeepVersions) + assert.Equal(t, "full", got.Options.Mode) +} + +func TestOptimizeJobStatus_RoundTrip(t *testing.T) { + t.Parallel() + + original := OptimizeJobStatus{ + OperationID: "op-123", + Status: StatusRunning, + CreatedAt: "2024-01-01T00:00:00Z", + UpdatedAt: "2024-01-01T01:00:00Z", + Agent: &AgentDefinition{ + FoundryProjectURL: "https://example.ai.azure.com/project/p", + AgentName: "agent-1", + }, + Progress: &JobProgress{ + CurrentStrategy: "prompt_mutation", + CurrentIteration: 3, + TasksCompleted: 15, + TasksTotal: 20, + BestScore: 0.85, + ElapsedSeconds: 120.5, + }, + Baseline: &CandidateResult{ + Name: "baseline", + AvgScore: 0.6, + PassRate: 0.5, + }, + Best: &CandidateResult{ + Name: "candidate-2", + AvgScore: 0.9, + AvgTokens: 150.0, + PassRate: 0.95, + CandidateID: "cand-2", + Mutations: map[string]any{"systemPrompt": "Be very helpful"}, + Rationale: "Improved prompt clarity", + TaskScores: []TaskScore{ + { + TaskName: "task1", + Scores: map[string]float64{"coherence": 0.9, "relevance": 0.95}, + CompositeScore: 0.925, + Tokens: 200, + Duration: 1.5, + Passed: true, + }, + }, + }, + Candidates: []CandidateResult{ + {Name: "candidate-1", AvgScore: 0.7}, + }, + } + + data, err := json.Marshal(original) + require.NoError(t, err, "marshal should succeed") + + s := string(data) + for _, field := range []string{ + 
`"operationId"`, `"status"`, `"createdAt"`, `"updatedAt"`, + `"progress"`, `"currentStrategy"`, `"currentIteration"`, + `"tasksCompleted"`, `"tasksTotal"`, `"bestScore"`, `"elapsedSeconds"`, + `"baseline"`, `"best"`, `"candidates"`, `"candidateId"`, + `"avgScore"`, `"avgTokens"`, `"passRate"`, `"mutations"`, + `"rationale"`, `"taskScores"`, `"compositeScore"`, `"durationSeconds"`, + } { + assert.True(t, strings.Contains(s, field), "JSON should contain %s", field) + } + + var got OptimizeJobStatus + require.NoError(t, json.Unmarshal(data, &got), "unmarshal should succeed") + + assert.Equal(t, "op-123", got.OperationID) + assert.Equal(t, StatusRunning, got.Status) + assert.NotNil(t, got.Agent) + assert.Equal(t, "agent-1", got.Agent.AgentName) + assert.NotNil(t, got.Progress) + assert.Equal(t, 3, got.Progress.CurrentIteration) + assert.InDelta(t, 0.85, got.Progress.BestScore, 0.001) + assert.NotNil(t, got.Baseline) + assert.InDelta(t, 0.6, got.Baseline.AvgScore, 0.001) + assert.NotNil(t, got.Best) + assert.Equal(t, "cand-2", got.Best.CandidateID) + assert.Len(t, got.Best.TaskScores, 1) + assert.True(t, got.Best.TaskScores[0].Passed) + assert.Len(t, got.Candidates, 1) +} + +func TestOptimizeJobStatus_ErrorField(t *testing.T) { + t.Parallel() + + original := OptimizeJobStatus{ + OperationID: "op-err", + Status: StatusFailed, + Error: &JobError{ + Code: "InternalError", + Message: "something went wrong", + }, + } + + data, err := json.Marshal(original) + require.NoError(t, err) + + var got OptimizeJobStatus + require.NoError(t, json.Unmarshal(data, &got)) + + assert.Equal(t, StatusFailed, got.Status) + require.NotNil(t, got.Error) + assert.Equal(t, "InternalError", got.Error.Code) + assert.Equal(t, "something went wrong", got.Error.Message) +} + +func TestIsTerminal(t *testing.T) { + t.Parallel() + + assert.True(t, IsTerminal(StatusCompleted)) + assert.True(t, IsTerminal(StatusFailed)) + assert.True(t, IsTerminal(StatusCancelled)) + assert.False(t, 
IsTerminal(StatusRunning)) + assert.False(t, IsTerminal(StatusQueued)) + assert.False(t, IsTerminal("unknown")) +} + +func TestOptimizeListResponse_RoundTrip(t *testing.T) { + t.Parallel() + + original := OptimizeListResponse{ + Data: []OptimizeJobStatus{ + {OperationID: "op-1", Status: StatusCompleted}, + {OperationID: "op-2", Status: StatusRunning}, + }, + FirstID: "op-1", + LastID: "op-2", + HasMore: true, + } + + data, err := json.Marshal(original) + require.NoError(t, err) + + var got OptimizeListResponse + require.NoError(t, json.Unmarshal(data, &got)) + + assert.Len(t, got.Data, 2) + assert.Equal(t, "op-1", got.FirstID) + assert.Equal(t, "op-2", got.LastID) + assert.True(t, got.HasMore) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go new file mode 100644 index 00000000000..36fbf030042 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go @@ -0,0 +1,48 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package optimize_api + +import ( + "context" + "fmt" + "time" +) + +// Poller polls an optimization job until it reaches a terminal state. +type Poller struct { + Client *OptimizeClient + OperationID string + Interval time.Duration + OnProgress func(*OptimizeJobStatus) +} + +// PollUntilDone polls GetOptimizeStatus at the configured interval until the +// job reaches a terminal state (completed, failed, cancelled) or the context +// is cancelled. 
+func (p *Poller) PollUntilDone(ctx context.Context) (*OptimizeJobStatus, error) {
+	interval := p.Interval
+	if interval <= 0 {
+		interval = 5 * time.Second // time.NewTicker panics on a non-positive duration; 5s is the CLI default
+	}
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+	for {
+		status, err := p.Client.GetOptimizeStatus(ctx, p.OperationID)
+		if err != nil {
+			return nil, fmt.Errorf("failed to get optimization status: %w", err)
+		}
+		if p.OnProgress != nil {
+			p.OnProgress(status) // report every observed status, including the terminal one
+		}
+		if IsTerminal(status.Status) {
+			return status, nil
+		}
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		case <-ticker.C:
+			// continue polling
+		}
+	}
+}
diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go
new file mode 100644
index 00000000000..35dce98a62b
--- /dev/null
+++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go
@@ -0,0 +1,161 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package optimize_api
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
+	"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func newPollerTestClient(serverURL string) *OptimizeClient {
+	pipeline := runtime.NewPipeline(
+		"test",
+		"v0.0.0",
+		runtime.PipelineOptions{},
+		&policy.ClientOptions{},
+	)
+	return &OptimizeClient{
+		endpoint: serverURL,
+		pipeline: pipeline,
+	}
+}
+
+func TestPoller_PollsUntilCompleted(t *testing.T) {
+	t.Parallel()
+
+	var callCount int32
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		n := atomic.AddInt32(&callCount, 1)
+		status := StatusRunning
+		if n >= 3 {
+			status = StatusCompleted
+		}
+		_ = json.NewEncoder(w).Encode(OptimizeJobStatus{
+			OperationID: "op-1",
+			Status:      status,
+			Progress: &JobProgress{
+ CurrentIteration: int(n), + }, + }) + })) + defer server.Close() + + var progressCalls int32 + poller := &Poller{ + Client: newPollerTestClient(server.URL), + OperationID: "op-1", + Interval: 10 * time.Millisecond, + OnProgress: func(_ *OptimizeJobStatus) { + atomic.AddInt32(&progressCalls, 1) + }, + } + + result, err := poller.PollUntilDone(context.Background()) + require.NoError(t, err) + assert.Equal(t, StatusCompleted, result.Status) + assert.GreaterOrEqual(t, atomic.LoadInt32(&callCount), int32(3)) + assert.GreaterOrEqual(t, atomic.LoadInt32(&progressCalls), int32(3)) +} + +func TestPoller_PollsUntilFailed(t *testing.T) { + t.Parallel() + + var callCount int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + n := atomic.AddInt32(&callCount, 1) + status := StatusRunning + if n >= 2 { + status = StatusFailed + } + _ = json.NewEncoder(w).Encode(OptimizeJobStatus{ + OperationID: "op-fail", + Status: status, + Error: &JobError{ + Code: "InternalError", + Message: "something broke", + }, + }) + })) + defer server.Close() + + poller := &Poller{ + Client: newPollerTestClient(server.URL), + OperationID: "op-fail", + Interval: 10 * time.Millisecond, + } + + result, err := poller.PollUntilDone(context.Background()) + require.NoError(t, err) + assert.Equal(t, StatusFailed, result.Status) +} + +func TestPoller_ContextCancellation(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _ = json.NewEncoder(w).Encode(OptimizeJobStatus{ + OperationID: "op-cancel", + Status: StatusRunning, + }) + })) + defer server.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) + defer cancel() + + poller := &Poller{ + Client: newPollerTestClient(server.URL), + OperationID: "op-cancel", + Interval: 10 * time.Millisecond, + } + + result, err := poller.PollUntilDone(ctx) + assert.Nil(t, result) + require.Error(t, err) + 
assert.ErrorIs(t, err, context.DeadlineExceeded) +} + +func TestPoller_OnProgressCalled(t *testing.T) { + t.Parallel() + + var callCount int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + n := atomic.AddInt32(&callCount, 1) + status := StatusRunning + if n >= 2 { + status = StatusCompleted + } + _ = json.NewEncoder(w).Encode(OptimizeJobStatus{ + OperationID: "op-prog", + Status: status, + }) + })) + defer server.Close() + + var statuses []string + poller := &Poller{ + Client: newPollerTestClient(server.URL), + OperationID: "op-prog", + Interval: 10 * time.Millisecond, + OnProgress: func(s *OptimizeJobStatus) { + statuses = append(statuses, s.Status) + }, + } + + result, err := poller.PollUntilDone(context.Background()) + require.NoError(t, err) + assert.Equal(t, StatusCompleted, result.Status) + assert.GreaterOrEqual(t, len(statuses), 2) + assert.Equal(t, StatusCompleted, statuses[len(statuses)-1]) +} From 0efde014df4b0775212973eb68687ceebf78de3b Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 14 May 2026 09:53:16 -0700 Subject: [PATCH 02/33] more bugbash --- .../azd_observability_bugbash.md | 745 ++++++++++++++++++ .../azure.ai.agents/internal/cmd/eval.go | 2 +- .../azure.ai.agents/internal/cmd/eval_init.go | 7 +- .../internal/cmd/eval_init_jobs.go | 7 +- .../internal/cmd/eval_init_prompts.go | 67 +- .../azure.ai.agents/internal/cmd/eval_run.go | 19 +- .../azure.ai.agents/internal/cmd/optimize.go | 20 +- .../internal/cmd/optimize_status.go | 4 +- .../pkg/agents/eval_api/eval_config.go | 2 +- 9 files changed, 819 insertions(+), 54 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md index e69de29bb2d..331e933fb38 100644 --- a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md +++ b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md @@ -0,0 +1,745 @@ +# azd ai agent eval + optimize — 
BugBash
+
+> **TiP regions:** Some dependent APIs are currently available only in the TiP region used for this bugbash.
+
+## 1. Install the extension
+
+Prerequisites: [azd CLI](https://aka.ms/azd), [Go](https://go.dev/dl/), `az login`
+
+```bash
+azd ext install microsoft.azd.extensions
+git clone https://github.com/Zyysurely/azure-dev.git
+cd azure-dev/cli/azd/extensions/azure.ai.agents
+git checkout zyying/opt_eval
+azd x build
+```
+
+After building, register the extension and overlay the custom binary:
+
+```powershell
+# Windows (PowerShell)
+azd ext install azure.ai.agents
+copy bin\azure-ai-agents-windows-amd64.exe $env:USERPROFILE\.azd\extensions\azure.ai.agents\ -Force
+```
+
+```bash
+# macOS / Linux
+azd ext install azure.ai.agents
+cp bin/azure-ai-agents-$(uname -s | tr A-Z a-z)-* ~/.azd/extensions/azure.ai.agents/
+```
+
+Verify: `azd ai agent eval --help` and `azd ai agent optimize --help`
+
+## 2. Ensure access to the bugbash project
+
+https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/2d385bf4-0756-4a76-aa95-28bf9ed3b625/resourceGroups/rg-azdbugbash/users
+Please activate the `Foundry User` and `Owner` roles.
+
+
+## 3. Create an optimization-ready agent
+
+Navigate to a fresh directory outside the extension repo, then initialize the agent and point it at the bugbash project. If you already have an azd project with a TiP Foundry account, you can keep using it.
+
+```bash
+mkdir bugbash-azd-<alias> && cd bugbash-azd-<alias>
+azd init -t https://github.com/zyysurely/sample_agent .
+azd ai agent init --project-id /subscriptions/2d385bf4-0756-4a76-aa95-28bf9ed3b625/resourceGroups/rg-azdbugbash/providers/Microsoft.CognitiveServices/accounts/azd-bugbash-0514/projects/bugbash-westus2
+# Customize your agent name and model deployment
+```
+
+The template includes `agent_optimization/` — a small package that reads config
+injected by the optimization service at runtime.
Your agent calls `load_config()` at startup: + +```python +from agent_optimization import load_config + +config = load_config( + default_instructions="You are a helpful assistant.", + default_model="gpt-4.1-mini", +) +``` + +## 4. Test locally [You can skip since the current sample agent code has been verified] + +```bash +azd ai agent run +# In another terminal: +azd ai agent invoke --local "Hello!" +``` + +## 5. Deploy hosted agent + +Point to an existing Foundry project and deploy (no `azd provision` needed): + +```bash +# Windows (PowerShell) +azd deploy +``` + + +Verify: `azd ai agent invoke "Hello!"` + +> **If you have Owner permissions** and want fresh resources: run `azd provision` before `azd deploy`. + + +## 6. E2E Hero Scenario (inside an azd project with a hosted agent) + +All commands below auto-detect the agent context from the current azd environment. +Run them from your deployed azd project directory. + +### 6a. Initialize an eval suite + +> **Note:** The dataset generation API is not yet available. Use the sample `data.jsonl` included in the template. + +```bash +azd ai agent eval init --dataset ./data.jsonl +``` + +The command resolves your agent from `azure.yaml` and prompts interactively: + +``` +Resolving eval context... + Reading project configuration... + Detecting agent service... + Resolving Foundry project endpoint... + +Detected eval target: + (✓) Service: sample-agent (azure.yaml) + (✓) Agent: sample-agent (AGENT_SAMPLE_AGENT_NAME) + (✓) Version: 1 (AGENT_SAMPLE_AGENT_VERSION) + (✓) Kind: hosted (agent.yaml) + (✓) Endpoint: https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2 + (✓) Project: D:\optimization\bugbash-agent-zyying + Eval config: D:\optimization\bugbash-agent-zyying\eval.yaml + +? Eval suite name: smoke-core-zyying +? How would you like to provide the generation instruction?: Type inline +? Describe what this agent does and what scenarios to test: test agent +? 
Select the model for evaluation and generation: Select another deployment +? Select a model deployment: gpt-4o (gpt-4o) +? Max samples: 100 +\ Evaluator generation... (✓) Done Evaluator generation (1m16s) + + Artifacts: D:\optimization\bugbash-agent-zyying\.azure\.foundry + evaluators/smoke-core-zyying-35368f67.json +Eval suite created + Config: D:\optimization\bugbash-agent-zyying\eval.yaml + Dataset: D:\optimization\bugbash-agent-zyying\data.jsonl + Evaluator: smoke-core-zyying-35368f67 + + Review the generated assets, then run: + azd ai agent eval run +``` + +### 6b. Run an eval + +```bash +azd ai agent eval run +``` + +Reads `eval.yaml`, creates the eval on the Foundry backend, and submits a run against your deployed agent. + +### 6c. Browse eval results + +```bash +# List all evals (table with status, run count, created date) +azd ai agent eval list + +# Show details for the most recent eval (auto-resolved from azd env) +azd ai agent eval show + +# Export results to JSON for offline analysis +azd ai agent eval show -O results.json +``` + +### 6d. Optimize the agent + +After the eval suite is ready, run optimize. It auto-detects the `eval.yaml` you just created. + +```bash +azd ai agent optimize +# → Prompts: "Found eval.yaml in project. Use it for optimization?" +# Select Yes to use your eval config, or No to use the built-in dataset. +``` + +Expected output (takes ~5–20 minutes): + +``` +Optimizing agent "sample-agent"... 
+ Config: D:\optimization\bugbash-agent-zyying\eval.yaml + Job ID: opt_f74131d58c774ebba1765fae1005a9f8 + ⠦ completed · strategy: gepa · iteration 1 · score: 0.95 · 3m0s + +Results: + Candidate Score Pass Tokens + ──────────────────── ─────── ─────── ──────── + baseline 0.73 100% 430 + baseline_instr_v2 0.77 100% 1180 + baseline_instr_v3 0.85 100% 1204 + baseline_instr_v1 ★ 0.92 100% 1063 + + Candidate IDs: + baseline_instr_v2 cand_445fe8e68e224d6d94cbb37b022945eb + baseline_instr_v3 cand_51b87d7ce10b43ba801776483a9b5506 + ★ baseline_instr_v1 cand_6b5c23ed295f4f4e9be87b7fdb3809b0 + + Deploy the best candidate: + azd ai agent optimize deploy --candidate cand_6b5c23ed295f4f4e9be87b7fdb3809b0 +``` + +The ★ marks the best candidate. Copy the deploy command from the output to promote it. + +### 6e. Monitor optimization jobs + +```bash +# Watch a running job in real-time +azd ai agent optimize status --watch + +# List all optimization runs +azd ai agent optimize list + +# Cancel a running job +azd ai agent optimize cancel +``` + +### 6f. Deploy the winning candidate + +The optimize output includes a ready-to-use deploy command: + +```bash +azd ai agent optimize deploy --candidate +``` + +This creates a new agent version with `OPTIMIZATION_CONFIG` set to the candidate's +config (instructions, model, temperature). The agent SDK's `load_config()` reads this +at startup and applies the optimized settings. + +### 6g. Verify the optimized agent + +```bash +azd ai agent invoke "Hello!" +# Expected: agent responds using the optimized configuration +``` + +--- + +## Comprehensive Test Scenarios + +### A. `azd ai agent eval init` + +#### Inside azd project (cd into your deployed azd project) + +```bash +# A1. Default interactive init — auto-detects agent from azd env +azd ai agent eval init --dataset ./data.jsonl +# Expected: prompts for name, instruction, model, max-samples +# writes eval.yaml + artifacts under .azure/.foundry/ + +# A2. 
Custom eval suite name +azd ai agent eval init --dataset ./data.jsonl --name my-custom-suite +# Expected: config name = "my-custom-suite-" (random suffix appended) + +# A3. Inline gen-instruction (skip prompt) +azd ai agent eval init --dataset ./data.jsonl -g "Test the agent's ability to handle refund requests" +# Expected: uses inline instruction, skips instruction prompt + +# A4. Gen-instruction from file +echo "Test customer support scenarios" > /tmp/instruction.txt +azd ai agent eval init --dataset ./data.jsonl -G /tmp/instruction.txt +# Expected: reads instruction from file + +# A5. Custom eval model +azd ai agent eval init --dataset ./data.jsonl --eval-model gpt-4o +# Expected: uses gpt-4o instead of deployed model default + +# A6. Custom evaluators +azd ai agent eval init --dataset ./data.jsonl --evaluator builtin.task_adherence --evaluator custom_eval +# Expected: eval.yaml has both evaluators listed + +# A7. Custom output path +azd ai agent eval init --dataset ./data.jsonl -O my-eval.yaml +# Expected: writes to my-eval.yaml instead of eval.yaml + +# A8. --no-wait mode +azd ai agent eval init --dataset ./data.jsonl --no-wait +# Expected: submits jobs, prints pending op IDs, returns immediately +# eval.yaml has InitStatus: pending + +# A9. Regeneration — eval.yaml already exists +# (run init once first, then run again) +azd ai agent eval init --dataset ./data.jsonl +# Expected: prompts "Existing dataset: ... Do you want to regenerate?" +# and "Existing evaluator: ... Do you want to regenerate?" + +# A10. Reset defaults — overwrite existing config +azd ai agent eval init --dataset ./data.jsonl --reset-defaults +# Expected: overwrites eval.yaml without prompting about existing config + +# A11. Non-interactive mode (no prompts) +$env:AZD_FORCE_TTY = "false" # PowerShell +azd ai agent eval init --dataset ./data.jsonl +# Expected: uses defaults without prompting. Full regeneration if eval.yaml exists. +# Clean up: Remove-Item env:\AZD_FORCE_TTY + +# A12. 
Multiple agent services in azure.yaml +# (if your project has 2+ azure.ai.agent services) +azd ai agent eval init --dataset ./data.jsonl +# Expected: prompts to select which agent service +``` + +#### Outside azd project (cd to an empty directory) + +```bash +mkdir /tmp/eval-test && cd /tmp/eval-test + +# A13. No agent flag, no project — should fail +azd ai agent eval init --dataset ./data.jsonl +# Expected: ERROR — "failed to get project config (is there an azure.yaml?)" +# or guidance to use --agent / run from azd project + +# A14. Explicit agent + endpoint — works standalone +azd ai agent eval init --dataset ./data.jsonl \ + --agent sample-agent \ + -p https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2 +# Expected: works without azure.yaml; writes eval.yaml in current dir + +# A15. Missing endpoint — should fail with guidance +azd ai agent eval init --dataset ./data.jsonl --agent sample-agent +# Expected: ERROR — "Foundry project context could not be resolved" +# suggests --project-endpoint or azd ai agent init + +# A16. Endpoint via env var +$env:AZURE_AI_PROJECT_ENDPOINT = "https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2" +azd ai agent eval init --dataset ./data.jsonl --agent sample-agent +# Expected: picks up endpoint from env var, works +# Clean up: Remove-Item env:\AZURE_AI_PROJECT_ENDPOINT +``` + +--- + +### B. `azd ai agent eval run` + +#### Inside azd project + +```bash +# B1. Default run (eval.yaml exists from init) +azd ai agent eval run +# Expected: reads eval.yaml from project dir, creates eval, submits run + +# B2. Custom config path +azd ai agent eval run --config my-eval.yaml +# Expected: uses my-eval.yaml instead of eval.yaml + +# B3. 
Resume pending init +# (if you used --no-wait during init, eval.yaml has pending status) +azd ai agent eval run +# Expected: detects InitStatus: pending, resumes polling, then runs eval +``` + +#### Outside azd project + +```bash +cd /tmp/eval-test # directory with eval.yaml from A14 + +# B4. eval.yaml in cwd, no azd project +azd ai agent eval run +# Expected: falls back to prompt-based endpoint resolution, runs eval + +# B5. No eval.yaml at all +mkdir /tmp/empty-test && cd /tmp/empty-test +azd ai agent eval run +# Expected: ERROR — cannot read eval.yaml +``` + +--- + +### C. `azd ai agent eval list` + +```bash +# C1. Default list (inside or outside project, needs endpoint) +azd ai agent eval list +# Expected: table with columns: Eval ID, Name, Status, Runs, Created by, Created on +# max 10 results, active eval marked with * + +# C2. Custom limit +azd ai agent eval list --limit 3 +# Expected: at most 3 rows + +# C3. No evals exist +# (on a fresh project with no evals) +azd ai agent eval list +# Expected: "no evaluations found" or empty table +``` + +--- + +### D. `azd ai agent eval show` + +```bash +# D1. Show by eval ID +azd ai agent eval show +# Expected: eval definition + recent run history + +# D2. Auto-resolve eval ID (from azd env) +azd ai agent eval show +# Expected: uses last eval ID from environment + +# D3. No eval ID available +# (fresh environment, no prior eval) +azd ai agent eval show +# Expected: ERROR — eval ID required + +# D4. Show specific run details +azd ai agent eval show --eval-run-id +# Expected: per-criteria breakdown, passed/failed/errored counts + +# D5. Export eval + runs to JSON +azd ai agent eval show -O results.json +# Expected: writes {"eval": ..., "runs": [...]} to results.json + +# D6. Export single run to JSON +azd ai agent eval show --eval-run-id -O run.json +# Expected: writes single run result to run.json + +# D7. Custom run limit +azd ai agent eval show --limit 5 +# Expected: at most 5 runs in history +``` + +--- + +### E. 
`azd ai agent optimize` (main command) + +#### Inside azd project + +```bash +# E1. Default optimize — auto-detect agent +azd ai agent optimize +# Expected: if no eval.yaml → uses built-in dataset (3 tasks, 12 criteria) +# if eval.yaml exists → prompts "Found eval.yaml in project. Use it?" + +# E2. Accept eval.yaml prompt +# (run eval init first, then run optimize, confirm yes) +azd ai agent optimize +# Expected: loads config from eval.yaml. Output: "Config: /eval.yaml" + +# E3. Decline eval.yaml prompt +# (eval.yaml exists, decline the prompt) +azd ai agent optimize +# Expected: falls back to built-in defaults. Output: "Dataset: built-in (3 tasks, 12 criteria)" + +# E4. eval.yaml + --no-prompt +$env:AZD_FORCE_TTY = "false" +azd ai agent optimize +# Expected: skips eval.yaml prompt, uses built-in defaults +# Clean up: Remove-Item env:\AZD_FORCE_TTY + +# E5. Explicit --config overrides eval.yaml detection +azd ai agent optimize --config spec.yaml +# Expected: uses spec.yaml, ignores eval.yaml entirely + +# E6. Positional agent arg +azd ai agent optimize my-agent +# Expected: uses "my-agent" as agent name + +# E7. --agent flag +azd ai agent optimize --agent my-agent +# Expected: uses flag value + +# E8. Custom eval model +azd ai agent optimize --eval-model gpt-4o +# Expected: overrides options.eval_model in config + +# E9. Custom strategy (single) +azd ai agent optimize -s skill +# Expected: uses only skill strategy + +# E10. Custom strategy (multiple) +azd ai agent optimize -s instruction -s skill +# Expected: uses both strategies + +# E11. --no-wait +azd ai agent optimize --no-wait +# Expected: submits job, prints ID, returns immediately + +# E12. Watch polling progress +azd ai agent optimize +# Expected: spinner shows status, strategy, iteration, score, elapsed time +# final results table with ★ best candidate and deploy command +``` + +#### Outside azd project + +```bash +mkdir /tmp/opt-test && cd /tmp/opt-test + +# E13. 
No agent flag, no project — should fail +azd ai agent optimize +# Expected: ERROR — "agent name is required: use --agent , or run from an azd project after 'azd deploy'" + +# E14. Explicit agent + endpoint +azd ai agent optimize --agent sample-agent \ + -p https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2 +# Expected: works without project. Uses built-in defaults. + +# E15. Explicit agent via env var +$env:AZURE_AI_PROJECT_ENDPOINT = "https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2" +azd ai agent optimize --agent sample-agent +# Expected: resolves endpoint from env var +# Clean up: Remove-Item env:\AZURE_AI_PROJECT_ENDPOINT + +# E16. With config file, no project +azd ai agent optimize --config spec.yaml +# Expected: loads config from file, no project resolution needed +``` + +#### Config validation (can run anywhere with a config file) + +```bash +# E17. Missing agent name in config +# (create spec.yaml with empty agent.name) +azd ai agent optimize --config spec.yaml +# Expected: ERROR — "agent.name is required" + +# E18. Missing eval model +# (config without options.eval_model) +azd ai agent optimize --config spec.yaml +# Expected: ERROR — "options.eval_model is required" + +# E19. No dataset at all +# (config without dataset_file, dataset_reference, or inline) +azd ai agent optimize --config spec.yaml +# Expected: ERROR — "one of dataset_file or dataset_reference is required" + +# E20. Conflicting dataset +# (config with both dataset_file and dataset_reference) +azd ai agent optimize --config spec.yaml +# Expected: ERROR — "dataset_file and dataset_reference are mutually exclusive" + +# E21. Invalid config file path +azd ai agent optimize --config nonexistent.yaml +# Expected: ERROR — file not found + guidance to check path +``` + +--- + +### F. `azd ai agent optimize status` + +```bash +# F1. 
Status by operation ID +azd ai agent optimize status +# Expected: job summary — ID, Status, Agent, Strategy, Score, Created + +# F2. Auto-resolve from env (after running optimize in project) +azd ai agent optimize status +# Expected: uses OPTIMIZE_LAST_OPERATION_ID from azd env + +# F3. No ID available +# (fresh env, never ran optimize) +azd ai agent optimize status +# Expected: ERROR — operation ID required + +# F4. --watch mode +azd ai agent optimize status --watch +# Expected: polls until job completes, shows spinner + progress + +# F5. Custom poll interval +azd ai agent optimize status --watch --poll-interval 10 +# Expected: polls every 10 seconds instead of 5 + +# F6. Completed job shows candidates +azd ai agent optimize status +# Expected: results table with candidates, scores, deploy command +``` + +--- + +### G. `azd ai agent optimize list` + +```bash +# G1. Default list +azd ai agent optimize list +# Expected: table — ID, Status, Agent, Best Score, Created. Max 20 rows. + +# G2. Filter by status +azd ai agent optimize list --status completed +# Expected: only completed jobs shown + +# G3. Invalid status filter +azd ai agent optimize list --status invalid +# Expected: ERROR — invalid status value + +# G4. Custom limit +azd ai agent optimize list --limit 3 +# Expected: at most 3 entries + +# G5. No jobs exist +# (fresh project endpoint) +azd ai agent optimize list +# Expected: "no optimization jobs found" message +``` + +--- + +### H. `azd ai agent optimize cancel` + +```bash +# H1. Cancel a running job +# (start optimize --no-wait first, then cancel) +azd ai agent optimize --no-wait +azd ai agent optimize cancel +# Expected: job cancelled, shows guidance + +# H2. Cancel already-completed job +azd ai agent optimize cancel +# Expected: ERROR or message — job already in terminal state + +# H3. Missing ID argument +azd ai agent optimize cancel +# Expected: ERROR — requires exactly 1 argument +``` + +--- + +### I. 
`azd ai agent optimize apply` (inside azd project only) + +```bash +# I1. Apply candidate config to agent.yaml +azd ai agent optimize apply --candidate +# Expected: fetches candidate config, writes OPTIMIZATION_CONFIG and +# OPTIMIZATION_CANDIDATE_ID into agent.yaml env vars. +# Downloads skill files. Prints "azd deploy --service ". +# Verify: cat agent.yaml — should see new env vars appended + +# I2. Auto-detect agent service +azd ai agent optimize apply --candidate +# Expected: resolves agent service from azure.yaml automatically + +# I3. Explicit agent service name +azd ai agent optimize apply --candidate --agent sample-agent +# Expected: uses specified service + +# I4. Missing --candidate flag +azd ai agent optimize apply +# Expected: ERROR — --candidate is required + +# I5. Outside azd project — should fail +cd /tmp/empty-test +azd ai agent optimize apply --candidate +# Expected: ERROR — requires azd project, suggests "optimize deploy" instead +``` + +--- + +### J. `azd ai agent optimize deploy` (API-based, works anywhere) + +```bash +# J1. Deploy candidate via API +azd ai agent optimize deploy --candidate --agent sample-agent +# Expected: creates new agent version with OPTIMIZATION_CONFIG, shows new version number + +# J2. Auto-detect agent inside project +cd +azd ai agent optimize deploy --candidate +# Expected: resolves agent name from project + environment + +# J3. Outside project with explicit agent + endpoint +cd /tmp/empty-test +azd ai agent optimize deploy --candidate --agent sample-agent \ + -p https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2 +# Expected: works without project context + +# J4. Missing --candidate +azd ai agent optimize deploy +# Expected: ERROR — --candidate required + +# J5. Verify deployed version +azd ai agent invoke "Hello!" +# Expected: agent responds using optimized config +``` + +--- + +### K. End-to-end flows + +```bash +# K1. 
Full eval → optimize → apply → deploy roundtrip +azd ai agent eval init --dataset ./data.jsonl +azd ai agent eval run +azd ai agent eval list +azd ai agent eval show +azd ai agent optimize # accept eval.yaml prompt +azd ai agent optimize apply --candidate +azd deploy --service sample-agent +azd ai agent invoke "Hello!" + +# K2. Optimize-only flow (no eval init) +azd ai agent optimize +azd ai agent optimize status # auto-resolves last job +azd ai agent optimize deploy --candidate +azd ai agent invoke "Hello!" + +# K3. Standalone flow (outside project) +mkdir /tmp/standalone && cd /tmp/standalone +azd ai agent optimize --agent sample-agent --eval-model gpt-4o --project-id +azd ai agent optimize list +azd ai agent optimize status +``` + +--- + +### L. Error & edge cases + +```bash +# L1. Not logged in +azd auth logout +azd ai agent optimize --agent sample-agent +# Expected: authentication error + +# L2. Invalid endpoint +azd ai agent optimize --agent sample-agent -p https://invalid.endpoint.com +# Expected: error with reachability guidance + +# L3. --help for all commands +azd ai agent eval --help +azd ai agent eval init --help +azd ai agent eval run --help +azd ai agent eval list --help +azd ai agent eval show --help +azd ai agent optimize --help +azd ai agent optimize status --help +azd ai agent optimize list --help +azd ai agent optimize cancel --help +azd ai agent optimize apply --help +azd ai agent optimize deploy --help +# Expected: accurate, complete help text for each + +# L4. Eval model not deployed +azd ai agent optimize --eval-model nonexistent-model +# Expected: job runs but all scores may be zero (known issue — no error message) + +# L5. 
Artifacts directory structure +# (after eval init completes inside project) +ls .azure/.foundry/ +# Expected: datasets/, evaluators/, results/ subdirectories with generated files +``` + +--- + +## Cleanup: Revert to the official extension binary + +After the bugbash, reinstall the released extension to remove the custom binary: + +```powershell +# Windows (PowerShell) +azd ext uninstall azure.ai.agents +azd ext install azure.ai.agents +``` + +```bash +# macOS / Linux +azd ext uninstall azure.ai.agents +azd ext install azure.ai.agents +``` + +This re-downloads the official published binary and removes the custom build overlay. \ No newline at end of file diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go index a01b85117c4..5695b14ce58 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go @@ -97,7 +97,7 @@ Use eval init to generate an eval config, then eval run to execute it.`, } cmd.AddCommand(newEvalInitCommand(extCtx)) - cmd.AddCommand(newEvalRunCommand()) + cmd.AddCommand(newEvalRunCommand(extCtx)) cmd.AddCommand(newEvalListCommand()) cmd.AddCommand(newEvalShowCommand()) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go index ebcf4f6cd05..36e8dd8f8d7 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -79,8 +79,7 @@ the agent project root. Use --no-wait to write pending operation IDs and return. 
cmd.Flags().IntVar(&flags.maxSamples, "max-samples", defaultEvalSamples, "Maximum number of samples to generate") cmd.Flags().StringArrayVar(&flags.evaluators, "evaluator", nil, "Built-in or custom evaluator name") cmd.Flags().StringVarP(&flags.output, "out-file", "O", defaultEvalConfigName, "Eval config path") - cmd.Flags().IntVar(&flags.traceDays, "trace-days", 0, "Include agent traces from the last N days (0 = no traces)") - _ = cmd.Flags().MarkHidden("trace-days") + cmd.Flags().IntVar(&flags.traceDays, "trace-days", 0, "Include agent traces from the last N days for evaluator generation (0 = no traces)") cmd.Flags().BoolVar(&flags.resetDefaults, "reset-defaults", false, "Overwrite an existing eval config") return cmd @@ -118,6 +117,10 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error isRegenerate := false var builtinEvals []string + if flags.resetDefaults && resolved.envName != "" { + clearEvalState(ctx, resolved.azdClient, resolved.envName) + } + if hasExisting && !flags.resetDefaults { if noPrompt { // --no-prompt: treat as full regeneration. diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go index 081b0f01fb5..3160fc203b2 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -61,12 +61,9 @@ func submitDatasetGeneration( resolved *evalResolvedContext, flags *evalInitFlags, ) (*eval_api.GenerationJob, error) { - var traces *eval_api.TraceOptions - if flags.traceDays > 0 { - traces = &eval_api.TraceOptions{Days: flags.traceDays} - } + // Traces are only supported for evaluator generation, not dataset generation. 
sources := eval_api.BuildGenerationSources( - string(resolved.agentKind), resolved.agentName, resolved.version, flags.genInstruction, traces, + string(resolved.agentKind), resolved.agentName, resolved.version, flags.genInstruction, nil, ) request := eval_api.NewDataGenerationJobRequest( resolveEvalName(flags), flags.evalModel, flags.maxSamples, sources, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go index b49acf5af90..214ef748b24 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go @@ -90,40 +90,39 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f } } - // TODO: Re-enable trace prompt once trace support is ready. - // // Ask whether to include traces, unless already set via flags. - // if flags.traceDays == 0 && needsGeneration { - // confirmResp, err := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ - // Options: &azdext.ConfirmOptions{ - // Message: "Include agent traces for evaluation?", - // DefaultValue: new(bool), // default false - // }, - // }) - // if err != nil { - // return fmt.Errorf("prompting for trace inclusion: %w", err) - // } - // if confirmResp.GetValue() { - // rangeChoices := []*azdext.SelectChoice{ - // {Label: "Last Day", Value: "1"}, - // {Label: "Last 7 Days", Value: "7"}, - // {Label: "Last 30 Days", Value: "30"}, - // {Label: "Last 90 Days", Value: "90"}, - // } - // defaultRangeIdx := int32(1) // 7 days - // rangeResp, err := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ - // Options: &azdext.SelectOptions{ - // Message: "Select trace time range", - // Choices: rangeChoices, - // SelectedIndex: &defaultRangeIdx, - // }, - // }) - // if err != nil { - // return fmt.Errorf("prompting for trace time range: %w", err) - // } - // days, _ := 
strconv.Atoi(rangeChoices[int(*rangeResp.Value)].Value) - // flags.traceDays = days - // } - // } + // Ask whether to include traces for evaluator generation, unless already set via flags. + if flags.traceDays == 0 && needsGeneration { + confirmResp, err := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: "Include agent traces for evaluator generation?", + DefaultValue: new(bool), // default false + }, + }) + if err != nil { + return fmt.Errorf("prompting for trace inclusion: %w", err) + } + if confirmResp.GetValue() { + rangeChoices := []*azdext.SelectChoice{ + {Label: "Last Day", Value: "1"}, + {Label: "Last 7 Days", Value: "7"}, + {Label: "Last 30 Days", Value: "30"}, + {Label: "Last 90 Days", Value: "90"}, + } + defaultRangeIdx := int32(1) // 7 days + rangeResp, err := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ + Options: &azdext.SelectOptions{ + Message: "Select trace time range", + Choices: rangeChoices, + SelectedIndex: &defaultRangeIdx, + }, + }) + if err != nil { + return fmt.Errorf("prompting for trace time range: %w", err) + } + days, _ := strconv.Atoi(rangeChoices[int(*rangeResp.Value)].Value) + flags.traceDays = days + } + } if !needsGeneration { return nil diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go index 298592fff4f..43febc68e50 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go @@ -24,7 +24,7 @@ type evalRunFlags struct { config string } -func newEvalRunCommand() *cobra.Command { +func newEvalRunCommand(extCtx *azdext.ExtensionContext) *cobra.Command { flags := &evalRunFlags{config: defaultEvalConfigName} cmd := &cobra.Command{ Use: "run", @@ -34,14 +34,14 @@ func newEvalRunCommand() *cobra.Command { ctx := azdext.WithAccessToken(cmd.Context()) logCleanup := setupDebugLogging(cmd.Flags()) defer logCleanup() - 
return runEvalRun(ctx, flags) + return runEvalRun(ctx, flags, extCtx.NoPrompt) }, } cmd.Flags().StringVar(&flags.config, "config", defaultEvalConfigName, "Local eval config YAML") return cmd } -func runEvalRun(ctx context.Context, flags *evalRunFlags) error { +func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { resolved, err := resolveEvalContext(ctx, evalContextOptions{}) if err != nil { return err @@ -69,6 +69,19 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags) error { } evalID := state.EvalID + if evalID != "" && !noPrompt { + // Ask whether to reuse the existing eval or create a new one. + resp, promptErr := resolved.azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: fmt.Sprintf("Found existing eval %s. Reuse it?", evalID), + DefaultValue: new(false), + }, + }) + if promptErr == nil && resp.Value != nil && !*resp.Value { + evalID = "" // user chose to create a new eval + } + } + if evalID == "" { created, err := resolved.evalClient.CreateOpenAIEval( ctx, buildOpenAIEvalRequest(evalCfg), DefaultAgentAPIVersion, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go index 9a8c88b49e6..52498624d82 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -153,6 +153,7 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { var cfg *OptimizeConfig configSource := "" // tracks where the config came from for user messaging + hasProject := false if a.flags.configFile != "" { cfg, err = LoadOptimizeConfig(a.flags.configFile) @@ -165,6 +166,7 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { if err != nil { return err } + hasProject = resolved.agentProject != "" // Check if eval.yaml exists in the agent project and offer to use it if resolved.agentProject != "" { 
@@ -245,7 +247,7 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { if err != nil { return err } - printOptimizeResults(out, finalStatus) + printOptimizeResults(out, finalStatus, hasProject) } return nil @@ -298,7 +300,7 @@ func pollOptimizeJob( return finalStatus, nil } -func printOptimizeResults(out io.Writer, status *optimize_api.OptimizeJobStatus) { +func printOptimizeResults(out io.Writer, status *optimize_api.OptimizeJobStatus, hasProject bool) { if status.Error != nil { fmt.Fprintf(out, "\n %s %s\n", color.RedString("Error:"), status.Error.Message) } @@ -352,15 +354,21 @@ func printOptimizeResults(out io.Writer, status *optimize_api.OptimizeJobStatus) } } - // Print deploy command for best candidate + // Print next-step commands for best candidate if status.Best != nil && status.Best.CandidateID != "" { agentName := "" if status.Agent != nil { agentName = status.Agent.AgentName } - fmt.Fprintf(out, "\n Deploy the best candidate:\n") - fmt.Fprintf(out, " azd ai agent optimize deploy --candidate %s --agent %s\n", - status.Best.CandidateID, agentName) + if hasProject { + fmt.Fprintf(out, "\n Apply the best candidate locally, then deploy:\n") + fmt.Fprintf(out, " azd ai agent optimize apply --candidate %s\n", status.Best.CandidateID) + fmt.Fprintf(out, " azd deploy\n") + } else { + fmt.Fprintf(out, "\n Deploy the best candidate:\n") + fmt.Fprintf(out, " azd ai agent optimize deploy --candidate %s --agent %s\n", + status.Best.CandidateID, agentName) + } } fmt.Fprintln(out) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go index 680aa7b95d3..4c6b912aa5b 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go @@ -88,9 +88,9 @@ func runOptimizeStatus(cmd *cobra.Command, flags *optimizeStatusFlags, operation if err != nil { return err } - 
printOptimizeResults(out, finalStatus) + printOptimizeResults(out, finalStatus, false) } else if len(status.Candidates) > 0 { - printOptimizeResults(out, status) + printOptimizeResults(out, status, false) } if status.Error != nil { diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go index 55d6c2abef9..68d50dda357 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go @@ -125,7 +125,7 @@ func (c *EvalConfig) ToAgentTargetAdaptableEvalGroupRequest() *CreateOpenAIEvalR Name: evaluator, EvaluatorName: evaluator, DataMapping: map[string]string{ - "messages": "{{item.messages}}", + //"messages": "{{item.messages}}", "query": "{{item.query}}", "response": "{{sample.output_text}}", }, From ab3eaa80f5166a41e91748d9152d8efec496e161 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 14 May 2026 12:00:34 -0700 Subject: [PATCH 03/33] more bug bash --- .../azd_observability_bugbash.md | 9 ++- .../internal/cmd/eval_helpers.go | 68 +++++++++++++++++++ .../azure.ai.agents/internal/cmd/eval_init.go | 7 +- .../internal/cmd/eval_init_prompts.go | 5 +- .../internal/cmd/eval_init_test.go | 2 +- .../azure.ai.agents/internal/cmd/eval_run.go | 6 +- .../pkg/agents/eval_api/eval_config.go | 14 ++-- .../internal/pkg/agents/eval_api/models.go | 2 +- 8 files changed, 93 insertions(+), 20 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md index 331e933fb38..bf315e977ed 100644 --- a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md +++ b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md @@ -204,6 +204,8 @@ azd ai agent optimize cancel ### 6f. 
Deploy the winning candidate +> **⚠️ Known Issue:** Due to a FAOS CANDIDATE API issue, `optimize deploy` and `optimize apply` cannot fetch candidate config at this time. This step is blocked until the API issue is resolved. + The optimize output includes a ready-to-use deploy command: ```bash @@ -216,6 +218,8 @@ at startup and applies the optimized settings. ### 6g. Verify the optimized agent +> **⚠️ Blocked:** This step depends on 6f, which is currently blocked by the FAOS CANDIDATE API issue. + ```bash azd ai agent invoke "Hello!" # Expected: agent responds using the optimized configuration @@ -276,8 +280,7 @@ azd ai agent eval init --dataset ./data.jsonl --reset-defaults # Expected: overwrites eval.yaml without prompting about existing config # A11. Non-interactive mode (no prompts) -$env:AZD_FORCE_TTY = "false" # PowerShell -azd ai agent eval init --dataset ./data.jsonl +azd ai agent eval init --dataset ./data.jsonl --no-prompt # Expected: uses defaults without prompting. Full regeneration if eval.yaml exists. # Clean up: Remove-Item env:\AZD_FORCE_TTY @@ -603,6 +606,8 @@ azd ai agent optimize cancel ### I. `azd ai agent optimize apply` (inside azd project only) +> **⚠️ Known Issue:** Due to a FAOS CANDIDATE API issue, `optimize apply` and `optimize deploy` cannot apply the optimized result at this time. These commands will fail when trying to fetch candidate config. + ```bash # I1. 
Apply candidate config to agent.yaml azd ai agent optimize apply --candidate diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go index fac4411a2a8..f64804be7b0 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go @@ -5,15 +5,21 @@ package cmd import ( "context" + "encoding/base64" "encoding/json" "fmt" "log" "os" "path/filepath" + "strings" "azureaiagent/internal/pkg/agents/dataset_api" "azureaiagent/internal/pkg/agents/eval_api" "azureaiagent/internal/pkg/agents/opteval" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/google/uuid" ) // foundryBaseDir is the base directory for eval artifacts under the project root. @@ -195,6 +201,68 @@ func writeJSONFile(path string, v any) error { return os.WriteFile(path, data, 0600) } +// buildEvalReportURL constructs the Foundry portal URL for an eval run report. +// It reads AZURE_AI_PROJECT_ID from the azd environment and encodes the subscription ID. +// Returns empty string on any failure. +func buildEvalReportURL(ctx context.Context, azdClient *azdext.AzdClient, envName, evalID, runID string) string { + if azdClient == nil || envName == "" || evalID == "" || runID == "" { + return "" + } + v, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envName, + Key: "AZURE_AI_PROJECT_ID", + }) + if err != nil || v.Value == "" { + log.Printf("[debug] could not read AZURE_AI_PROJECT_ID: %v", err) + return "" + } + reportURL, err := evalReportURL(v.Value, evalID, runID) + if err != nil { + log.Printf("[debug] failed to build eval report URL: %v", err) + return "" + } + return reportURL +} + +// evalReportURL constructs a URL to the eval run report in the Foundry portal. 
+// It parses the ARM resource ID to extract subscription, resource group, account, and project info. +func evalReportURL(projectResourceID, evalID, runID string) (string, error) { + resourceID, err := arm.ParseResourceID(projectResourceID) + if err != nil { + return "", fmt.Errorf("failed to parse project resource ID: %w", err) + } + + encodedSub, err := encodeSubscriptionForURL(resourceID.SubscriptionID) + if err != nil { + return "", fmt.Errorf("failed to encode subscription ID: %w", err) + } + + if resourceID.Parent == nil || + !strings.Contains(string(resourceID.ResourceType.Type), "/") { + return "", fmt.Errorf( + "resource ID does not represent a Foundry project (missing parent account): %s", + projectResourceID, + ) + } + + return fmt.Sprintf( + "https://ai.azure.com/nextgen/r/%s,%s,,%s,%s/build/evaluations/%s/run/%s", + encodedSub, resourceID.ResourceGroupName, + resourceID.Parent.Name, resourceID.Name, + evalID, runID, + ), nil +} + +// encodeSubscriptionForURL encodes a subscription ID GUID as base64 without padding. +func encodeSubscriptionForURL(subscriptionID string) (string, error) { + guid, err := uuid.Parse(subscriptionID) + if err != nil { + return "", fmt.Errorf("invalid subscription ID format: %w", err) + } + guidBytes, _ := guid.MarshalBinary() + return strings.TrimRight(base64.URLEncoding.EncodeToString(guidBytes), "="), nil +} + // formatAny converts any value to a string for display. 
func formatAny(v any) string { if v == nil { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go index 36e8dd8f8d7..f474e6c5855 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -209,7 +209,7 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error } } else { needDatasetGen = flags.dataset == "" - needEvalGen = len(flags.evaluators) == 0 + needEvalGen = true // always generate adaptive evaluator if !needDatasetGen { // User provided a local dataset file — use it directly. datasetPath, err := resolveLocalDatasetFile(flags.dataset, resolved.agentProject) @@ -218,8 +218,9 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error } evalCfg.DatasetFile = datasetPath } - if !needEvalGen { - evalCfg.Evaluators = evaluatorsFromFlags(flags.evaluators) + // --evaluator values are merged with the generated adaptive evaluator. + if len(flags.evaluators) > 0 { + builtinEvals = evaluatorsFromFlags(flags.evaluators) } } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go index 214ef748b24..6144684e66a 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go @@ -38,7 +38,8 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f } } - needsGeneration := flags.dataset == "" || len(flags.evaluators) == 0 + needsGeneration := true // adaptive evaluator is always generated + needsEvalGen := true if flags.genInstruction == "" && needsGeneration && resolved.agentKind != agent_yaml.AgentKindPrompt { // Let the user choose between inline text or loading from a file. 
@@ -91,7 +92,7 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f } // Ask whether to include traces for evaluator generation, unless already set via flags. - if flags.traceDays == 0 && needsGeneration { + if flags.traceDays == 0 && needsEvalGen { confirmResp, err := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ Options: &azdext.ConfirmOptions{ Message: "Include agent traces for evaluator generation?", diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go index 56d6657d76f..88e06deb754 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go @@ -412,7 +412,7 @@ func TestBuildOpenAIEvalRequest(t *testing.T) { assert.Equal(t, "gpt-4o", req.TestingCriteria[0].InitializationParameters["model"]) assert.Equal(t, "{{item.messages}}", req.TestingCriteria[0].DataMapping["messages"]) assert.Equal(t, "{{item.query}}", req.TestingCriteria[0].DataMapping["query"]) - assert.Equal(t, "{{sample.output_text}}", req.TestingCriteria[0].DataMapping["response"]) + assert.Equal(t, "{{sample.output_items}}", req.TestingCriteria[0].DataMapping["response"]) } func TestBuildOpenAIEvalRequest_WithDatasetFile(t *testing.T) { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go index 43febc68e50..ffb5b823388 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go @@ -140,8 +140,10 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { if run.ID != "" { fmt.Printf(" Run: %s\n", run.ID) } - if run.ReportURL != "" { - fmt.Printf(" Report: %s\n", run.ReportURL) + + reportURL := buildEvalReportURL(ctx, resolved.azdClient, resolved.envName, evalID, run.ID) + if reportURL != "" { + 
fmt.Printf(" Report: %s\n", color.CyanString(reportURL)) } return nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go index 68d50dda357..9fe905af624 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go @@ -96,19 +96,14 @@ func (c *EvalConfig) ToAgentTargetAdaptableEvalGroupRequest() *CreateOpenAIEvalR DataSourceConfig: &DataSourceConfig{ Type: "custom", ItemSchema: map[string]any{}, - IncludeSampleSchema: true, + IncludeSampleSchema: false, Schema: &DataSourceSchema{ Item: map[string]any{ "type": "object", "properties": map[string]any{ "query": map[string]any{"type": "string"}, }, - }, - Sample: map[string]any{ - "type": "object", - "properties": map[string]any{ - "output_text": map[string]any{"type": "string"}, - }, + "required": []string{}, }, }, }, @@ -127,13 +122,14 @@ func (c *EvalConfig) ToAgentTargetAdaptableEvalGroupRequest() *CreateOpenAIEvalR DataMapping: map[string]string{ //"messages": "{{item.messages}}", "query": "{{item.query}}", - "response": "{{sample.output_text}}", + "response": "{{sample.output_items}}", }, } if evalModel != "" { - criterion.InitializationParameters = map[string]string{ + criterion.InitializationParameters = map[string]any{ "model": evalModel, "deployment_name": evalModel, + "threshold": 3, } } request.TestingCriteria = append(request.TestingCriteria, criterion) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go index ec2bb142fb0..e9662eb66ae 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go @@ -178,7 +178,7 @@ type TestingCriterion struct { Type string 
`json:"type"` Name string `json:"name"` EvaluatorName string `json:"evaluator_name"` - InitializationParameters map[string]string `json:"initialization_parameters,omitempty"` + InitializationParameters map[string]any `json:"initialization_parameters,omitempty"` DataMapping map[string]string `json:"data_mapping,omitempty"` } From b10de46e49dcb47871d5e35a5c6d8c1bf0351048 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 14 May 2026 12:06:07 -0700 Subject: [PATCH 04/33] add options --- .../azd_observability_bugbash.md | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md index bf315e977ed..55073c726af 100644 --- a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md +++ b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md @@ -189,6 +189,39 @@ Results: The ★ marks the best candidate. Copy the deploy command from the output to promote it. +#### Customizing optimization options in `eval.yaml` + +You can fine-tune optimization behavior by adding or modifying the `options:` section in your `eval.yaml`. Below are all available fields, their types, and defaults: + +```yaml +options: + eval_model: "gpt-4o" # (string) Model used for evaluation. Default: "gpt-4o" + mode: "optimize" # (string) Run mode. Default: "optimize" + strategies: # ([]string) Optimization strategies to try. + - instruction # Default: ["instruction", "skill", "agents-optimization-job"] + - skill + - agents-optimization-job + budget: 5 # (int) Max optimization budget (number of candidates). Default: 5 + max_iterations: 2 # (int) Max iterations per strategy. Default: 2 (when strategies are default) + min_improvement: 0.0 # (float) Minimum score improvement to accept a candidate. Default: 0 (not set) + improvement_threshold: 0.0 # (float) Threshold for incremental improvement. 
Default: 0 (not set) + pass_threshold: 0.0 # (float) Minimum passing score. Default: 0 (not set) + keep_versions: false # (bool) Keep all intermediate agent versions. Default: false + tasks_per_iteration: 0 # (int) Number of tasks per iteration. Default: 0 (server decides) + reflection_model: "" # (string) Model for reflection steps. Default: "" (uses eval_model) +``` + +For example, to increase the budget and use a different eval model: + +```yaml +options: + eval_model: "gpt-4.1" + budget: 10 + max_iterations: 3 +``` + +Fields you omit will use the defaults above. The `strategies` field defaults to all three strategies if not specified. + ### 6e. Monitor optimization jobs ```bash From 9c60cec4cc3f124b37dd1fdee6549783689c9c33 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 14 May 2026 12:56:54 -0700 Subject: [PATCH 05/33] fix more --- .../pkg/agents/eval_api/eval_config.go | 25 ++++++++--------- .../internal/pkg/agents/eval_api/models.go | 28 ++++++------------- 2 files changed, 21 insertions(+), 32 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go index 9fe905af624..d1c46d3c384 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go @@ -7,6 +7,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "azureaiagent/internal/pkg/agents/opteval" @@ -95,15 +96,11 @@ func (c *EvalConfig) ToAgentTargetAdaptableEvalGroupRequest() *CreateOpenAIEvalR }, DataSourceConfig: &DataSourceConfig{ Type: "custom", - ItemSchema: map[string]any{}, - IncludeSampleSchema: false, - Schema: &DataSourceSchema{ - Item: map[string]any{ - "type": "object", - "properties": map[string]any{ - "query": map[string]any{"type": "string"}, - }, - "required": []string{}, + IncludeSampleSchema: true, + ItemSchema: map[string]any{ + "type": "object", 
+ "properties": map[string]any{ + "query": map[string]any{"type": "string"}, }, }, }, @@ -115,21 +112,23 @@ func (c *EvalConfig) ToAgentTargetAdaptableEvalGroupRequest() *CreateOpenAIEvalR evalModel = c.Options.EvalModel } for _, evaluator := range c.Evaluators { + apiName := strings.TrimPrefix(evaluator, "builtin.") criterion := TestingCriterion{ Type: "azure_ai_evaluator", - Name: evaluator, + Name: apiName, EvaluatorName: evaluator, DataMapping: map[string]string{ //"messages": "{{item.messages}}", - "query": "{{item.query}}", - "response": "{{sample.output_items}}", + "query": "{{item.query}}", + "response": "{{sample.output_items}}", + "tool_calls": "{{sample.tool_calls}}", + "tool_definitions": "{{sample.tool_definitions}}", }, } if evalModel != "" { criterion.InitializationParameters = map[string]any{ "model": evalModel, "deployment_name": evalModel, - "threshold": 3, } } request.TestingCriteria = append(request.TestingCriteria, criterion) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go index e9662eb66ae..6ee5414d640 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go @@ -161,10 +161,9 @@ type Dataset struct { // DataSourceConfig describes the data source for an OpenAI eval. type DataSourceConfig struct { - Type string `json:"type"` - ItemSchema map[string]any `json:"item_schema"` - IncludeSampleSchema bool `json:"include_sample_schema"` - Schema *DataSourceSchema `json:"schema,omitempty"` + Type string `json:"type"` + ItemSchema map[string]any `json:"item_schema"` + IncludeSampleSchema bool `json:"include_sample_schema"` } // DataSourceSchema defines the item and sample schemas for an eval data source. @@ -256,15 +255,9 @@ type EvalRunInputMessages struct { // EvalRunMessageTemplate describes a single message in the input template. 
type EvalRunMessageTemplate struct { - Role string `json:"role"` - Content *EvalRunMessageContent `json:"content"` - Type string `json:"type"` -} - -// EvalRunMessageContent describes the content of a template message. -type EvalRunMessageContent struct { - Type string `json:"type"` - Text string `json:"text"` + Role string `json:"role"` + Content string `json:"content"` + Type string `json:"type"` } // EvalRunTarget describes the agent target for completions. @@ -291,12 +284,9 @@ func NewAgentTargetDataSource(agentName string, agentVersion *string) *EvalRunDa Type: "template", Template: []EvalRunMessageTemplate{ { - Role: "user", - Content: &EvalRunMessageContent{ - Type: "input_text", - Text: "{{item.query}}", - }, - Type: "message", + Role: "user", + Content: "{{item.query}}", + Type: "message", }, }, }, From bea8f92b6cb659a0ce6724f3c19b0a717281a24c Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 14 May 2026 13:00:51 -0700 Subject: [PATCH 06/33] check ui --- cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md | 1 + 1 file changed, 1 insertion(+) diff --git a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md index 55073c726af..3cbe0e9e379 100644 --- a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md +++ b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md @@ -238,6 +238,7 @@ azd ai agent optimize cancel ### 6f. Deploy the winning candidate > **⚠️ Known Issue:** Due to a FAOS CANDIDATE API issue, `optimize deploy` and `optimize apply` cannot fetch candidate config at this time. This step is blocked until the API issue is resolved. 
+But you can check agent optimization job in foundry UI with `azd ai agent optimize` The optimize output includes a ready-to-use deploy command: From f24e4b3448c014438623050d9255387152acb2dd Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 14 May 2026 13:35:52 -0700 Subject: [PATCH 07/33] fix --- cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md index 3cbe0e9e379..e3612348554 100644 --- a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md +++ b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md @@ -238,7 +238,7 @@ azd ai agent optimize cancel ### 6f. Deploy the winning candidate > **⚠️ Known Issue:** Due to a FAOS CANDIDATE API issue, `optimize deploy` and `optimize apply` cannot fetch candidate config at this time. This step is blocked until the API issue is resolved. 
-But you can check agent optimization job in foundry UI with `azd ai agent optimize` +But you can check agent optimization job in foundry UI with `?flight=enable_faos_read_ui` The optimize output includes a ready-to-use deploy command: From bbd23ec2fc6b54737937e679f72dc34f1dff2cfb Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 14 May 2026 16:56:40 -0700 Subject: [PATCH 08/33] add azd ai agent eval init command --- .../azure.ai.agents/internal/project/service_target_agent.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/project/service_target_agent.go b/cli/azd/extensions/azure.ai.agents/internal/project/service_target_agent.go index fbaac59af18..7d1430458b0 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/project/service_target_agent.go +++ b/cli/azd/extensions/azure.ai.agents/internal/project/service_target_agent.go @@ -1308,7 +1308,8 @@ func (p *AgentServiceTargetProvider) deployArtifacts( if len(endpoints) > 0 { last := artifacts[len(artifacts)-1] last.Metadata["note"] = "For information on invoking the agent, see " + output.WithLinkFormat( - "https://aka.ms/azd-agents-invoke") + "https://aka.ms/azd-agents-invoke") + + "\n\nSet up an evaluation suite in one step with " + output.WithHighLightFormat("azd ai agent eval init") } } From 1e26a02782dab96efbe4a16261114b67bb968c40 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 14 May 2026 18:10:41 -0700 Subject: [PATCH 09/33] server change of targetAttributes --- .../internal/cmd/eval_init_jobs.go | 14 ++++++--- .../azure.ai.agents/internal/cmd/optimize.go | 28 +++++++++--------- .../internal/cmd/optimize_config.go | 10 +++---- .../internal/cmd/optimize_config_test.go | 2 +- .../internal/pkg/agents/opteval/yaml.go | 29 +++++++++++++++---- .../internal/pkg/agents/opteval/yaml_test.go | 19 ++++++++++-- .../pkg/agents/optimize_api/models.go | 2 +- .../pkg/agents/optimize_api/models_test.go | 4 +-- 8 files changed, 73 insertions(+), 35 
deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go index 3160fc203b2..24420b9ba02 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -18,6 +18,7 @@ import ( "azureaiagent/internal/pkg/agents/eval_api" "azureaiagent/internal/pkg/agents/opteval" + "github.com/azure/azure-dev/cli/azd/pkg/ux" "github.com/fatih/color" ) @@ -177,12 +178,17 @@ func pollAndFinalizeJobs( !eval_api.ParseJobStatus(state.EvalGenStatus).IsTerminal() // When both jobs run in parallel, disable individual spinners to avoid - // overlapping terminal output. Print status lines upfront instead. + // overlapping terminal output. Show a single combined spinner instead. parallel := pollDataset && pollEval if parallel { - fmt.Println(" Waiting for generation jobs...") - fmt.Printf(" - Dataset generation: %s\n", state.DatasetGenOpID) - fmt.Printf(" - Evaluator generation: %s\n", state.EvalGenOpID) + spinner := ux.NewSpinner(&ux.SpinnerOptions{ + Text: "Generating dataset and evaluators...", + ClearOnStop: true, + }) + if err := spinner.Start(ctx); err != nil { + fmt.Println(" Generating dataset and evaluators...") + } + defer func() { _ = spinner.Stop(ctx) }() } if pollDataset { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go index 52498624d82..96d4af7da02 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -66,13 +66,13 @@ func resolveOptimizeAgent(ctx context.Context, flagValue string, noPrompt bool) } type optimizeFlags struct { - configFile string - agent string - evalModel string - strategies []string - noWait bool - watch bool - pollInterval int + configFile string + agent string + evalModel string + targetAttributes 
[]string + noWait bool + watch bool + pollInterval int optimizeConnectionFlags } @@ -93,11 +93,11 @@ Use --config for a custom YAML spec, or just provide the agent name to use sensi # Optimize a specific agent azd ai agent optimize my-agent - # Optimize with skill strategy - azd ai agent optimize --strategy skill + # Optimize with skill target + azd ai agent optimize --target skill - # Optimize with both strategies - azd ai agent optimize --strategy instruction --strategy skill + # Optimize with multiple target attributes + azd ai agent optimize --target instruction --target skill # Full control via config file azd ai agent optimize --config spec.yaml @@ -124,7 +124,7 @@ Use --config for a custom YAML spec, or just provide the agent name to use sensi cmd.Flags().StringVarP(&flags.configFile, "config", "c", "", "Path to YAML config file (optional — uses defaults if omitted)") cmd.Flags().StringVarP(&flags.agent, "agent", "a", "", "Agent name (auto-detected from azd project if omitted)") cmd.Flags().StringVarP(&flags.evalModel, "eval-model", "m", "gpt-4.1-mini", "Model for evaluation") - cmd.Flags().StringArrayVarP(&flags.strategies, "strategy", "s", nil, "Optimization strategy: instruction, skill (repeatable)") + cmd.Flags().StringArrayVarP(&flags.targetAttributes, "target", "s", nil, "Target attribute for optimization: instruction, skill (repeatable)") cmd.Flags().BoolVar(&flags.noWait, "no-wait", false, "Submit job and return immediately without waiting for completion") cmd.Flags().BoolVar(&flags.watch, "watch", true, "Watch for job completion (opposite of --no-wait)") cmd.Flags().IntVar(&flags.pollInterval, "poll-interval", 5, "Polling interval in seconds") @@ -205,8 +205,8 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { if a.flags.evalModel != "" { cfg.Options.EvalModel = a.flags.evalModel } - if len(a.flags.strategies) > 0 { - cfg.Options.Strategies = a.flags.strategies + if len(a.flags.targetAttributes) > 0 { + 
cfg.Options.TargetAttributes = a.flags.targetAttributes } out := cmd.OutOrStdout() diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go index 64f3178854c..0db182880d2 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -82,10 +82,10 @@ func defaultOptimizeConfig(agentName string) *OptimizeConfig { }, InlineDataset: defaultDataset, Options: &opteval.Options{ - EvalModel: "gpt-4o", - Mode: "optimize", - Strategies: []string{"instruction", "skill", "agents-optimization-job"}, - Budget: 5, + EvalModel: "gpt-4o", + Mode: "optimize", + TargetAttributes: []string{"instruction", "skill", "agents-optimization-job"}, + Budget: 5, }, } } @@ -141,7 +141,7 @@ func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.Optimi MinImprovement: c.Options.MinImprovement, ImprovementThreshold: c.Options.ImprovementThreshold, PassThreshold: c.Options.PassThreshold, - Strategies: c.Options.Strategies, + TargetAttributes: c.Options.TargetAttributes, KeepVersions: c.Options.KeepVersions, TasksPerIteration: c.Options.TasksPerIteration, ReflectionModel: c.Options.ReflectionModel, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go index b1b10cab059..2dea9da8713 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go @@ -282,7 +282,7 @@ options: require.NotNil(t, cfg.Options) assert.Equal(t, "gpt-4o", cfg.Options.EvalModel) assert.Equal(t, "evaluate", cfg.Options.Mode) - assert.Equal(t, []string{"instruction"}, cfg.Options.Strategies) + assert.Equal(t, []string{"instruction"}, cfg.Options.TargetAttributes) assert.Equal(t, 3, cfg.Options.Budget) // Validate + ToRequest 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go index eb59a1268e1..a64e6d93888 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go @@ -49,7 +49,7 @@ type DatasetRef struct { type Options struct { EvalModel string `yaml:"eval_model,omitempty"` Mode string `yaml:"mode,omitempty"` - Strategies []string `yaml:"strategies,omitempty"` + TargetAttributes []string `yaml:"target_attributes,omitempty"` Budget int `yaml:"budget,omitempty"` MaxIterations int `yaml:"max_iterations,omitempty"` MinImprovement float64 `yaml:"min_improvement,omitempty"` @@ -60,18 +60,35 @@ type Options struct { ReflectionModel string `yaml:"reflection_model,omitempty"` } -// DefaultStrategies are the default optimization strategies. -var DefaultStrategies = []string{"instruction", "skill", "agents-optimization-job"} +// DefaultTargetAttributes are the default optimization target attributes. +var DefaultTargetAttributes = []string{"instruction", "skill", "agents-optimization-job"} -// UnmarshalYAML populates default strategies when the field is absent in YAML. +// Deprecated: DefaultStrategies is an alias for backward compatibility. +var DefaultStrategies = DefaultTargetAttributes + +// UnmarshalYAML populates default target attributes when the field is absent in YAML. +// For backward compatibility, the legacy "strategies" key is also accepted. func (o *Options) UnmarshalYAML(value *yaml.Node) error { // Alias avoids infinite recursion. type raw Options if err := value.Decode((*raw)(o)); err != nil { return err } - if len(o.Strategies) == 0 { - o.Strategies = slices.Clone(DefaultStrategies) + + // Backward compatibility: if "strategies" is present and target_attributes is not, + // migrate the value. 
+ if len(o.TargetAttributes) == 0 { + var legacy struct { + Strategies []string `yaml:"strategies"` + } + _ = value.Decode(&legacy) + if len(legacy.Strategies) > 0 { + o.TargetAttributes = legacy.Strategies + } + } + + if len(o.TargetAttributes) == 0 { + o.TargetAttributes = slices.Clone(DefaultTargetAttributes) o.MaxIterations = 2 } return nil diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go index a97f1dfd0d0..c1d5b56c307 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go @@ -136,7 +136,7 @@ func TestOptions_YAMLFields(t *testing.T) { input := ` eval_model: gpt-4.1 mode: full -strategies: +target_attributes: - prompt - tool budget: 500 @@ -153,7 +153,7 @@ reflection_model: gpt-4o assert.Equal(t, "gpt-4.1", opts.EvalModel) assert.Equal(t, "full", opts.Mode) - assert.Equal(t, []string{"prompt", "tool"}, opts.Strategies) + assert.Equal(t, []string{"prompt", "tool"}, opts.TargetAttributes) assert.Equal(t, 500, opts.Budget) assert.Equal(t, 10, opts.MaxIterations) assert.InDelta(t, 0.05, opts.MinImprovement, 0.001) @@ -163,3 +163,18 @@ reflection_model: gpt-4o assert.Equal(t, 20, opts.TasksPerIteration) assert.Equal(t, "gpt-4o", opts.ReflectionModel) } + +func TestOptions_LegacyStrategiesBackwardCompat(t *testing.T) { + t.Parallel() + + input := ` +eval_model: gpt-4.1 +strategies: + - prompt + - tool +` + var opts Options + require.NoError(t, yaml.Unmarshal([]byte(input), &opts)) + + assert.Equal(t, []string{"prompt", "tool"}, opts.TargetAttributes) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go index 9aa22338094..6165152cdaa 100644 --- 
a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go @@ -86,7 +86,7 @@ type OptimizeOptions struct { ImprovementThreshold float64 `json:"improvementThreshold,omitempty"` PassThreshold float64 `json:"passThreshold,omitempty"` EvalModel string `json:"evalModel"` - Strategies []string `json:"strategies,omitempty"` + TargetAttributes []string `json:"targetAttributes,omitempty"` KeepVersions bool `json:"keepVersions,omitempty"` TasksPerIteration int `json:"tasksPerIteration,omitempty"` MaxReflectionTasks int `json:"maxReflectionTasks,omitempty"` diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go index 57884b0bbf7..2af26052379 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go @@ -51,7 +51,7 @@ func TestOptimizeRequest_RoundTrip(t *testing.T) { ImprovementThreshold: 0.05, PassThreshold: 0.8, EvalModel: "gpt-4o-mini", - Strategies: []string{"prompt_mutation"}, + TargetAttributes: []string{"prompt_mutation"}, KeepVersions: true, TasksPerIteration: 10, MaxReflectionTasks: 3, @@ -71,7 +71,7 @@ func TestOptimizeRequest_RoundTrip(t *testing.T) { `"options"`, `"evalModel"`, `"maxIterations"`, `"minImprovement"`, `"improvementThreshold"`, `"passThreshold"`, `"keepVersions"`, `"tasksPerIteration"`, `"maxReflectionTasks"`, `"reflectionModel"`, - `"groundTruth"`, `"systemPrompt"`, `"skills"`, + `"targetAttributes"`, `"groundTruth"`, `"systemPrompt"`, `"skills"`, } { assert.True(t, strings.Contains(s, field), "JSON should contain %s", field) } From a9dbafe4c5c9dcde019edb52606fefc571cb82f1 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 14 May 2026 22:50:36 -0700 Subject: [PATCH 10/33] fix for dataset generation 
--- .../azure.ai.agents/internal/cmd/eval.go | 2 +- .../azure.ai.agents/internal/cmd/eval_init.go | 6 +- .../internal/cmd/eval_init_jobs.go | 27 +++---- .../internal/cmd/eval_init_prompts.go | 4 +- .../internal/cmd/eval_init_test.go | 28 +++++--- .../azure.ai.agents/internal/cmd/eval_test.go | 24 ++++--- .../internal/cmd/optimize_config.go | 1 + .../internal/pkg/agents/eval_api/models.go | 71 +++++++++---------- .../pkg/agents/eval_api/operations_test.go | 16 +++-- .../pkg/agents/optimize_api/models.go | 26 +++---- .../pkg/agents/optimize_api/models_test.go | 3 +- 11 files changed, 111 insertions(+), 97 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go index 5695b14ce58..0639d5ed03d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go @@ -32,7 +32,7 @@ const ( defaultEvalConfigName = "eval.yaml" defaultEvalName = "smoke-core" defaultEvalModel = "gpt-4o" - defaultEvalSamples = 100 + defaultEvalSamples = 15 ) type evalConfig = eval_api.EvalConfig diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go index f474e6c5855..d4eed0e1ea9 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -76,7 +76,7 @@ the agent project root. Use --no-wait to write pending operation IDs and return. 
cmd.Flags().StringVarP(&flags.genInstructionFile, "gen-instruction-file", "G", "", "Path to a file containing the generation instruction") cmd.Flags().StringVar(&flags.evalModel, "eval-model", defaultEvalModel, "Model used for evaluation and generation, and also as the default model for evaluation") cmd.Flags().StringVar(&flags.dataset, "dataset", "", "Existing local file or registered dataset name to use for evaluation (instead of generating a new dataset)") - cmd.Flags().IntVar(&flags.maxSamples, "max-samples", defaultEvalSamples, "Maximum number of samples to generate") + cmd.Flags().IntVar(&flags.maxSamples, "max-samples", defaultEvalSamples, "Number of samples to generate (15-1000)") cmd.Flags().StringArrayVar(&flags.evaluators, "evaluator", nil, "Built-in or custom evaluator name") cmd.Flags().StringVarP(&flags.output, "out-file", "O", defaultEvalConfigName, "Eval config path") cmd.Flags().IntVar(&flags.traceDays, "trace-days", 0, "Include agent traces from the last N days for evaluator generation (0 = no traces)") @@ -181,8 +181,8 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error flags.genInstruction == "" && (flags.dataset == "" || len(flags.evaluators) == 0) { return fmt.Errorf("--gen-instruction is required when generating eval assets for a hosted agent") } - if flags.maxSamples <= 0 { - return fmt.Errorf("--max-samples must be a positive integer") + if flags.maxSamples < 15 || flags.maxSamples > 1000 { + return fmt.Errorf("--max-samples must be between 15 and 1000") } if resolved.hasProject { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go index 24420b9ba02..623d63ea5bb 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -49,7 +49,8 @@ func newEvalConfig(flags *evalInitFlags, resolved *evalResolvedContext) *evalCon }, }, 
Options: &opteval.Options{ - EvalModel: flags.evalModel, + EvalModel: flags.evalModel, + TargetAttributes: opteval.DefaultTargetAttributes, }, GenerationInstruction: flags.genInstruction, MaxSamples: flags.maxSamples, @@ -165,8 +166,10 @@ func pollAndFinalizeJobs( state *evalState, builtinEvals []string, ) error { + // Each goroutine writes to distinct fields of evalCfg and state, so no + // mutex is needed for those. Only the error variables are shared across + // both goroutines and guarded by wg.Wait() (written before Wait, read after). var ( - mu sync.Mutex datasetPollErr error evalPollErr error wg sync.WaitGroup @@ -182,11 +185,11 @@ func pollAndFinalizeJobs( parallel := pollDataset && pollEval if parallel { spinner := ux.NewSpinner(&ux.SpinnerOptions{ - Text: "Generating dataset and evaluators...", + Text: "Generating ...", ClearOnStop: true, }) if err := spinner.Start(ctx); err != nil { - fmt.Println(" Generating dataset and evaluators...") + fmt.Println(" Generating ...") } defer func() { _ = spinner.Stop(ctx) }() } @@ -201,15 +204,13 @@ func pollAndFinalizeJobs( !parallel, ) if err != nil { - mu.Lock() datasetPollErr = err - mu.Unlock() return } - mu.Lock() + // Dataset goroutine owns: state.DatasetGenStatus, evalCfg.DatasetReference, evalCfg.DatasetFile. 
state.DatasetGenStatus = completed.NormalizedStatus() - mu.Unlock() dsRef := datasetFromJob(completed) + evalCfg.DatasetReference = dsRef if resolved.hasProject { saveDatasetGenerationResult( resolved.projectRoot, completed.ResolvedDatasetName(), completed.Result, @@ -217,14 +218,9 @@ func pollAndFinalizeJobs( if err := downloadDatasetArtifact( ctx, resolved.datasetClient, resolved.projectRoot, dsRef, DefaultAgentAPIVersion, ); err != nil { - mu.Lock() datasetPollErr = err - mu.Unlock() return } - mu.Lock() - evalCfg.DatasetFile = datasetArtifactPath(resolved.projectRoot, dsRef) - mu.Unlock() } }() } @@ -239,16 +235,13 @@ func pollAndFinalizeJobs( !parallel, ) if err != nil { - mu.Lock() evalPollErr = err - mu.Unlock() return } + // Evaluator goroutine owns: state.EvalGenStatus, evalCfg.Evaluators. evalName := evaluatorFromJob(completed) - mu.Lock() state.EvalGenStatus = completed.NormalizedStatus() evalCfg.Evaluators = append(builtinEvals, evalName) - mu.Unlock() if resolved.hasProject { saveEvaluatorResult(resolved.projectRoot, evalName, completed.Result) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go index 6144684e66a..7df4bf2bbc3 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go @@ -178,8 +178,8 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f } if value := strings.TrimSpace(resp.Value); value != "" { parsed, err := strconv.Atoi(value) - if err != nil || parsed <= 0 { - return fmt.Errorf("--max-samples must be a positive integer") + if err != nil || parsed < 15 || parsed > 1000 { + return fmt.Errorf("--max-samples must be between 15 and 1000") } flags.maxSamples = parsed } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go 
b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go index 88e06deb754..9a0a9623932 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go @@ -165,23 +165,31 @@ func TestDatasetFromJob(t *testing.T) { expectedVersion string }{ { - "standard fields", - &eval_api.GenerationJob{DatasetName: "ds-1", DatasetVersion: "v2"}, + "result fields", + &eval_api.GenerationJob{ + Result: json.RawMessage(`{"name":"ds-1","version":"v2"}`), + }, "ds-1", "v2", }, { - "name fallback", - &eval_api.GenerationJob{Name: "ds-2"}, + "result name", + &eval_api.GenerationJob{ + Result: json.RawMessage(`{"outputs":[{"name":"ds-2"}]}`), + }, "ds-2", "v1", }, { - "version fallback", - &eval_api.GenerationJob{DatasetName: "ds-3", Version: "v3"}, + "nested outputs format", + &eval_api.GenerationJob{ + Result: json.RawMessage(`{"outputs":[{"name":"ds-3","version":"v3"}]}`), + }, "ds-3", "v3", }, { "empty defaults version to v1", - &eval_api.GenerationJob{Name: "ds-4"}, + &eval_api.GenerationJob{ + Result: json.RawMessage(`{"outputs":[{"name":"ds-4"}]}`), + }, "ds-4", "v1", }, } @@ -270,16 +278,16 @@ func TestBuildModelChoices(t *testing.T) { func TestEvaluatorFromJob(t *testing.T) { t.Parallel() - t.Run("extracts name from job", func(t *testing.T) { + t.Run("extracts name from result", func(t *testing.T) { t.Parallel() job := &eval_api.GenerationJob{ - EvaluatorName: "quality-eval", + Result: json.RawMessage(`{"name":"quality-eval"}`), } name := evaluatorFromJob(job) assert.Equal(t, "quality-eval", name) }) - t.Run("extracts name from result", func(t *testing.T) { + t.Run("extracts name from result display_name", func(t *testing.T) { t.Parallel() job := &eval_api.GenerationJob{ Result: json.RawMessage(`{"name":"smoke-core","display_name":"smoke-core"}`), diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go 
index ce8211963d7..8820f097ffa 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go @@ -78,37 +78,43 @@ func TestGenerationJob_NormalizedStatus(t *testing.T) { func TestGenerationJob_ResolvedDatasetName(t *testing.T) { t.Parallel() - assert.Equal(t, "ds-1", (&eval_api.GenerationJob{DatasetName: "ds-1", Name: "fallback"}).ResolvedDatasetName()) - assert.Equal(t, "fallback", (&eval_api.GenerationJob{Name: "fallback"}).ResolvedDatasetName()) assert.Equal(t, "", (&eval_api.GenerationJob{}).ResolvedDatasetName()) - // Extracts name from the result JSON when dataset_name and name are empty. + // Extracts name from the result JSON. job := &eval_api.GenerationJob{ Result: json.RawMessage(`{"name":"generated-ds","version":"v2"}`), } assert.Equal(t, "generated-ds", job.ResolvedDatasetName()) + + // Extracts name from result.outputs[0] (nested API response format). + jobNested := &eval_api.GenerationJob{ + Result: json.RawMessage(`{"outputs":[{"type":"dataset","name":"nested-ds","version":"36735"}]}`), + } + assert.Equal(t, "nested-ds", jobNested.ResolvedDatasetName()) } func TestGenerationJob_ResolvedDatasetVersion(t *testing.T) { t.Parallel() - assert.Equal(t, "v2", (&eval_api.GenerationJob{DatasetVersion: "v2"}).ResolvedDatasetVersion()) - assert.Equal(t, "v3", (&eval_api.GenerationJob{Version: "v3"}).ResolvedDatasetVersion()) assert.Equal(t, "v1", (&eval_api.GenerationJob{}).ResolvedDatasetVersion()) - // Extracts version from the result JSON when dataset_version and version are empty. + // Extracts version from the result JSON. job := &eval_api.GenerationJob{ Result: json.RawMessage(`{"name":"ds","version":"v5"}`), } assert.Equal(t, "v5", job.ResolvedDatasetVersion()) + + // Extracts version from result.outputs[0] (nested API response format). 
+ jobNested := &eval_api.GenerationJob{ + Result: json.RawMessage(`{"outputs":[{"type":"dataset","name":"ds","version":"36735"}]}`), + } + assert.Equal(t, "36735", jobNested.ResolvedDatasetVersion()) } func TestGenerationJob_ResolvedEvaluatorName(t *testing.T) { t.Parallel() - assert.Equal(t, "quality", (&eval_api.GenerationJob{EvaluatorName: "quality", Name: "fb"}).ResolvedEvaluatorName()) - assert.Equal(t, "fb", (&eval_api.GenerationJob{Name: "fb"}).ResolvedEvaluatorName()) assert.Equal(t, "", (&eval_api.GenerationJob{}).ResolvedEvaluatorName()) - // Extracts name from the result JSON when evaluator_name and name are empty. + // Extracts name from the result JSON. job := &eval_api.GenerationJob{ Result: json.RawMessage(`{"name":"smoke-core","display_name":"smoke-core"}`), } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go index 0db182880d2..2b997235716 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -141,6 +141,7 @@ func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.Optimi MinImprovement: c.Options.MinImprovement, ImprovementThreshold: c.Options.ImprovementThreshold, PassThreshold: c.Options.PassThreshold, + Strategies: c.Options.TargetAttributes, TargetAttributes: c.Options.TargetAttributes, KeepVersions: c.Options.KeepVersions, TasksPerIteration: c.Options.TasksPerIteration, diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go index 6ee5414d640..b71a4470a9e 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go @@ -45,14 +45,9 @@ type GenerationSource struct { // GenerationJob is the response for data and evaluator 
generation job operations. type GenerationJob struct { - ID string `json:"id"` - Status string `json:"status"` - Result json.RawMessage `json:"result,omitempty"` - DatasetName string `json:"dataset_name,omitempty"` - DatasetVersion string `json:"dataset_version,omitempty"` - EvaluatorName string `json:"evaluator_name,omitempty"` - Name string `json:"name,omitempty"` - Version string `json:"version,omitempty"` + ID string `json:"id"` + Status string `json:"status"` + Result json.RawMessage `json:"result,omitempty"` } // OperationID returns the job's operation identifier. @@ -68,43 +63,29 @@ func (j *GenerationJob) NormalizedStatus() string { return j.Status } -// ResolvedDatasetName returns dataset_name falling back to result.name, then name. +// ResolvedDatasetName returns the dataset name from the result JSON (top-level +// or nested outputs[0]). func (j *GenerationJob) ResolvedDatasetName() string { - if j.DatasetName != "" { - return j.DatasetName - } - if name := j.resultStringField("name"); name != "" { - return name - } - return j.Name + return j.resultStringField("name") } -// ResolvedDatasetVersion returns dataset_version falling back to result.version, then version. +// ResolvedDatasetVersion returns the dataset version from the result JSON +// (top-level or nested outputs[0]), defaulting to "v1". func (j *GenerationJob) ResolvedDatasetVersion() string { - if j.DatasetVersion != "" { - return j.DatasetVersion - } if v := j.resultStringField("version"); v != "" { return v } - if j.Version != "" { - return j.Version - } return "v1" } -// ResolvedEvaluatorName returns evaluator_name falling back to result.name, then name. +// ResolvedEvaluatorName returns the evaluator name from the result JSON. 
func (j *GenerationJob) ResolvedEvaluatorName() string { - if j.EvaluatorName != "" { - return j.EvaluatorName - } - if name := j.resultStringField("name"); name != "" { - return name - } - return j.Name + return j.resultStringField("name") } // resultStringField extracts a string field from the raw Result JSON. +// It first checks for a top-level key, then falls back to outputs[0].key +// to handle the nested response format. func (j *GenerationJob) resultStringField(key string) string { if len(j.Result) == 0 { return "" @@ -113,15 +94,29 @@ func (j *GenerationJob) resultStringField(key string) string { if err := json.Unmarshal(j.Result, &m); err != nil { return "" } - raw, ok := m[key] - if !ok { - return "" + + // Try top-level field first. + if raw, ok := m[key]; ok { + var s string + if err := json.Unmarshal(raw, &s); err == nil && s != "" { + return s + } } - var s string - if err := json.Unmarshal(raw, &s); err != nil { - return "" + + // Fall back to outputs[0].key for nested response format. 
+ if rawOutputs, ok := m["outputs"]; ok { + var outputs []map[string]json.RawMessage + if err := json.Unmarshal(rawOutputs, &outputs); err == nil && len(outputs) > 0 { + if raw, ok := outputs[0][key]; ok { + var s string + if err := json.Unmarshal(raw, &s); err == nil { + return s + } + } + } } - return s + + return "" } // --------------------------------------------------------------------------- diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go index c912f77b7fa..4e9c78510b2 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go @@ -108,7 +108,11 @@ func TestGetDataGenerationJob_Success(t *testing.T) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) - resp := map[string]any{"id": "op-123", "status": "completed", "dataset_name": "test-ds"} + resp := map[string]any{ + "id": "op-123", + "status": "completed", + "result": map[string]any{"name": "test-ds", "version": "v1"}, + } data, _ := json.Marshal(resp) _, _ = w.Write(data) }) @@ -119,7 +123,7 @@ func TestGetDataGenerationJob_Success(t *testing.T) { require.NoError(t, err) assert.Equal(t, "/data_generation_jobs/op-123", capturedPath) assert.Equal(t, "completed", result.Status) - assert.Equal(t, "test-ds", result.DatasetName) + assert.Equal(t, "test-ds", result.ResolvedDatasetName()) } // --------------------------------------------------------------------------- @@ -163,7 +167,11 @@ func TestGetEvaluatorGenerationJob_Success(t *testing.T) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) - resp := map[string]any{"id": "eval-op-456", "status": "completed", "evaluator_name": "quality"} + resp := map[string]any{ + "id": "eval-op-456", + "status": "completed", + "result": map[string]any{"name": 
"quality"}, + } data, _ := json.Marshal(resp) _, _ = w.Write(data) }) @@ -174,7 +182,7 @@ func TestGetEvaluatorGenerationJob_Success(t *testing.T) { require.NoError(t, err) assert.Equal(t, "/evaluator_generation_jobs/eval-op-456", capturedPath) assert.Equal(t, "completed", result.Status) - assert.Equal(t, "quality", result.EvaluatorName) + assert.Equal(t, "quality", result.ResolvedEvaluatorName()) } // --------------------------------------------------------------------------- diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go index 6165152cdaa..df95b41d7d2 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go @@ -80,18 +80,20 @@ type Criterion struct { // OptimizeOptions controls the optimization run. type OptimizeOptions struct { - Budget int `json:"budget,omitempty"` - MaxIterations int `json:"maxIterations,omitempty"` - MinImprovement float64 `json:"minImprovement,omitempty"` - ImprovementThreshold float64 `json:"improvementThreshold,omitempty"` - PassThreshold float64 `json:"passThreshold,omitempty"` - EvalModel string `json:"evalModel"` - TargetAttributes []string `json:"targetAttributes,omitempty"` - KeepVersions bool `json:"keepVersions,omitempty"` - TasksPerIteration int `json:"tasksPerIteration,omitempty"` - MaxReflectionTasks int `json:"maxReflectionTasks,omitempty"` - ReflectionModel string `json:"reflectionModel,omitempty"` - Mode string `json:"mode,omitempty"` + Budget int `json:"budget,omitempty"` + MaxIterations int `json:"maxIterations,omitempty"` + MinImprovement float64 `json:"minImprovement,omitempty"` + ImprovementThreshold float64 `json:"improvementThreshold,omitempty"` + PassThreshold float64 `json:"passThreshold,omitempty"` + EvalModel string `json:"evalModel"` + // Send as both "strategies" (current 
server) and "targetAttributes" (future). + Strategies []string `json:"strategies,omitempty"` + TargetAttributes []string `json:"targetAttributes,omitempty"` + KeepVersions bool `json:"keepVersions,omitempty"` + TasksPerIteration int `json:"tasksPerIteration,omitempty"` + MaxReflectionTasks int `json:"maxReflectionTasks,omitempty"` + ReflectionModel string `json:"reflectionModel,omitempty"` + Mode string `json:"mode,omitempty"` } // --- Response models --- diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go index 2af26052379..732a0045459 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go @@ -51,6 +51,7 @@ func TestOptimizeRequest_RoundTrip(t *testing.T) { ImprovementThreshold: 0.05, PassThreshold: 0.8, EvalModel: "gpt-4o-mini", + Strategies: []string{"prompt_mutation"}, TargetAttributes: []string{"prompt_mutation"}, KeepVersions: true, TasksPerIteration: 10, @@ -71,7 +72,7 @@ func TestOptimizeRequest_RoundTrip(t *testing.T) { `"options"`, `"evalModel"`, `"maxIterations"`, `"minImprovement"`, `"improvementThreshold"`, `"passThreshold"`, `"keepVersions"`, `"tasksPerIteration"`, `"maxReflectionTasks"`, `"reflectionModel"`, - `"targetAttributes"`, `"groundTruth"`, `"systemPrompt"`, `"skills"`, + `"strategies"`, `"targetAttributes"`, `"groundTruth"`, `"systemPrompt"`, `"skills"`, } { assert.True(t, strings.Contains(s, field), "JSON should contain %s", field) } From 8582bce0a679d192c0037f87622352398bb563e5 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Fri, 15 May 2026 08:07:51 -0700 Subject: [PATCH 11/33] more fixes --- .../azure.ai.agents/internal/cmd/eval.go | 38 +---- .../internal/cmd/eval_helpers.go | 40 +---- .../azure.ai.agents/internal/cmd/eval_init.go | 2 +- .../internal/cmd/eval_init_jobs.go | 30 ++-- 
.../internal/cmd/eval_init_prompts.go | 8 +- .../internal/cmd/eval_progress.go | 141 ++++++++++++++++++ .../azure.ai.agents/internal/cmd/eval_run.go | 4 + .../azure.ai.agents/internal/cmd/eval_test.go | 16 -- .../internal/cmd/optimize_config.go | 2 +- .../internal/cmd/optimize_config_test.go | 12 +- .../internal/pkg/agents/eval_api/artifacts.go | 17 --- .../internal/pkg/agents/opteval/yaml.go | 3 +- 12 files changed, 187 insertions(+), 126 deletions(-) create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go index 0639d5ed03d..3d3578704e6 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go @@ -12,7 +12,6 @@ import ( "os" "path/filepath" "strings" - "time" "azureaiagent/internal/exterrors" "azureaiagent/internal/pkg/agents/agent_yaml" @@ -22,7 +21,6 @@ import ( "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/azure/azure-dev/cli/azd/pkg/output" - "github.com/azure/azure-dev/cli/azd/pkg/ux" "github.com/fatih/color" "github.com/spf13/cobra" "go.yaml.in/yaml/v3" @@ -429,64 +427,40 @@ func endpointFromProjectID(projectID string) (string, error) { return buildAgentEndpoint(project.AccountName, project.ProjectName), nil } -func pollEvalOperation( - ctx context.Context, - label string, - operationID string, - get eval_api.GetJobFunc, - apiVersion string, -) (*eval_api.GenerationJob, error) { - return pollEvalOperationWithSpinner(ctx, label, operationID, get, apiVersion, true) -} - func pollEvalOperationWithSpinner( ctx context.Context, label string, operationID string, get eval_api.GetJobFunc, apiVersion string, - showSpinner bool, + progress *evalProgress, ) (*eval_api.GenerationJob, error) { if operationID == "" { return nil, fmt.Errorf("%s did not return an operation ID", strings.ToLower(label)) } - start := time.Now() - if 
showSpinner { - spinner := ux.NewSpinner(&ux.SpinnerOptions{ - Text: label + "...", - ClearOnStop: true, - }) - if err := spinner.Start(ctx); err != nil { - fmt.Printf("%s: running\n", label) - } - defer func() { _ = spinner.Stop(ctx) }() - } - + progress.setRunning(label) poller := eval_api.NewPoller(operationID, apiVersion, get) job, err := poller.Poll(ctx) - elapsed := time.Since(start).Round(time.Second) - if err != nil { if _, ok := errors.AsType[*eval_api.PollerTimeoutError](err); ok { - fmt.Printf(" %s %s (%s)\n", - color.YellowString("(!) Timed out"), label, elapsed) + progress.setTimedOut(label) return nil, err } if jfe, ok := errors.AsType[*eval_api.JobFailedError](err); ok { if body, marshalErr := json.MarshalIndent(jfe.Job, "", " "); marshalErr == nil { log.Printf("[debug] %s: failed response:\n%s", label, body) } - fmt.Printf(" %s %s (%s)\n", color.RedString("(x) Failed"), label, elapsed) + progress.setFailed(label) return nil, fmt.Errorf("%s failed with status %q", strings.ToLower(label), jfe.Status) } - fmt.Printf(" %s %s\n", color.RedString("(x) Failed"), label) + progress.setFailed(label) return nil, err } log.Printf("[debug] %s: completed successfully", label) - fmt.Printf(" %s %s (%s)\n", color.GreenString("(✓) Done"), label, elapsed) + progress.setDone(label) return job, nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go index f64804be7b0..cd7a66dd669 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go @@ -39,7 +39,7 @@ func resolveEvalConfigPath(config, agentProject string) string { // subdirectories (datasets, evaluators, results). 
func ensureFoundryDirs(projectRoot string) error { base := filepath.Join(projectRoot, ".azure", ".foundry") - for _, sub := range []string{"datasets", "evaluators", "results"} { + for _, sub := range []string{"datasets", "evaluators"} { if err := os.MkdirAll(filepath.Join(base, sub), 0750); err != nil { return err } @@ -47,29 +47,6 @@ func ensureFoundryDirs(projectRoot string) error { return nil } -// saveDatasetGenerationResult saves the raw dataset generation result JSON. -func saveDatasetGenerationResult(projectRoot, datasetName string, result json.RawMessage) { - if datasetName == "" || len(result) == 0 { - return - } - dir := filepath.Join(projectRoot, ".azure", ".foundry", "datasets") - if err := os.MkdirAll(dir, 0750); err != nil { - log.Printf("[debug] failed to create dataset dir: %v", err) - return - } - // Pretty-print the JSON for human review. - var pretty json.RawMessage - if err := json.Unmarshal(result, &pretty); err == nil { - if formatted, err := json.MarshalIndent(pretty, "", " "); err == nil { - result = formatted - } - } - path := filepath.Join(dir, datasetName+".json") - if err := os.WriteFile(path, result, 0600); err != nil { - log.Printf("[debug] failed to save dataset result: %v", err) - } -} - // downloadDatasetArtifact downloads the dataset and writes it locally. // If the download fails (e.g., non-TLS test server), a placeholder is written. func downloadDatasetArtifact( @@ -92,20 +69,19 @@ func downloadDatasetArtifact( // Attempt full download via the dataset API. cred, credErr := client.GetDatasetCredential(ctx, ref.Name, ref.Version, apiVersion) if credErr != nil { - // Gracefully write a placeholder when credential fetch fails. 
- log.Printf("[debug] dataset credential fetch failed: %v — writing placeholder", credErr) - return os.WriteFile(dest, []byte("{}\n"), 0600) + log.Printf("[debug] dataset credential fetch failed: %v", credErr) + return nil } downloadURL := cred.ResolvedDownloadURI() if downloadURL == "" { - return os.WriteFile(dest, []byte("{}\n"), 0600) + return nil } data, dlErr := client.DownloadDataset(ctx, downloadURL) if dlErr != nil { - log.Printf("[debug] dataset download failed: %v — writing placeholder", dlErr) - return os.WriteFile(dest, []byte("{}\n"), 0600) + log.Printf("[debug] dataset download failed: %v", dlErr) + return nil } return os.WriteFile(dest, data, 0600) @@ -157,7 +133,7 @@ func writeEvalReviewArtifacts(projectRoot string, cfg *eval_api.EvalConfig) { return } for _, evaluator := range cfg.Evaluators { - if evaluator == "" { + if evaluator == "" || eval_api.IsBuiltinEvaluator(evaluator) { continue } // Skip if a result JSON already exists. @@ -183,7 +159,7 @@ func writeEvalReviewArtifacts(projectRoot string, cfg *eval_api.EvalConfig) { fmt.Printf(" datasets/%s.jsonl\n", name) } for _, evaluator := range cfg.Evaluators { - if evaluator != "" { + if evaluator != "" && !eval_api.IsBuiltinEvaluator(evaluator) { fmt.Printf(" evaluators/%s.json\n", evaluator) } } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go index d4eed0e1ea9..1e3574fbd88 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -279,6 +279,6 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error fmt.Printf(" Evaluator: %s\n", evaluator) } } - fmt.Printf("\n Review the generated assets, then run:\n %s\n", "azd ai agent eval run") + fmt.Printf("\n Review the generated assets, then run:\n %s\n", color.CyanString("azd ai agent eval run")) return nil } diff --git 
a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go index 623d63ea5bb..cdc963103b2 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -18,7 +18,6 @@ import ( "azureaiagent/internal/pkg/agents/eval_api" "azureaiagent/internal/pkg/agents/opteval" - "github.com/azure/azure-dev/cli/azd/pkg/ux" "github.com/fatih/color" ) @@ -180,19 +179,16 @@ func pollAndFinalizeJobs( pollEval := state.EvalGenOpID != "" && !eval_api.ParseJobStatus(state.EvalGenStatus).IsTerminal() - // When both jobs run in parallel, disable individual spinners to avoid - // overlapping terminal output. Show a single combined spinner instead. - parallel := pollDataset && pollEval - if parallel { - spinner := ux.NewSpinner(&ux.SpinnerOptions{ - Text: "Generating ...", - ClearOnStop: true, - }) - if err := spinner.Start(ctx); err != nil { - fmt.Println(" Generating ...") - } - defer func() { _ = spinner.Stop(ctx) }() + // Build progress display labels. + var labels []string + if pollDataset { + labels = append(labels, "Dataset generation") + } + if pollEval { + labels = append(labels, "Evaluator generation") } + progress := newEvalProgress(labels...) 
+ progress.Start() if pollDataset { wg.Add(1) @@ -201,7 +197,7 @@ func pollAndFinalizeJobs( completed, err := pollEvalOperationWithSpinner( ctx, "Dataset generation", state.DatasetGenOpID, resolved.evalClient.GetDataGenerationJob, DataGenerationAPIVersion, - !parallel, + progress, ) if err != nil { datasetPollErr = err @@ -212,9 +208,6 @@ func pollAndFinalizeJobs( dsRef := datasetFromJob(completed) evalCfg.DatasetReference = dsRef if resolved.hasProject { - saveDatasetGenerationResult( - resolved.projectRoot, completed.ResolvedDatasetName(), completed.Result, - ) if err := downloadDatasetArtifact( ctx, resolved.datasetClient, resolved.projectRoot, dsRef, DefaultAgentAPIVersion, ); err != nil { @@ -232,7 +225,7 @@ func pollAndFinalizeJobs( completed, err := pollEvalOperationWithSpinner( ctx, "Evaluator generation", state.EvalGenOpID, resolved.evalClient.GetEvaluatorGenerationJob, DefaultAgentAPIVersion, - !parallel, + progress, ) if err != nil { evalPollErr = err @@ -249,6 +242,7 @@ func pollAndFinalizeJobs( } wg.Wait() + progress.Stop() // If either job timed out, return a timeout error so the caller can // persist the YAML and operation IDs for later resume. 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go index 7df4bf2bbc3..ecd7db3a42e 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go @@ -23,10 +23,14 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f } if flags.name == "" { + defaultName := defaultEvalName + if resolved.agentName != "" { + defaultName = resolved.agentName + } resp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ Options: &azdext.PromptOptions{ Message: "Eval suite name", - DefaultValue: defaultEvalName, + DefaultValue: defaultName, IgnoreHintKeys: true, }, }) @@ -168,7 +172,7 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f if !flags.maxSamplesSet { resp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ Options: &azdext.PromptOptions{ - Message: "Max samples", + Message: "Max samples (between 15 and 1000)", DefaultValue: strconv.Itoa(defaultEvalSamples), IgnoreHintKeys: true, }, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go new file mode 100644 index 00000000000..60a7277cf9e --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go @@ -0,0 +1,141 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "fmt" + "os" + "sync" + "time" + + "github.com/fatih/color" +) + +// evalProgress prints status lines for each job and keeps a single animated +// spinner line at the bottom to show that polling is still in progress. 
+type evalProgress struct { + mu sync.Mutex + starts map[string]time.Time + start time.Time + stop chan struct{} + done chan struct{} + spinning bool +} + +func newEvalProgress(_ ...string) *evalProgress { + return &evalProgress{ + starts: make(map[string]time.Time), + stop: make(chan struct{}), + done: make(chan struct{}), + } +} + +var spinFrames = []string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"} + +// Start launches the background spinner ticker. +func (p *evalProgress) Start() { + p.start = time.Now() + p.spinning = true + go func() { + defer close(p.done) + frameIdx := 0 + ticker := time.NewTicker(250 * time.Millisecond) + defer ticker.Stop() + for { + select { + case <-p.stop: + return + case <-ticker.C: + p.mu.Lock() + if p.spinning { + elapsed := time.Since(p.start).Truncate(time.Second) + spin := spinFrames[frameIdx%len(spinFrames)] + frameIdx++ + fmt.Fprintf(os.Stdout, "\r %s waiting · %s", spin, elapsed) + } + p.mu.Unlock() + } + } + }() +} + +// Stop halts the spinner and clears its line. +func (p *evalProgress) Stop() { + select { + case <-p.stop: + return + default: + close(p.stop) + } + <-p.done + p.mu.Lock() + if p.spinning { + fmt.Fprintf(os.Stdout, "\r%-60s\r", "") + p.spinning = false + } + p.mu.Unlock() +} + +// clearSpinnerLine clears the current spinner line so a status line can be +// printed cleanly. Must be called with p.mu held. 
+func (p *evalProgress) clearSpinnerLine() { + if p.spinning { + fmt.Fprintf(os.Stdout, "\r%-60s\r", "") + } +} + +func (p *evalProgress) setRunning(label string) { + p.mu.Lock() + defer p.mu.Unlock() + p.starts[label] = time.Now() + p.clearSpinnerLine() + fmt.Printf(" %s %s\n", color.BlueString("(–) Running"), label) +} + +func (p *evalProgress) setDone(label string) { + p.mu.Lock() + defer p.mu.Unlock() + elapsed := durationText(time.Since(p.starts[label])) + p.clearSpinnerLine() + fmt.Printf(" %s %s (%s)\n", color.GreenString("(✓) Done"), label, elapsed) +} + +func (p *evalProgress) setFailed(label string) { + p.mu.Lock() + defer p.mu.Unlock() + elapsed := durationText(time.Since(p.starts[label])) + p.clearSpinnerLine() + fmt.Printf(" %s %s (%s)\n", color.RedString("(x) Failed"), label, elapsed) +} + +func (p *evalProgress) setTimedOut(label string) { + p.mu.Lock() + defer p.mu.Unlock() + elapsed := durationText(time.Since(p.starts[label])) + p.clearSpinnerLine() + fmt.Printf(" %s %s (%s)\n", color.YellowString("(!) Timed out"), label, elapsed) +} + +// durationText returns a human-friendly elapsed time string. 
+func durationText(d time.Duration) string { + s := int(d.Seconds()) + if s < 1 { + return "less than a second" + } + if s == 1 { + return "1 second" + } + if s < 60 { + return fmt.Sprintf("%d seconds", s) + } + m := s / 60 + rem := s % 60 + if rem == 0 { + if m == 1 { + return "1 minute" + } + return fmt.Sprintf("%d minutes", m) + } + return fmt.Sprintf("%dm %ds", m, rem) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go index ffb5b823388..52ad30b5f1e 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go @@ -145,6 +145,10 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { if reportURL != "" { fmt.Printf(" Report: %s\n", color.CyanString(reportURL)) } + fmt.Printf("\n To view result summary, run:\n %s\n %s\n", + color.CyanString("azd ai agent eval list"), + color.CyanString("azd ai agent eval show"), + ) return nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go index 8820f097ffa..1337624b21d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go @@ -345,22 +345,6 @@ func TestSaveEvaluatorResult_EmptyName(t *testing.T) { assert.Empty(t, matches) } -func TestSaveDatasetGenerationResult(t *testing.T) { - t.Parallel() - dir := t.TempDir() - require.NoError(t, ensureFoundryDirs(dir)) - - result := json.RawMessage(`{"name":"my-dataset","version":"v2"}`) - saveDatasetGenerationResult(dir, "my-dataset", result) - - path := filepath.Join(dir, ".azure", ".foundry", "datasets", "my-dataset.json") - assert.FileExists(t, path) - data, err := os.ReadFile(path) - require.NoError(t, err) - assert.Contains(t, string(data), `"name": "my-dataset"`) - assert.Contains(t, string(data), `"version": "v2"`) -} - 
func TestWriteEvalReviewArtifacts_SkipsWhenResultExists(t *testing.T) { t.Parallel() dir := t.TempDir() diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go index 2b997235716..75824a0070f 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -78,7 +78,7 @@ func defaultOptimizeConfig(agentName string) *OptimizeConfig { return &OptimizeConfig{ Config: opteval.Config{ Agent: opteval.AgentRef{Name: agentName}, - Evaluators: []string{"task_adherence"}, + Evaluators: []string{"builtin.task_adherence"}, }, InlineDataset: defaultDataset, Options: &opteval.Options{ diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go index 2dea9da8713..e65a2714cb5 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go @@ -205,7 +205,7 @@ dataset_reference: name: eval-dataset version: "1" evaluators: - - task_adherence + - builtin.task_adherence options: eval_model: gpt-4o ` @@ -219,7 +219,7 @@ options: require.NotNil(t, cfg.Options) assert.Equal(t, "gpt-4o", cfg.Options.EvalModel) assert.Len(t, cfg.Evaluators, 1) - assert.Equal(t, "task_adherence", cfg.Evaluators[0]) + assert.Equal(t, "builtin.task_adherence", cfg.Evaluators[0]) require.NotNil(t, cfg.DatasetReference) assert.Equal(t, "eval-dataset", cfg.DatasetReference.Name) } @@ -236,7 +236,7 @@ agent: dataset_file: eval.jsonl evaluators: - - task_adherence + - builtin.task_adherence options: eval_model: gpt-4o @@ -254,7 +254,7 @@ agent: name: my-test-agent dataset_file: ` + datasetPath + ` evaluators: - - task_adherence + - builtin.task_adherence options: eval_model: gpt-4o mode: evaluate @@ -276,7 +276,7 @@ options: // Evaluator — 
scalar string without builtin. prefix resolves as custom. require.Len(t, cfg.Evaluators, 1) - assert.Equal(t, "task_adherence", cfg.Evaluators[0]) + assert.Equal(t, "builtin.task_adherence", cfg.Evaluators[0]) // Options require.NotNil(t, cfg.Options) @@ -291,5 +291,5 @@ options: require.NoError(t, err) assert.Equal(t, "my-test-agent", req.Agent.AgentName) assert.Len(t, req.Dataset, 1) - assert.Equal(t, []string{"task_adherence"}, req.Evaluators) + assert.Equal(t, []string{"builtin.task_adherence"}, req.Evaluators) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go index c494bf186f3..ef5bf4a5c42 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go @@ -42,23 +42,6 @@ func EnsureFoundryDirs(projectRoot string) error { return os.MkdirAll(dir, 0750) } -// SaveDatasetGenerationResult saves the raw JSON result of a dataset generation -// job under .azure/.foundry/datasets/.json. -func SaveDatasetGenerationResult(projectRoot, datasetName string, result json.RawMessage) { - if datasetName == "" || len(result) == 0 { - return - } - dir := filepath.Join(projectRoot, foundryDir, "datasets") - if err := os.MkdirAll(dir, 0750); err != nil { - log.Printf("[debug] failed to create dataset dir: %v", err) - return - } - path := filepath.Join(dir, datasetName+".json") - if err := os.WriteFile(path, result, 0600); err != nil { - log.Printf("[debug] failed to save dataset result: %v", err) - } -} - // DownloadDatasetArtifact downloads the dataset referenced by dsRef and saves // it under .azure/.foundry/datasets/.jsonl. 
func DownloadDatasetArtifact( diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go index a64e6d93888..95446ff9d3d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go @@ -89,7 +89,8 @@ func (o *Options) UnmarshalYAML(value *yaml.Node) error { if len(o.TargetAttributes) == 0 { o.TargetAttributes = slices.Clone(DefaultTargetAttributes) - o.MaxIterations = 2 + // o.MaxIterations = 5 + // o.Budget = 100 } return nil } From ad7c5b95b08781abbaefdc37d3eaa42293dfa125 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Fri, 15 May 2026 13:24:01 -0700 Subject: [PATCH 12/33] more --- .../internal/cmd/eval_init_prompts.go | 16 +++++++++++++++- .../internal/project/service_target_agent.go | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go index ecd7db3a42e..7b585093212 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go @@ -293,7 +293,7 @@ func promptRegenerateChoices( } } - // Ask about evaluator — only generated (non-builtin) evaluators can be regenerated. + // Ask about evaluator. generated, builtin := eval_api.SplitEvaluators(existingCfg.Evaluators) if len(generated) > 0 { generatedLabel := strings.Join(generated, ", ") @@ -316,6 +316,20 @@ func promptRegenerateChoices( if resp.Value != nil && *resp.Value { flags.regenerateEvaluator = true } + } else { + // No generated evaluators exist — ask whether to generate one. + resp, err := prompt.Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: "No custom evaluator found. 
Do you want to generate one?", + DefaultValue: new(true), + }, + }) + if err != nil { + return fmt.Errorf("prompting for evaluator generation: %w", err) + } + if resp.Value != nil && *resp.Value { + flags.regenerateEvaluator = true + } } return nil diff --git a/cli/azd/extensions/azure.ai.agents/internal/project/service_target_agent.go b/cli/azd/extensions/azure.ai.agents/internal/project/service_target_agent.go index 7d1430458b0..38f56ba88fb 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/project/service_target_agent.go +++ b/cli/azd/extensions/azure.ai.agents/internal/project/service_target_agent.go @@ -1309,7 +1309,7 @@ func (p *AgentServiceTargetProvider) deployArtifacts( last := artifacts[len(artifacts)-1] last.Metadata["note"] = "For information on invoking the agent, see " + output.WithLinkFormat( "https://aka.ms/azd-agents-invoke") + - "\n\nSet up an evaluation suite in one step with " + output.WithHighLightFormat("azd ai agent eval init") + "\n\nSet up an evaluation suite to measure quality and impact in one step with " + output.WithHighLightFormat("azd ai agent eval init") } } From 79eb54b082e773e0e4e508dc2ac3c30d2e997ad4 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Fri, 15 May 2026 19:58:29 -0700 Subject: [PATCH 13/33] more system prompt --- .../azure.ai.agents/internal/cmd/eval_init.go | 73 ++-- .../internal/cmd/eval_init_jobs.go | 42 +- .../internal/cmd/eval_init_prompts.go | 26 +- .../internal/cmd/eval_init_test.go | 59 ++- .../azure.ai.agents/internal/cmd/eval_test.go | 12 +- .../azure.ai.agents/internal/cmd/optimize.go | 388 ++++++++++++++++++ .../internal/cmd/optimize_apply.go | 183 ++++++++- .../internal/cmd/optimize_config.go | 57 +++ .../internal/cmd/optimize_deploy.go | 57 --- .../pkg/agents/eval_api/eval_config.go | 4 - .../pkg/agents/eval_api/eval_config_test.go | 16 +- .../internal/pkg/agents/opteval/yaml.go | 57 ++- .../pkg/agents/optimize_api/models.go | 23 +- 13 files changed, 803 insertions(+), 194 deletions(-) diff --git 
a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go index 1e3574fbd88..771cbf3ab46 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -8,7 +8,7 @@ import ( "errors" "fmt" "os" - "strings" + "path/filepath" "azureaiagent/internal/pkg/agents/agent_yaml" "azureaiagent/internal/pkg/agents/eval_api" @@ -23,21 +23,21 @@ const DataGenerationAPIVersion = "v1" // EvalInitFlags defines the customized flags for the eval init command. type evalInitFlags struct { - name string - agent string - projectEndpoint string - genInstruction string - genInstructionFile string - evalModel string - dataset string - output string - maxSamples int - evaluators []string - noWait bool - resetDefaults bool - evalModelSet bool - maxSamplesSet bool - traceDays int + name string + agent string + projectEndpoint string + systemPrompt string + systemPromptFile string + evalModel string + dataset string + output string + maxSamples int + evaluators []string + noWait bool + resetDefaults bool + evalModelSet bool + maxSamplesSet bool + traceDays int // Internal flags set during interactive prompts. regenerateDataset bool regenerateEvaluator bool @@ -54,8 +54,8 @@ By default, this command submits dataset and evaluator generation jobs, waits fo completion, downloads review artifacts under .azure/.foundry, and writes eval.yaml at the agent project root. Use --no-wait to write pending operation IDs and return.`, Example: ` azd ai agent eval init - azd ai agent eval init --gen-instruction "This agent handles restaurant reservations." --eval-model gpt-4o --max-samples 50 - azd ai agent eval init --gen-instruction-file ./instructions.md --eval-model gpt-4o + azd ai agent eval init --system-prompt "This agent handles restaurant reservations." 
--eval-model gpt-4o --max-samples 50 + azd ai agent eval init --system-prompt-file ./instructions.md --eval-model gpt-4o azd ai agent eval init --dataset ./tests/golden.jsonl --evaluator builtin.intent_resolution`, Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { @@ -72,8 +72,8 @@ the agent project root. Use --no-wait to write pending operation IDs and return. cmd.Flags().BoolVar(&flags.noWait, "no-wait", false, "Submit generation jobs and return immediately") cmd.Flags().StringVar(&flags.agent, "agent", "", "Target agent name") cmd.Flags().StringVarP(&flags.projectEndpoint, "project-endpoint", "p", "", "Microsoft Foundry project endpoint URL") - cmd.Flags().StringVarP(&flags.genInstruction, "gen-instruction", "g", "", "Inline instruction for dataset and evaluator generation") - cmd.Flags().StringVarP(&flags.genInstructionFile, "gen-instruction-file", "G", "", "Path to a file containing the generation instruction") + cmd.Flags().StringVarP(&flags.systemPrompt, "system-prompt", "g", "", "Agent system prompt used for dataset and evaluator generation") + cmd.Flags().StringVarP(&flags.systemPromptFile, "system-prompt-file", "G", "", "Path to a file containing the agent system prompt") cmd.Flags().StringVar(&flags.evalModel, "eval-model", defaultEvalModel, "Model used for evaluation and generation, and also as the default model for evaluation") cmd.Flags().StringVar(&flags.dataset, "dataset", "", "Existing local file or registered dataset name to use for evaluation (instead of generating a new dataset)") cmd.Flags().IntVar(&flags.maxSamples, "max-samples", defaultEvalSamples, "Number of samples to generate (15-1000)") @@ -87,17 +87,9 @@ the agent project root. Use --no-wait to write pending operation IDs and return. // runEvalInit executes the eval init command logic. It resolves context, prompts for missing options, submits generation jobs, polls for completion (unless --no-wait), writes the eval config, and prints next steps. 
func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error { - if flags.genInstruction != "" && flags.genInstructionFile != "" { - return fmt.Errorf("cannot use both --gen-instruction and --gen-instruction-file; provide one or the other") + if flags.systemPrompt != "" && flags.systemPromptFile != "" { + return fmt.Errorf("cannot use both --system-prompt and --system-prompt-file; provide one or the other") } - if flags.genInstructionFile != "" { - data, err := os.ReadFile(flags.genInstructionFile) //nolint:gosec // user-provided instruction file path - if err != nil { - return fmt.Errorf("reading instruction file %q: %w", flags.genInstructionFile, err) - } - flags.genInstruction = strings.TrimSpace(string(data)) - } - resolved, err := resolveEvalContext(ctx, evalContextOptions{ agent: flags.agent, projectEndpoint: flags.projectEndpoint, @@ -109,6 +101,16 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error } defer resolved.azdClient.Close() + // Resolve relative system_prompt_file paths against the agent project directory. 
+ if flags.systemPromptFile != "" { + if !filepath.IsAbs(flags.systemPromptFile) && resolved.projectRoot != "" { + flags.systemPromptFile = filepath.Join(resolved.projectRoot, flags.systemPromptFile) + } + if _, err := os.Stat(flags.systemPromptFile); err != nil { + return fmt.Errorf("system prompt file %q is not accessible: %w", flags.systemPromptFile, err) + } + } + configPath := resolveEvalOutputPath(flags.output, resolved.agentProject) printEvalDetectedContext(resolved, configPath) @@ -144,8 +146,9 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error if existingCfg.Options != nil && !flags.evalModelSet { flags.evalModel = existingCfg.Options.EvalModel } - if flags.genInstruction == "" { - flags.genInstruction = existingCfg.GenerationInstruction + if flags.systemPrompt == "" && flags.systemPromptFile == "" { + flags.systemPrompt = existingCfg.Agent.SystemPrompt + flags.systemPromptFile = existingCfg.Agent.SystemPromptFile } if !flags.maxSamplesSet && existingCfg.MaxSamples > 0 { flags.maxSamples = existingCfg.MaxSamples @@ -176,10 +179,10 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error // Finalize the eval suite name with a random suffix to avoid collisions. flags.name = resolveEvalName(flags) + "-" + randomSuffix() - // Prompt agents use the agent source directly; hosted agents require a gen-instruction. + // Prompt agents use the agent source directly; hosted agents require a system-prompt. 
if resolved.agentKind != agent_yaml.AgentKindPrompt && - flags.genInstruction == "" && (flags.dataset == "" || len(flags.evaluators) == 0) { - return fmt.Errorf("--gen-instruction is required when generating eval assets for a hosted agent") + flags.systemPrompt == "" && flags.systemPromptFile == "" && (flags.dataset == "" || len(flags.evaluators) == 0) { + return fmt.Errorf("--system-prompt is required when generating eval assets for a hosted agent") } if flags.maxSamples < 15 || flags.maxSamples > 1000 { return fmt.Errorf("--max-samples must be between 15 and 1000") diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go index cdc963103b2..cc38e434293 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -37,23 +37,41 @@ func randomSuffix() string { return hex.EncodeToString(b) } +// resolvedSystemPrompt returns the system prompt content from flags, reading +// from file if systemPromptFile is set. 
+func resolvedSystemPrompt(flags *evalInitFlags) string { + if flags.systemPromptFile != "" { + data, err := os.ReadFile(flags.systemPromptFile) //nolint:gosec // user-provided path validated earlier + if err != nil { + return flags.systemPrompt + } + return string(data) + } + return flags.systemPrompt +} + func newEvalConfig(flags *evalInitFlags, resolved *evalResolvedContext) *evalConfig { + agent := evalAgentRef{ + Name: resolved.agentName, + Kind: resolved.agentKind, + Version: resolved.version, + } + if flags.systemPromptFile != "" { + agent.SystemPromptFile = flags.systemPromptFile + } else { + agent.SystemPrompt = flags.systemPrompt + } return &evalConfig{ Config: opteval.Config{ - Name: resolveEvalName(flags), - Agent: evalAgentRef{ - Name: resolved.agentName, - Kind: resolved.agentKind, - Version: resolved.version, - }, + Name: resolveEvalName(flags), + Agent: agent, }, Options: &opteval.Options{ EvalModel: flags.evalModel, TargetAttributes: opteval.DefaultTargetAttributes, }, - GenerationInstruction: flags.genInstruction, - MaxSamples: flags.maxSamples, - TraceDays: flags.traceDays, + MaxSamples: flags.maxSamples, + TraceDays: flags.traceDays, } } @@ -63,8 +81,9 @@ func submitDatasetGeneration( flags *evalInitFlags, ) (*eval_api.GenerationJob, error) { // Traces are only supported for evaluator generation, not dataset generation. 
+ prompt := resolvedSystemPrompt(flags) sources := eval_api.BuildGenerationSources( - string(resolved.agentKind), resolved.agentName, resolved.version, flags.genInstruction, nil, + string(resolved.agentKind), resolved.agentName, resolved.version, prompt, nil, ) request := eval_api.NewDataGenerationJobRequest( resolveEvalName(flags), flags.evalModel, flags.maxSamples, sources, @@ -84,8 +103,9 @@ func submitEvaluatorGeneration( if flags.traceDays > 0 { traces = &eval_api.TraceOptions{Days: flags.traceDays} } + prompt := resolvedSystemPrompt(flags) sources := eval_api.BuildGenerationSources( - string(resolved.agentKind), resolved.agentName, resolved.version, flags.genInstruction, traces, + string(resolved.agentKind), resolved.agentName, resolved.version, prompt, traces, ) request := eval_api.NewEvaluatorGenerationJobRequest( resolveEvalName(flags), flags.evalModel, sources, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go index 7b585093212..97e6ab4d269 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go @@ -7,6 +7,7 @@ import ( "context" "fmt" "os" + "path/filepath" "strconv" "strings" @@ -45,7 +46,7 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f needsGeneration := true // adaptive evaluator is always generated needsEvalGen := true - if flags.genInstruction == "" && needsGeneration && resolved.agentKind != agent_yaml.AgentKindPrompt { + if flags.systemPrompt == "" && flags.systemPromptFile == "" && needsGeneration && resolved.agentKind != agent_yaml.AgentKindPrompt { // Let the user choose between inline text or loading from a file. 
inputChoices := []*azdext.SelectChoice{ {Label: "Type inline", Value: "inline"}, @@ -54,32 +55,35 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f defaultIdx := int32(0) selResp, err := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ Options: &azdext.SelectOptions{ - Message: "How would you like to provide the generation instruction?", + Message: "How would you like to provide the system prompt?", Choices: inputChoices, SelectedIndex: &defaultIdx, }, }) if err != nil { - return fmt.Errorf("prompting for instruction input method: %w", err) + return fmt.Errorf("prompting for system prompt input method: %w", err) } if inputChoices[int(*selResp.Value)].Value == "file" { // Prompt for the file path. pathResp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ Options: &azdext.PromptOptions{ - Message: "Path to instruction file", + Message: "Path to system prompt file", IgnoreHintKeys: true, }, }) if err != nil { - return fmt.Errorf("prompting for instruction file path: %w", err) + return fmt.Errorf("prompting for system prompt file path: %w", err) } filePath := strings.TrimSpace(pathResp.Value) - data, err := os.ReadFile(filePath) //nolint:gosec // user-provided instruction file path - if err != nil { - return fmt.Errorf("reading instruction file %q: %w", filePath, err) + // Resolve relative paths against the agent project directory. + if !filepath.IsAbs(filePath) && resolved.projectRoot != "" { + filePath = filepath.Join(resolved.projectRoot, filePath) + } + if _, err := os.Stat(filePath); err != nil { + return fmt.Errorf("system prompt file %q is not accessible: %w", filePath, err) } - flags.genInstruction = strings.TrimSpace(string(data)) + flags.systemPromptFile = filePath } else { // Inline text input. 
resp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ @@ -89,9 +93,9 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f }, }) if err != nil { - return fmt.Errorf("prompting for generation instruction: %w", err) + return fmt.Errorf("prompting for system prompt: %w", err) } - flags.genInstruction = strings.TrimSpace(resp.Value) + flags.systemPrompt = strings.TrimSpace(resp.Value) } } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go index 9a0a9623932..975db965b81 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go @@ -34,8 +34,8 @@ func TestNewEvalInitCommand_Flags(t *testing.T) { {"no-wait", "false"}, {"agent", ""}, {"project-endpoint", ""}, - {"gen-instruction", ""}, - {"gen-instruction-file", ""}, + {"system-prompt", ""}, + {"system-prompt-file", ""}, {"eval-model", defaultEvalModel}, {"dataset", ""}, {"max-samples", "100"}, @@ -68,44 +68,46 @@ func TestNewEvalInitCommand_ShortOutFile(t *testing.T) { } // --------------------------------------------------------------------------- -// gen-instruction / gen-instruction-file mutual exclusion +// system-prompt / system-prompt-file mutual exclusion // --------------------------------------------------------------------------- func TestRunEvalInit_MutualExclusion(t *testing.T) { t.Parallel() flags := &evalInitFlags{ - genInstruction: "inline text", - genInstructionFile: "some-file.txt", + systemPrompt: "inline text", + systemPromptFile: "some-file.txt", } err := runEvalInit(t.Context(), flags, true) require.Error(t, err) - assert.Contains(t, err.Error(), "cannot use both --gen-instruction and --gen-instruction-file") + assert.Contains(t, err.Error(), "cannot use both --system-prompt and --system-prompt-file") } -func TestRunEvalInit_InstructionFromFile(t *testing.T) { +func 
TestRunEvalInit_SystemPromptFile(t *testing.T) { t.Parallel() tmpDir := t.TempDir() instrFile := filepath.Join(tmpDir, "instruction.md") require.NoError(t, os.WriteFile(instrFile, []byte(" Test booking agent \n"), 0600)) flags := &evalInitFlags{ - genInstructionFile: instrFile, - evalModel: defaultEvalModel, - maxSamples: 10, + systemPromptFile: instrFile, + evalModel: defaultEvalModel, + maxSamples: 10, } - // runEvalInit will fail later (no azd client), but genInstruction should be populated first. + // runEvalInit will fail later (no azd client), but file validation should pass. _ = runEvalInit(t.Context(), flags, true) - assert.Equal(t, "Test booking agent", flags.genInstruction) + // File path remains on the flag — content is NOT inlined. + assert.Equal(t, instrFile, flags.systemPromptFile) + assert.Empty(t, flags.systemPrompt) } -func TestRunEvalInit_InstructionFileMissing(t *testing.T) { +func TestRunEvalInit_SystemPromptFileMissing(t *testing.T) { t.Parallel() flags := &evalInitFlags{ - genInstructionFile: "/nonexistent/path/instruction.txt", + systemPromptFile: "/nonexistent/path/instruction.txt", } err := runEvalInit(t.Context(), flags, true) require.Error(t, err) - assert.Contains(t, err.Error(), "reading instruction file") + assert.Contains(t, err.Error(), "not accessible") } // --------------------------------------------------------------------------- @@ -118,9 +120,9 @@ func TestNewEvalConfig(t *testing.T) { t.Run("uses default name", func(t *testing.T) { t.Parallel() flags := &evalInitFlags{ - genInstruction: "Test the booking agent", - evalModel: "gpt-4.1", - maxSamples: 50, + systemPrompt: "Test the booking agent", + evalModel: "gpt-4.1", + maxSamples: 50, } resolved := &evalResolvedContext{ agentName: "booking-agent", @@ -135,7 +137,7 @@ func TestNewEvalConfig(t *testing.T) { assert.Equal(t, agent_yaml.AgentKindHosted, cfg.Agent.Kind) assert.Equal(t, "v2", cfg.Agent.Version) assert.Equal(t, "gpt-4.1", cfg.Options.EvalModel) - assert.Equal(t, 
"Test the booking agent", cfg.GenerationInstruction) + assert.Equal(t, "Test the booking agent", cfg.Agent.SystemPrompt) assert.Equal(t, 50, cfg.MaxSamples) }) @@ -149,6 +151,25 @@ func TestNewEvalConfig(t *testing.T) { cfg := newEvalConfig(flags, resolved) assert.Equal(t, "my-suite", cfg.Name) }) + + t.Run("stores system_prompt_file when file provided", func(t *testing.T) { + t.Parallel() + flags := &evalInitFlags{ + systemPromptFile: "./prompts/system.md", + evalModel: "gpt-4o", + maxSamples: 20, + } + resolved := &evalResolvedContext{ + agentName: "my-agent", + agentKind: agent_yaml.AgentKindHosted, + version: "v1", + } + + cfg := newEvalConfig(flags, resolved) + + assert.Empty(t, cfg.Agent.SystemPrompt) + assert.Equal(t, "./prompts/system.md", cfg.Agent.SystemPromptFile) + }) } // --------------------------------------------------------------------------- diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go index 1337624b21d..6addde4266d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go @@ -481,9 +481,10 @@ func TestEvalConfigRoundTrip(t *testing.T) { Config: opteval.Config{ Name: "smoke-core", Agent: evalAgentRef{ - Name: "my-agent", - Kind: agent_yaml.AgentKindHosted, - Version: "v1", + Name: "my-agent", + Kind: agent_yaml.AgentKindHosted, + Version: "v1", + SystemPrompt: "Test this agent", }, DatasetReference: &evalDatasetRef{Name: "ds", Version: "v1"}, Evaluators: []string{"builtin.task_adherence"}, @@ -491,8 +492,7 @@ func TestEvalConfigRoundTrip(t *testing.T) { Options: &opteval.Options{ EvalModel: "gpt-4o", }, - GenerationInstruction: "Test this agent", - MaxSamples: 50, + MaxSamples: 50, } err := writeEvalConfig(path, original) @@ -506,7 +506,7 @@ func TestEvalConfigRoundTrip(t *testing.T) { assert.Equal(t, original.Agent.Kind, loaded.Agent.Kind) assert.Equal(t, 
original.Agent.Version, loaded.Agent.Version) assert.Equal(t, "gpt-4o", loaded.Options.EvalModel) - assert.Equal(t, original.GenerationInstruction, loaded.GenerationInstruction) + assert.Equal(t, original.Agent.SystemPrompt, loaded.Agent.SystemPrompt) assert.Equal(t, original.MaxSamples, loaded.MaxSamples) require.NotNil(t, loaded.DatasetReference) assert.Equal(t, "ds", loaded.DatasetReference.Name) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go index 96d4af7da02..fe32b61287d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -12,6 +12,7 @@ import ( "strings" "time" + "azureaiagent/internal/pkg/agents/opteval" "azureaiagent/internal/pkg/agents/optimize_api" "github.com/azure/azure-dev/cli/azd/pkg/azdext" @@ -154,6 +155,7 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { var cfg *OptimizeConfig configSource := "" // tracks where the config came from for user messaging hasProject := false + agentProject := "" if a.flags.configFile != "" { cfg, err = LoadOptimizeConfig(a.flags.configFile) @@ -167,6 +169,7 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { return err } hasProject = resolved.agentProject != "" + agentProject = resolved.agentProject // Check if eval.yaml exists in the agent project and offer to use it if resolved.agentProject != "" { @@ -209,6 +212,33 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { cfg.Options.TargetAttributes = a.flags.targetAttributes } + // Resolve relative skill_dir against agent project directory. + if cfg.Agent.SkillDir != "" && hasProject && !filepath.IsAbs(cfg.Agent.SkillDir) { + cfg.Agent.SkillDir = filepath.Join(agentProject, cfg.Agent.SkillDir) + } + + // Resolve system prompt using a well-defined lifecycle: + // 1. 
Config file (eval.yaml / --config) — system_prompt or system_prompt_file in agent section + // 2. Baseline config — .agent_optimization/baseline/config.json from a prior optimize run + // 3. Interactive prompt — ask the user to provide inline text or a file path + if err := resolveOptimizeSystemPrompt(ctx, cfg, agentProject, hasProject, a.noPrompt); err != nil { + return err + } + + // Resolve skill_dir: auto-detect, check baseline, or prompt user. + if cfg.Agent.SkillDir == "" && hasProject { + if err := resolveOptimizeSkillDir(ctx, cfg, agentProject, a.noPrompt); err != nil { + return err + } + } + + // Resolve target_config.model: prompt user if not set. + if (cfg.Options.TargetConfig == nil || len(cfg.Options.TargetConfig.Model) == 0) && !a.noPrompt { + if err := resolveOptimizeTargetModels(ctx, cfg); err != nil { + return err + } + } + out := cmd.OutOrStdout() bold := color.New(color.Bold) @@ -231,6 +261,16 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { return fmt.Errorf("failed to build optimization request: %w", err) } + // Save baseline config before starting optimization. + if hasProject { + if err := saveBaselineConfig(agentProject, cfg.Agent.SkillDir, optimizeReq); err != nil { + fmt.Fprintf(out, " warning: failed to save baseline config: %s\n", err) + } else { + fmt.Fprintf(out, " Baseline saved to %s\n", + filepath.Join(optimizationDir, "baseline", "config.json")) + } + } + resp, err := client.StartOptimize(ctx, optimizeReq) if err != nil { return fmt.Errorf("failed to submit optimization job: %w\n\nCheck that the endpoint %q is reachable", err, endpoint) @@ -253,6 +293,354 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { return nil } +// resolveOptimizeSystemPrompt resolves the agent's system prompt using a well-defined lifecycle: +// +// 1. Config (eval.yaml / --config): system_prompt or system_prompt_file in the agent section. +// 2. 
Baseline: .agent_optimization/baseline/config.json from a prior optimization run. +// 3. Interactive prompt: ask the user to provide inline text or a file path. +// +// Relative file paths in system_prompt_file are resolved against agentProject. +func resolveOptimizeSystemPrompt( + ctx context.Context, + cfg *OptimizeConfig, + agentProject string, + hasProject bool, + noPrompt bool, +) error { + // Resolve relative system_prompt_file paths against the agent project directory. + if cfg.Agent.SystemPromptFile != "" && hasProject && !filepath.IsAbs(cfg.Agent.SystemPromptFile) { + cfg.Agent.SystemPromptFile = filepath.Join(agentProject, cfg.Agent.SystemPromptFile) + } + + // Step 1: Config explicitly declares a system_prompt_file — validate it's readable. + if cfg.Agent.SystemPromptFile != "" { + if _, err := os.Stat(cfg.Agent.SystemPromptFile); err != nil { + return fmt.Errorf("system_prompt_file %q from config is not accessible: %w", + cfg.Agent.SystemPromptFile, err) + } + return nil + } + + // Step 1b: Config already has inline system_prompt — nothing to do. + if cfg.Agent.SystemPrompt != "" { + return nil + } + + // Step 2: Check baseline config from a prior optimization run. + if hasProject { + if baseline, loadErr := loadBaselineConfig(agentProject); loadErr == nil && baseline.Instructions != "" { + if noPrompt { + cfg.Agent.SystemPrompt = baseline.Instructions + return nil + } + + azdClient, clientErr := azdext.NewAzdClient() + if clientErr == nil { + defer azdClient.Close() + resp, promptErr := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: "No system prompt in config. " + + "Found one in baseline (.agent_optimization/baseline/config.json). Use it?", + DefaultValue: new(true), + }, + }) + if promptErr == nil && resp.Value != nil && *resp.Value { + cfg.Agent.SystemPrompt = baseline.Instructions + return nil + } + } + } + } + + // Step 3: Interactive prompt — ask user to provide inline text or a file path. 
+ if noPrompt { + return fmt.Errorf("system prompt is required for optimization.\n\n" + + "Provide it via one of:\n" + + " 1. system_prompt or system_prompt_file in eval.yaml (agent section)\n" + + " 2. Run a prior optimization to create a baseline (.agent_optimization/baseline/config.json)\n" + + " 3. Run without --no-prompt to enter it interactively") + } + + azdClient, clientErr := azdext.NewAzdClient() + if clientErr != nil { + return fmt.Errorf("system prompt is required but could not open interactive prompt: %w", clientErr) + } + defer azdClient.Close() + + inputChoices := []*azdext.SelectChoice{ + {Label: "Type inline", Value: "inline"}, + {Label: "Load from file", Value: "file"}, + } + defaultIdx := int32(0) + selResp, selErr := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ + Options: &azdext.SelectOptions{ + Message: "No system prompt found in config or baseline. " + + "How would you like to provide the system prompt?", + Choices: inputChoices, + SelectedIndex: &defaultIdx, + }, + }) + if selErr != nil { + return fmt.Errorf("prompting for system prompt input method: %w", selErr) + } + + if inputChoices[int(*selResp.Value)].Value == "file" { + pathResp, pathErr := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Path to system prompt file", + IgnoreHintKeys: true, + }, + }) + if pathErr != nil { + return fmt.Errorf("prompting for system prompt file path: %w", pathErr) + } + filePath := strings.TrimSpace(pathResp.Value) + // Resolve relative paths against the agent project directory. 
+ if !filepath.IsAbs(filePath) && hasProject { + filePath = filepath.Join(agentProject, filePath) + } + if _, err := os.Stat(filePath); err != nil { + return fmt.Errorf("system prompt file %q is not accessible: %w", filePath, err) + } + cfg.Agent.SystemPromptFile = filePath + } else { + resp, promptErr := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Enter the agent's system prompt instructions", + IgnoreHintKeys: true, + }, + }) + if promptErr != nil { + return fmt.Errorf("prompting for system prompt: %w", promptErr) + } + cfg.Agent.SystemPrompt = strings.TrimSpace(resp.Value) + } + + return nil +} + +// resolveOptimizeSkillDir resolves the agent's skill directory: +// 1. Auto-detect: look for a "skills/" folder in the agent project — confirm with user. +// 2. Baseline: check .agent_optimization/baseline/config.json for a saved skill_dir. +// 3. Interactive prompt: ask the user to provide a path or skip. +func resolveOptimizeSkillDir( + ctx context.Context, + cfg *OptimizeConfig, + agentProject string, + noPrompt bool, +) error { + // Step 1: Auto-detect common skill directory names. + var detectedDir string + for _, candidate := range []string{"skills", "skill"} { + dir := filepath.Join(agentProject, candidate) + if info, err := os.Stat(dir); err == nil && info.IsDir() { + detectedDir = dir + break + } + } + + // Step 2: Check baseline config. + if detectedDir == "" { + if baseline, loadErr := loadBaselineConfig(agentProject); loadErr == nil && baseline.SkillDir != "" { + if _, err := os.Stat(baseline.SkillDir); err == nil { + detectedDir = baseline.SkillDir + } + } + } + + if noPrompt { + // In no-prompt mode, use whatever was detected (may be empty). 
+ cfg.Agent.SkillDir = detectedDir + return nil + } + + azdClient, clientErr := azdext.NewAzdClient() + if clientErr != nil { + cfg.Agent.SkillDir = detectedDir + return nil + } + defer azdClient.Close() + + if detectedDir != "" { + // Found a skill directory — ask user to confirm or provide a different one. + choices := []*azdext.SelectChoice{ + {Label: fmt.Sprintf("Use detected: %s", detectedDir), Value: "use"}, + {Label: "Provide a different path", Value: "other"}, + {Label: "Skip (no skills)", Value: "skip"}, + } + defaultIdx := int32(0) + selResp, selErr := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ + Options: &azdext.SelectOptions{ + Message: fmt.Sprintf("Found skills directory: %s", detectedDir), + Choices: choices, + SelectedIndex: &defaultIdx, + }, + }) + if selErr != nil { + cfg.Agent.SkillDir = detectedDir + return nil + } + + switch choices[int(*selResp.Value)].Value { + case "use": + cfg.Agent.SkillDir = detectedDir + return nil + case "skip": + return nil + case "other": + // Fall through to path prompt below. + } + } else { + // No skill directory found — ask if they want to provide one. + resp, promptErr := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: "No skills directory found. Would you like to provide one?", + DefaultValue: new(bool), // default false + }, + }) + if promptErr != nil || !resp.GetValue() { + return nil // skip skills + } + } + + // Prompt for a custom path. 
+ pathResp, pathErr := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Path to skills directory", + IgnoreHintKeys: true, + }, + }) + if pathErr != nil { + return fmt.Errorf("prompting for skills directory: %w", pathErr) + } + + dir := strings.TrimSpace(pathResp.Value) + if dir == "" { + return nil + } + if !filepath.IsAbs(dir) { + dir = filepath.Join(agentProject, dir) + } + if info, err := os.Stat(dir); err != nil || !info.IsDir() { + return fmt.Errorf("skills directory %q is not accessible or not a directory", dir) + } + + cfg.Agent.SkillDir = dir + return nil +} + +// knownOptimizationModels is the list of models commonly used for optimization. +var knownOptimizationModels = []string{ + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4o", + "gpt-4o-mini", +} + +// resolveOptimizeTargetModels prompts the user to select model candidates +// for optimization (target_config.model). Shows the current deployed model +// and allows multi-select from known models. +func resolveOptimizeTargetModels( + ctx context.Context, + cfg *OptimizeConfig, +) error { + azdClient, clientErr := azdext.NewAzdClient() + if clientErr != nil { + return nil + } + defer azdClient.Close() + + currentModel := cfg.Agent.Model + + message := "Select target models for optimization" + if currentModel != "" { + message = fmt.Sprintf("Select target models for optimization (current: %s)", currentModel) + } + + resp, promptErr := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: "Would you like to specify target models for optimization?", + DefaultValue: new(bool), // default false + }, + }) + if promptErr != nil || !resp.GetValue() { + return nil + } + + // Build choices — include current model if not already in the known list. 
+ choices := buildOptimizeModelChoices(currentModel) + + multiResp, multiErr := azdClient.Prompt().MultiSelect(ctx, &azdext.MultiSelectRequest{ + Options: &azdext.MultiSelectOptions{ + Message: message, + Choices: choices, + }, + }) + if multiErr != nil { + return fmt.Errorf("prompting for target models: %w", multiErr) + } + + var models []string + for _, v := range multiResp.Values { + models = append(models, v.Value) + } + + if len(models) > 0 { + if cfg.Options.TargetConfig == nil { + cfg.Options.TargetConfig = &opteval.TargetConfig{} + } + cfg.Options.TargetConfig.Model = models + } + + return nil +} + +// buildOptimizeModelChoices returns MultiSelectChoice items for model selection. +// The current deployed model is included and pre-selected; placed first if not in the known list. +func buildOptimizeModelChoices(currentModel string) []*azdext.MultiSelectChoice { + seen := make(map[string]bool) + var choices []*azdext.MultiSelectChoice + + // If the current model is not in the known list, prepend it. 
+ if currentModel != "" { + found := false + for _, m := range knownOptimizationModels { + if m == currentModel { + found = true + break + } + } + if !found { + choices = append(choices, &azdext.MultiSelectChoice{ + Label: currentModel + " (current)", + Value: currentModel, + Selected: true, + }) + seen[currentModel] = true + } + } + + for _, m := range knownOptimizationModels { + if seen[m] { + continue + } + label := m + selected := false + if m == currentModel { + label = m + " (current)" + selected = true + } + choices = append(choices, &azdext.MultiSelectChoice{ + Label: label, + Value: m, + Selected: selected, + }) + } + + return choices +} + func pollOptimizeJob( cmd *cobra.Command, client *optimize_api.OptimizeClient, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go index 4d0682c300f..074040e9a9b 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go @@ -8,6 +8,7 @@ import ( "encoding/json" "fmt" "io" + "os" "path/filepath" "azureaiagent/internal/pkg/agents/optimize_api" @@ -17,6 +18,9 @@ import ( "github.com/spf13/cobra" ) +// optimizationDir is the default folder that holds optimized candidate versions. +const optimizationDir = ".agent_optimization" + type optimizeApplyFlags struct { candidate string agent string @@ -31,7 +35,7 @@ func newOptimizeApplyCommand(extCtx *azdext.ExtensionContext) *cobra.Command { Use: "apply", Short: "Apply optimized candidate configuration locally to your azd project.", Long: `Download the optimized configuration and skill files from an optimization -candidate and write them into your local azd project. +candidate and write them into your local azd project under .agent_optimization/. 
After applying, run 'azd deploy' to deploy the optimized agent version.`, Example: ` # Apply candidate config locally, then deploy @@ -91,7 +95,9 @@ func (a *OptimizeApplyAction) apply( if err != nil { return err } - agentYamlPath := filepath.Join(project.Path, svc.RelativePath, "agent.yaml") + + serviceDir := filepath.Join(project.Path, svc.RelativePath) + candidateDir := filepath.Join(serviceDir, optimizationDir, a.flags.candidate) bold.Fprintf(out, "Applying optimization candidate %s...\n\n", a.flags.candidate) @@ -101,43 +107,54 @@ func (a *OptimizeApplyAction) apply( } optClient := optimize_api.NewOptimizeClient(projectEndpoint, credential) - // Step 1: Fetch candidate config. + // Step 1: Fetch candidate config and write to config.json. fmt.Fprintf(out, " Fetching candidate config...\n") candidateConfig, err := optClient.GetCandidateConfig(ctx, a.flags.candidate) if err != nil { return fmt.Errorf("failed to fetch candidate config: %w", err) } - configJSON, err := json.Marshal(candidateConfig) + if err := os.MkdirAll(candidateDir, 0750); err != nil { + return fmt.Errorf("failed to create optimization directory: %w", err) + } + + // Clean up other candidate directories, keeping only baseline and the current candidate. + cleanOtherCandidates(filepath.Join(serviceDir, optimizationDir), a.flags.candidate, out) + + configJSON, err := json.MarshalIndent(candidateConfig, "", " ") if err != nil { return fmt.Errorf("failed to serialize candidate config: %w", err) } - // Step 2: Write OPTIMIZATION_CONFIG and OPTIMIZATION_CANDIDATE_ID into agent.yaml. 
- fmt.Fprintf(out, " Updating %s...\n", agentYamlPath) - if err := upsertAgentYamlEnvVar(agentYamlPath, "OPTIMIZATION_CONFIG", string(configJSON)); err != nil { - return fmt.Errorf("failed to update agent.yaml: %w", err) - } - if err := upsertAgentYamlEnvVar(agentYamlPath, "OPTIMIZATION_CANDIDATE_ID", a.flags.candidate); err != nil { - return fmt.Errorf("failed to update agent.yaml: %w", err) + configPath := filepath.Join(candidateDir, "config.json") + if err := os.WriteFile(configPath, configJSON, 0600); err != nil { + return fmt.Errorf("failed to write config.json: %w", err) } + fmt.Fprintf(out, " → %s\n", configPath) - // Step 3: Download skill files from the candidate manifest. - serviceDir := filepath.Join(project.Path, svc.RelativePath) - if n, dlErr := downloadSkillFiles(ctx, optClient, a.flags.candidate, serviceDir, out); dlErr != nil { + // Step 2: Download skill files into the candidate directory. + if n, dlErr := downloadSkillFilesToDir(ctx, optClient, a.flags.candidate, candidateDir, out); dlErr != nil { fmt.Fprintf(out, " warning: failed to download skill files: %s\n", dlErr) } else if n > 0 { fmt.Fprintf(out, " Downloaded %d skill file(s)\n", n) } - // Step 4: Store candidate ID in the azd environment for postdeploy tracking. - serviceKey := toServiceKey(svc.Name) - candidateKey := fmt.Sprintf("AGENT_%s_OPTIMIZATION_CANDIDATE_ID", serviceKey) + // Step 3: Write OPTIMIZATION_LOCAL_DIR into agent.yaml so the deploy + // pipeline knows a local optimization config exists. + agentYamlPath := filepath.Join(serviceDir, "agent.yaml") + fmt.Fprintf(out, " Updating %s...\n", agentYamlPath) + if err := upsertAgentYamlEnvVar(agentYamlPath, "OPTIMIZATION_LOCAL_DIR", optimizationDir); err != nil { + return fmt.Errorf("failed to update agent.yaml: %w", err) + } + // Step 4: Store candidate ID in the azd environment for tracking. 
+ serviceKey := toServiceKey(svc.Name) envResp, err := azdClient.Environment().GetCurrent(ctx, &azdext.EmptyRequest{}) if err != nil { return fmt.Errorf("failed to get current environment: %w", err) } + + candidateKey := fmt.Sprintf("AGENT_%s_OPTIMIZATION_CANDIDATE_ID", serviceKey) if _, err := azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ EnvName: envResp.Environment.Name, Key: candidateKey, @@ -149,9 +166,139 @@ func (a *OptimizeApplyAction) apply( // Done — prompt the user to deploy. fmt.Fprintln(out) color.New(color.FgGreen, color.Bold).Fprintf(out, - " ✓ Candidate %s applied successfully\n\n", a.flags.candidate) + " ✓ Candidate %s applied to %s\n\n", + a.flags.candidate, filepath.Join(optimizationDir, a.flags.candidate)) fmt.Fprintf(out, " Run %s to deploy the optimized agent.\n", color.CyanString("azd deploy --service %s", svc.Name)) return nil } + +// baselineConfig is the JSON structure saved as the agent's pre-optimization baseline. +type baselineConfig struct { + Instructions string `json:"instructions,omitempty"` + Model string `json:"model,omitempty"` + Name string `json:"name"` + SkillDir string `json:"skill_dir,omitempty"` +} + +// saveBaselineConfig writes the agent's current configuration to +// /.agent_optimization/baseline/config.json before optimization begins. 
+func saveBaselineConfig(agentProject, skillDir string, req *optimize_api.OptimizeRequest) error { + baseDir := filepath.Join(agentProject, optimizationDir, "baseline") + if err := os.MkdirAll(baseDir, 0750); err != nil { + return fmt.Errorf("creating baseline directory: %w", err) + } + + cfg := baselineConfig{ + Instructions: req.Agent.SystemPrompt, + Model: req.Agent.Model, + Name: req.Agent.AgentName, + SkillDir: skillDir, + } + + data, err := json.MarshalIndent(cfg, "", " ") + if err != nil { + return fmt.Errorf("serializing baseline config: %w", err) + } + + configPath := filepath.Join(baseDir, "config.json") + if err := os.WriteFile(configPath, data, 0600); err != nil { + return fmt.Errorf("writing baseline config: %w", err) + } + + return nil +} + +// loadBaselineConfig reads the baseline config from +// /.agent_optimization/baseline/config.json. +func loadBaselineConfig(agentProject string) (*baselineConfig, error) { + configPath := filepath.Join(agentProject, optimizationDir, "baseline", "config.json") + data, err := os.ReadFile(configPath) //nolint:gosec // path derived from project directory + if err != nil { + return nil, err + } + + var cfg baselineConfig + if err := json.Unmarshal(data, &cfg); err != nil { + return nil, fmt.Errorf("parsing baseline config: %w", err) + } + return &cfg, nil +} + +// downloadSkillFilesToDir fetches the candidate manifest, downloads all skill +// files, and writes them into the given directory. Returns the number of files written. 
+func downloadSkillFilesToDir( + ctx context.Context, + client *optimize_api.OptimizeClient, + candidateID string, + destDir string, + out io.Writer, +) (int, error) { + manifest, err := client.GetCandidate(ctx, candidateID) + if err != nil { + return 0, fmt.Errorf("fetching candidate manifest: %w", err) + } + + var skillFiles []optimize_api.CandidateFile + for _, f := range manifest.Files { + if isSkillFile(f) { + skillFiles = append(skillFiles, f) + } + } + if len(skillFiles) == 0 { + return 0, nil + } + + count := 0 + for _, f := range skillFiles { + if f.Path == "" { + continue + } + + content, err := client.GetCandidateFile(ctx, candidateID, f.Path) + if err != nil { + fmt.Fprintf(out, " warning: failed to download skill file %s: %s\n", f.Path, err) + continue + } + + outPath := filepath.Join(destDir, filepath.FromSlash(f.Path)) + if err := os.MkdirAll(filepath.Dir(outPath), 0750); err != nil { + return count, fmt.Errorf("creating directory for %s: %w", f.Path, err) + } + + if err := os.WriteFile(outPath, []byte(content), 0600); err != nil { + return count, fmt.Errorf("writing skill file %s: %w", f.Path, err) + } + + fmt.Fprintf(out, " → %s (%d bytes)\n", outPath, len(content)) + count++ + } + + return count, nil +} + +// cleanOtherCandidates removes all subdirectories in the optimization folder +// except "baseline" and the candidate being applied. 
+func cleanOtherCandidates(optimizeDir, currentCandidate string, out io.Writer) { + entries, err := os.ReadDir(optimizeDir) + if err != nil { + return + } + + for _, entry := range entries { + if !entry.IsDir() { + continue + } + name := entry.Name() + if name == "baseline" || name == currentCandidate { + continue + } + dir := filepath.Join(optimizeDir, name) + if err := os.RemoveAll(dir); err != nil { + fmt.Fprintf(out, " warning: failed to remove old candidate %s: %s\n", name, err) + } else { + fmt.Fprintf(out, " Removed old candidate: %s\n", name) + } + } +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go index 75824a0070f..8b53edca6cd 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -8,6 +8,8 @@ import ( "encoding/json" "fmt" "os" + "path/filepath" + "strings" "azureaiagent/internal/pkg/agents/opteval" "azureaiagent/internal/pkg/agents/optimize_api" @@ -132,6 +134,7 @@ func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.Optimi AgentName: c.Agent.Name, AgentVersion: c.Agent.Version, Model: c.Agent.Model, + SystemPrompt: c.Agent.ResolvedSystemPrompt(), }, Evaluators: c.Evaluators, Options: optimize_api.OptimizeOptions{ @@ -150,6 +153,13 @@ func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.Optimi }, } + // Map target_config from YAML to API format. + if c.Options.TargetConfig != nil { + req.Options.TargetConfig = &optimize_api.TargetConfig{ + Model: c.Options.TargetConfig.Model, + } + } + // Map criteria from config schema to API schema. for _, crit := range c.Criteria { req.Criteria = append(req.Criteria, optimize_api.Criterion{ @@ -182,6 +192,15 @@ func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.Optimi req.Dataset = c.InlineDataset } + // Load skills from skill_dir if specified. 
+ if c.Agent.SkillDir != "" { + skills, err := loadSkillsFromDir(c.Agent.SkillDir) + if err != nil { + return nil, fmt.Errorf("loading skills from %s: %w", c.Agent.SkillDir, err) + } + req.Agent.Skills = skills + } + return req, nil } @@ -219,3 +238,41 @@ func loadDatasetFile(path string) ([]optimize_api.DatasetTask, error) { return tasks, nil } + +// loadSkillsFromDir reads skill files from a directory and returns SkillDefinitions. +// Each file in the directory is treated as a skill: the filename (without extension) +// becomes the skill name, and the file content becomes the skill body. +// Subdirectories are recursed into — each file within is also loaded as a skill. +func loadSkillsFromDir(dir string) ([]optimize_api.SkillDefinition, error) { + entries, err := os.ReadDir(dir) + if err != nil { + return nil, fmt.Errorf("reading skill directory: %w", err) + } + + var skills []optimize_api.SkillDefinition + for _, entry := range entries { + entryPath := filepath.Join(dir, entry.Name()) + + if entry.IsDir() { + subSkills, err := loadSkillsFromDir(entryPath) + if err != nil { + return nil, err + } + skills = append(skills, subSkills...) 
+ continue + } + + data, err := os.ReadFile(entryPath) + if err != nil { + return nil, fmt.Errorf("reading skill file %s: %w", entry.Name(), err) + } + + name := strings.TrimSuffix(entry.Name(), filepath.Ext(entry.Name())) + skills = append(skills, optimize_api.SkillDefinition{ + Name: name, + Body: string(data), + }) + } + + return skills, nil +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go index 7cd311610a4..194b2a40eb6 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go @@ -9,7 +9,6 @@ import ( "fmt" "io" "os" - "path/filepath" "strings" "time" @@ -259,62 +258,6 @@ func isSkillFile(f optimize_api.CandidateFile) bool { return f.Type == "skill" || strings.HasPrefix(f.Path, "skills/") } -// downloadSkillFiles fetches the candidate manifest, downloads all skill files, -// and writes them into serviceDir. Returns the number of files written. -func downloadSkillFiles( - ctx context.Context, - client *optimize_api.OptimizeClient, - candidateID string, - serviceDir string, - out io.Writer, -) (int, error) { - manifest, err := client.GetCandidate(ctx, candidateID) - if err != nil { - return 0, fmt.Errorf("fetching candidate manifest: %w", err) - } - - var skillFiles []optimize_api.CandidateFile - for _, f := range manifest.Files { - if isSkillFile(f) { - skillFiles = append(skillFiles, f) - } - } - if len(skillFiles) == 0 { - return 0, nil - } - - count := 0 - for _, f := range skillFiles { - if f.Path == "" { - continue - } - - content, err := client.GetCandidateFile(ctx, candidateID, f.Path) - if err != nil { - fmt.Fprintf(out, " warning: failed to download skill file %s: %s\n", f.Path, err) - continue - } - - // Write relative to serviceDir. - // "skills/math/SKILL.md" becomes "/skills/math/SKILL.md". 
- outPath := filepath.Join(serviceDir, filepath.FromSlash(f.Path)) - - if err := os.MkdirAll(filepath.Dir(outPath), 0755); err != nil { - return count, fmt.Errorf("creating directory for %s: %w", f.Path, err) - } - - //nolint:gosec // G306: skill files should be readable - if err := os.WriteFile(outPath, []byte(content), 0644); err != nil { - return count, fmt.Errorf("writing skill file %s: %w", f.Path, err) - } - - fmt.Fprintf(out, " → %s (%d bytes)\n", outPath, len(content)) - count++ - } - - return count, nil -} - // extractLatestDefinition gets the latest version's definition as a map for flexible field access. func extractLatestDefinition(agent *agent_api.AgentObject) (map[string]any, error) { defBytes, err := json.Marshal(agent.Versions.Latest.Definition) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go index d1c46d3c384..3f7ade918f8 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go @@ -21,10 +21,6 @@ type EvalConfig struct { // Options holds run-time options (eval_model, etc.). Options *opteval.Options `yaml:"options,omitempty"` - // GenerationInstruction is the prompt used to generate adaptive evaluators - // and synthetic eval datasets. - GenerationInstruction string `yaml:"generation_instruction,omitempty"` - // MaxSamples is the maximum number of data samples to generate. 
MaxSamples int `yaml:"max_samples,omitempty"` diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go index 6fbe3d43d33..c9ed411b1a4 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go @@ -98,10 +98,11 @@ func TestEvalConfig_RoundTrip_FullFields(t *testing.T) { Config: opteval.Config{ Name: "full-test", Agent: opteval.AgentRef{ - Name: "booking-agent", - Kind: "hosted", - Version: "v3", - Model: "gpt-4.1", + Name: "booking-agent", + Kind: "hosted", + Version: "v3", + Model: "gpt-4.1", + SystemPrompt: "This agent handles restaurant reservations", }, DatasetReference: &opteval.DatasetRef{Name: "golden-data", Version: "v2"}, Evaluators: []string{"builtin.task_adherence", "custom-quality"}, @@ -109,8 +110,7 @@ func TestEvalConfig_RoundTrip_FullFields(t *testing.T) { Options: &opteval.Options{ EvalModel: "gpt-4o", }, - GenerationInstruction: "This agent handles restaurant reservations", - MaxSamples: 75, + MaxSamples: 75, } require.NoError(t, WriteEvalConfig(path, original)) @@ -129,7 +129,7 @@ func TestEvalConfig_RoundTrip_FullFields(t *testing.T) { assert.Equal(t, "builtin.task_adherence", loaded.Evaluators[0]) assert.Equal(t, "custom-quality", loaded.Evaluators[1]) assert.Equal(t, "gpt-4o", loaded.Options.EvalModel) - assert.Equal(t, "This agent handles restaurant reservations", loaded.GenerationInstruction) + assert.Equal(t, "This agent handles restaurant reservations", loaded.Agent.SystemPrompt) assert.Equal(t, 75, loaded.MaxSamples) } @@ -153,7 +153,7 @@ func TestEvalConfig_RoundTrip_MinimalFields(t *testing.T) { assert.Equal(t, "data.jsonl", loaded.DatasetFile) assert.Nil(t, loaded.DatasetReference) assert.Empty(t, loaded.Evaluators) - assert.Empty(t, loaded.GenerationInstruction) + assert.Empty(t, 
loaded.Agent.SystemPrompt) assert.Zero(t, loaded.MaxSamples) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go index 95446ff9d3d..6521acc32de 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go @@ -32,10 +32,26 @@ type Config struct { // AgentRef references the agent under evaluation/optimization. type AgentRef struct { - Name string `yaml:"name"` - Kind agent_yaml.AgentKind `yaml:"kind,omitempty"` - Version string `yaml:"version,omitempty"` - Model string `yaml:"model,omitempty"` + Name string `yaml:"name"` + Kind agent_yaml.AgentKind `yaml:"kind,omitempty"` + Version string `yaml:"version,omitempty"` + Model string `yaml:"model,omitempty"` + SystemPrompt string `yaml:"system_prompt,omitempty"` + SystemPromptFile string `yaml:"system_prompt_file,omitempty"` + SkillDir string `yaml:"skill_dir,omitempty"` +} + +// ResolvedSystemPrompt returns the system prompt text. If SystemPromptFile is +// set, its contents are read and returned; otherwise SystemPrompt is returned. +func (a *AgentRef) ResolvedSystemPrompt() string { + if a.SystemPromptFile != "" { + data, err := os.ReadFile(a.SystemPromptFile) + if err != nil { + return a.SystemPrompt + } + return string(data) + } + return a.SystemPrompt } // DatasetRef references a named/versioned dataset. @@ -44,20 +60,26 @@ type DatasetRef struct { Version string `yaml:"version,omitempty"` } +// TargetConfig specifies model candidates and other target-specific configuration. +type TargetConfig struct { + Model []string `yaml:"model,omitempty"` +} + // Options holds run-time options for eval and optimize. // Eval only uses EvalModel; optimize uses all fields. 
type Options struct { - EvalModel string `yaml:"eval_model,omitempty"` - Mode string `yaml:"mode,omitempty"` - TargetAttributes []string `yaml:"target_attributes,omitempty"` - Budget int `yaml:"budget,omitempty"` - MaxIterations int `yaml:"max_iterations,omitempty"` - MinImprovement float64 `yaml:"min_improvement,omitempty"` - ImprovementThreshold float64 `yaml:"improvement_threshold,omitempty"` - PassThreshold float64 `yaml:"pass_threshold,omitempty"` - KeepVersions bool `yaml:"keep_versions,omitempty"` - TasksPerIteration int `yaml:"tasks_per_iteration,omitempty"` - ReflectionModel string `yaml:"reflection_model,omitempty"` + EvalModel string `yaml:"eval_model,omitempty"` + Mode string `yaml:"mode,omitempty"` + TargetAttributes []string `yaml:"target_attributes,omitempty"` + TargetConfig *TargetConfig `yaml:"target_config,omitempty"` + Budget int `yaml:"budget,omitempty"` + MaxIterations int `yaml:"max_iterations,omitempty"` + MinImprovement float64 `yaml:"min_improvement,omitempty"` + ImprovementThreshold float64 `yaml:"improvement_threshold,omitempty"` + PassThreshold float64 `yaml:"pass_threshold,omitempty"` + KeepVersions bool `yaml:"keep_versions,omitempty"` + TasksPerIteration int `yaml:"tasks_per_iteration,omitempty"` + ReflectionModel string `yaml:"reflection_model,omitempty"` } // DefaultTargetAttributes are the default optimization target attributes. 
@@ -89,9 +111,10 @@ func (o *Options) UnmarshalYAML(value *yaml.Node) error { if len(o.TargetAttributes) == 0 { o.TargetAttributes = slices.Clone(DefaultTargetAttributes) - // o.MaxIterations = 5 - // o.Budget = 100 } + + o.MaxIterations = 3 + o.Budget = 30 return nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go index df95b41d7d2..7cbbf4d01cb 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go @@ -54,7 +54,8 @@ type AgentDefinition struct { // SkillDefinition describes a skill attached to an agent. type SkillDefinition struct { Name string `json:"name"` - Description string `json:"description"` + Description string `json:"description,omitempty"` + Body string `json:"body,omitempty"` } // DatasetTask is a single task in an inline dataset. @@ -78,6 +79,11 @@ type Criterion struct { Instruction string `json:"instruction"` } +// TargetConfig specifies model candidates and other target-specific configuration. +type TargetConfig struct { + Model []string `json:"model,omitempty"` +} + // OptimizeOptions controls the optimization run. type OptimizeOptions struct { Budget int `json:"budget,omitempty"` @@ -87,13 +93,14 @@ type OptimizeOptions struct { PassThreshold float64 `json:"passThreshold,omitempty"` EvalModel string `json:"evalModel"` // Send as both "strategies" (current server) and "targetAttributes" (future). 
- Strategies []string `json:"strategies,omitempty"` - TargetAttributes []string `json:"targetAttributes,omitempty"` - KeepVersions bool `json:"keepVersions,omitempty"` - TasksPerIteration int `json:"tasksPerIteration,omitempty"` - MaxReflectionTasks int `json:"maxReflectionTasks,omitempty"` - ReflectionModel string `json:"reflectionModel,omitempty"` - Mode string `json:"mode,omitempty"` + Strategies []string `json:"strategies,omitempty"` + TargetAttributes []string `json:"targetAttributes,omitempty"` + TargetConfig *TargetConfig `json:"targetConfig,omitempty"` + KeepVersions bool `json:"keepVersions,omitempty"` + TasksPerIteration int `json:"tasksPerIteration,omitempty"` + MaxReflectionTasks int `json:"maxReflectionTasks,omitempty"` + ReflectionModel string `json:"reflectionModel,omitempty"` + Mode string `json:"mode,omitempty"` } // --- Response models --- From da028c173d4c5973d214571eb80f08c5d6aac31a Mon Sep 17 00:00:00 2001 From: zyysurely Date: Fri, 15 May 2026 20:03:57 -0700 Subject: [PATCH 14/33] remove fixed data --- .../azure.ai.agents/internal/pkg/agents/opteval/yaml.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go index 6521acc32de..41d9d5cb234 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go @@ -113,8 +113,8 @@ func (o *Options) UnmarshalYAML(value *yaml.Node) error { o.TargetAttributes = slices.Clone(DefaultTargetAttributes) } - o.MaxIterations = 3 - o.Budget = 30 + // o.MaxIterations = 3 + // o.Budget = 30 return nil } From 95ad85892a734903ef0cbc1d6fdfc6bd483820f5 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Fri, 15 May 2026 20:32:58 -0700 Subject: [PATCH 15/33] candidate id update --- .../azure.ai.agents/internal/cmd/optimize_apply.go | 7 +++++-- 1 file changed, 5 
insertions(+), 2 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go index 074040e9a9b..2fc3df5ef83 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go @@ -139,13 +139,16 @@ func (a *OptimizeApplyAction) apply( fmt.Fprintf(out, " Downloaded %d skill file(s)\n", n) } - // Step 3: Write OPTIMIZATION_LOCAL_DIR into agent.yaml so the deploy - // pipeline knows a local optimization config exists. + // Step 3: Write OPTIMIZATION_LOCAL_DIR and OPTIMIZATION_CANDIDATE_ID into agent.yaml + // so the deploy pipeline knows which local optimization config to use. agentYamlPath := filepath.Join(serviceDir, "agent.yaml") fmt.Fprintf(out, " Updating %s...\n", agentYamlPath) if err := upsertAgentYamlEnvVar(agentYamlPath, "OPTIMIZATION_LOCAL_DIR", optimizationDir); err != nil { return fmt.Errorf("failed to update agent.yaml: %w", err) } + if err := upsertAgentYamlEnvVar(agentYamlPath, "OPTIMIZATION_CANDIDATE_ID", a.flags.candidate); err != nil { + return fmt.Errorf("failed to update agent.yaml: %w", err) + } // Step 4: Store candidate ID in the azd environment for tracking. 
serviceKey := toServiceKey(svc.Name) From 2f0e462227c0a03c3811fef484a76cc9240e6f14 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Mon, 18 May 2026 20:28:18 -0700 Subject: [PATCH 16/33] Update extension registry --- cli/azd/extensions/registry.json | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/cli/azd/extensions/registry.json b/cli/azd/extensions/registry.json index 70a2515652c..cc99086dbbe 100644 --- a/cli/azd/extensions/registry.json +++ b/cli/azd/extensions/registry.json @@ -1,5 +1,4 @@ { - "schemaVersion": "1.0", "extensions": [ { "id": "microsoft.azd.demo", @@ -3892,50 +3891,50 @@ "darwin/amd64": { "checksum": { "algorithm": "sha256", - "value": "ab396ab8dd3b627e0467e35535f6cb6bb29efb475986f7218cf2322946e4e33a" + "value": "e933b6fbe2dd0420d1cea6c63e48f7c753dcdf9e15b8efa2978cdde72e81a8e4" }, "entryPoint": "azure-ai-agents-darwin-amd64", - "url": "https://github.com/Azure/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-darwin-amd64.zip" + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-darwin-amd64.zip" }, "darwin/arm64": { "checksum": { "algorithm": "sha256", - "value": "919f18a74e4fa85d8b3db74e34e1d41af10b1e9a78fae171be7e42085f8bfef9" + "value": "19194ea63609f056a2b772bcf81b27605a02cffcfcc272afabfd0f2e9a0e20d5" }, "entryPoint": "azure-ai-agents-darwin-arm64", - "url": "https://github.com/Azure/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-darwin-arm64.zip" + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-darwin-arm64.zip" }, "linux/amd64": { "checksum": { "algorithm": "sha256", - "value": "dbcf13152499e43dee123bf8d3a25aba16f2f4cf287975187b9f79b62495cb53" + "value": "279474857915889b972cc6c041ebd73ec91131590ec57db122cb65a5b5e4daef" }, "entryPoint": "azure-ai-agents-linux-amd64", - "url": 
"https://github.com/Azure/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-linux-amd64.tar.gz" + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-linux-amd64.tar.gz" }, "linux/arm64": { "checksum": { "algorithm": "sha256", - "value": "2a84f09164cf0260727e82e1e43ace5aef7a8d3380e0602221d05ece9230fd87" + "value": "818a85228bcb61559dee3618e1a602795514a79b2d14f84c7e1c9906e980f86c" }, "entryPoint": "azure-ai-agents-linux-arm64", - "url": "https://github.com/Azure/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-linux-arm64.tar.gz" + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-linux-arm64.tar.gz" }, "windows/amd64": { "checksum": { "algorithm": "sha256", - "value": "e3a9d3f0358852767babe84ce16fb01e52427458776751e11d2024fd77694a0a" + "value": "60d65a87f3f3ed594245f9004b3a58d881b031c7a723afc5e0f5f7c833a0a14d" }, "entryPoint": "azure-ai-agents-windows-amd64.exe", - "url": "https://github.com/Azure/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-windows-amd64.zip" + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-windows-amd64.zip" }, "windows/arm64": { "checksum": { "algorithm": "sha256", - "value": "1ff890de9c6507ff8a2e104e697545e1928eb4f1557a35e06f7c2168478507c3" + "value": "e2b29ac9718d86b07112b6e1d380208e6374da507dd8bceeb23a2eef93b8d5b4" }, "entryPoint": "azure-ai-agents-windows-arm64.exe", - "url": "https://github.com/Azure/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-windows-arm64.zip" + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-windows-arm64.zip" } } } @@ -5038,11 +5037,11 @@ } }, { + "version": "0.0.6-preview", "capabilities": [ 
"custom-commands", "metadata" ], - "version": "0.0.6-preview", "usage": "azd ai models \u003ccommand\u003e [options]", "examples": [ { From 5f3e3bc84b9332497d53df450d12859d39d85e38 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Mon, 18 May 2026 20:41:14 -0700 Subject: [PATCH 17/33] Update extension registry --- cli/azd/extensions/registry.json | 76 ++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/cli/azd/extensions/registry.json b/cli/azd/extensions/registry.json index cc99086dbbe..38a59c52ac1 100644 --- a/cli/azd/extensions/registry.json +++ b/cli/azd/extensions/registry.json @@ -3937,6 +3937,82 @@ "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-windows-arm64.zip" } } + }, + { + "version": "0.1.31-optbugbash-preview", + "requiredAzdVersion": "\u003e1.23.13", + "capabilities": [ + "custom-commands", + "lifecycle-events", + "mcp-server", + "service-target-provider", + "metadata" + ], + "providers": [ + { + "name": "azure.ai.agent", + "type": "service-target", + "description": "Deploys agents to the Foundry Agent Service" + } + ], + "usage": "azd ai agent \u003ccommand\u003e [options]", + "examples": [ + { + "name": "init", + "description": "Initialize a new AI agent project.", + "usage": "azd ai agent init" + } + ], + "artifacts": { + "darwin/amd64": { + "checksum": { + "algorithm": "sha256", + "value": "ac322b193b148ad273f7b96f3b7b69b36c900609f0eb835ac906cfce421bf7d2" + }, + "entryPoint": "azure-ai-agents-darwin-amd64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-darwin-amd64.zip" + }, + "darwin/arm64": { + "checksum": { + "algorithm": "sha256", + "value": "a77ee7c8cbb2e708b0fdc4e8c4985b48af5e0b384e0cf29e8f0e589fec1044dc" + }, + "entryPoint": "azure-ai-agents-darwin-arm64", + "url": 
"https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-darwin-arm64.zip" + }, + "linux/amd64": { + "checksum": { + "algorithm": "sha256", + "value": "8baf6c4ebe5db270557c7245016aa67fac8391d99d1d2a17bd6a2084faf620b8" + }, + "entryPoint": "azure-ai-agents-linux-amd64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-linux-amd64.tar.gz" + }, + "linux/arm64": { + "checksum": { + "algorithm": "sha256", + "value": "ec70d32603b4971368fdaa2964eba6ae0d01c5beacb5d402f411aa08a3be5a88" + }, + "entryPoint": "azure-ai-agents-linux-arm64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-linux-arm64.tar.gz" + }, + "windows/amd64": { + "checksum": { + "algorithm": "sha256", + "value": "0d7eeb7773df5908640bda81b9fd90c77ccdc28bb79f4db7a44c843cce0aabcb" + }, + "entryPoint": "azure-ai-agents-windows-amd64.exe", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-windows-amd64.zip" + }, + "windows/arm64": { + "checksum": { + "algorithm": "sha256", + "value": "86d65182ba0b9e46c908a5f8953c3eb1056a41031e85a34e814d6ca3da07e78c" + }, + "entryPoint": "azure-ai-agents-windows-arm64.exe", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-windows-arm64.zip" + } + } } ] }, From 55fa39766df989be9dfb312d4dffbf2881376919 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Mon, 18 May 2026 21:10:01 -0700 Subject: [PATCH 18/33] align the demo output format + eval update --- .../extensions/azure.ai.agents/extension.yaml | 2 +- .../azure.ai.agents/internal/cmd/eval.go | 3 +- .../internal/cmd/eval_helpers.go | 218 ++-------------- .../azure.ai.agents/internal/cmd/eval_init.go | 164 ++++++++---- 
.../internal/cmd/eval_init_jobs.go | 95 ++++--- .../internal/cmd/eval_init_prompts.go | 22 +- .../internal/cmd/eval_init_test.go | 128 +++++----- .../internal/cmd/eval_progress.go | 16 +- .../azure.ai.agents/internal/cmd/eval_run.go | 117 ++++++++- .../internal/cmd/eval_run_test.go | 6 +- .../azure.ai.agents/internal/cmd/eval_show.go | 4 +- .../azure.ai.agents/internal/cmd/eval_test.go | 179 ++++++------- .../internal/cmd/eval_update.go | 240 ++++++++++++++++++ .../azure.ai.agents/internal/cmd/optimize.go | 66 ++--- .../internal/cmd/optimize_apply.go | 79 ++++++ .../internal/cmd/optimize_config.go | 91 ++++++- .../internal/cmd/optimize_config_test.go | 89 ++++++- .../internal/pkg/agents/dataset_api/models.go | 140 +++++++++- .../pkg/agents/dataset_api/models_test.go | 23 ++ .../pkg/agents/dataset_api/operations.go | 212 ++++++++++++++++ .../internal/pkg/agents/eval_api/artifacts.go | 226 ++++++++++++----- .../pkg/agents/eval_api/eval_config.go | 4 +- .../pkg/agents/eval_api/eval_config_test.go | 22 +- .../pkg/agents/eval_api/generation.go | 7 +- .../pkg/agents/eval_api/generation_test.go | 16 +- .../internal/pkg/agents/eval_api/models.go | 81 ++++-- .../pkg/agents/eval_api/operations.go | 28 ++ .../pkg/agents/eval_api/operations_test.go | 6 +- .../pkg/agents/eval_api/portal_urls.go | 73 ++++++ .../internal/pkg/agents/opteval/yaml.go | 206 +++++++++++++-- .../internal/pkg/agents/opteval/yaml_test.go | 60 ++++- .../extensions/azure.ai.agents/version.txt | 2 +- 32 files changed, 1975 insertions(+), 650 deletions(-) create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls.go diff --git a/cli/azd/extensions/azure.ai.agents/extension.yaml b/cli/azd/extensions/azure.ai.agents/extension.yaml index 857f27744b8..8e24764b11b 100644 --- a/cli/azd/extensions/azure.ai.agents/extension.yaml +++ b/cli/azd/extensions/azure.ai.agents/extension.yaml @@ -5,7 +5,7 @@ 
displayName: Foundry agents (Preview) description: Ship agents with Microsoft Foundry from your terminal. (Preview) usage: azd ai agent [options] # NOTE: Make sure version.txt is in sync with this version. -version: 0.1.31-preview +version: 0.1.31-optbugbash-preview requiredAzdVersion: ">1.23.13" language: go capabilities: diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go index 3d3578704e6..d9d1f161aac 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go @@ -96,6 +96,7 @@ Use eval init to generate an eval config, then eval run to execute it.`, cmd.AddCommand(newEvalInitCommand(extCtx)) cmd.AddCommand(newEvalRunCommand(extCtx)) + cmd.AddCommand(newEvalUpdateCommand(extCtx)) cmd.AddCommand(newEvalListCommand()) cmd.AddCommand(newEvalShowCommand()) @@ -439,7 +440,7 @@ func pollEvalOperationWithSpinner( return nil, fmt.Errorf("%s did not return an operation ID", strings.ToLower(label)) } - progress.setRunning(label) + progress.setRunning(label, operationID) poller := eval_api.NewPoller(operationID, apiVersion, get) job, err := poller.Poll(ctx) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go index cd7a66dd669..3b0e499118e 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go @@ -5,26 +5,14 @@ package cmd import ( "context" - "encoding/base64" - "encoding/json" "fmt" "log" - "os" - "path/filepath" - "strings" - "azureaiagent/internal/pkg/agents/dataset_api" "azureaiagent/internal/pkg/agents/eval_api" - "azureaiagent/internal/pkg/agents/opteval" - "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" "github.com/azure/azure-dev/cli/azd/pkg/azdext" - "github.com/google/uuid" ) -// foundryBaseDir is the base directory for eval artifacts 
under the project root. -const foundryBaseDir = ".azure/.foundry" - // resolveEvalOutputPath resolves the eval config output path. func resolveEvalOutputPath(output, agentProject string) string { return eval_api.ResolveEvalOutputPath(output, agentProject) @@ -35,208 +23,40 @@ func resolveEvalConfigPath(config, agentProject string) string { return eval_api.ResolveEvalConfigPath(config, agentProject) } -// ensureFoundryDirs creates the .azure/.foundry directory tree with standard -// subdirectories (datasets, evaluators, results). -func ensureFoundryDirs(projectRoot string) error { - base := filepath.Join(projectRoot, ".azure", ".foundry") - for _, sub := range []string{"datasets", "evaluators"} { - if err := os.MkdirAll(filepath.Join(base, sub), 0750); err != nil { - return err - } - } - return nil -} - -// downloadDatasetArtifact downloads the dataset and writes it locally. -// If the download fails (e.g., non-TLS test server), a placeholder is written. -func downloadDatasetArtifact( - ctx context.Context, - client *dataset_api.DatasetClient, - projectRoot string, - ref *opteval.DatasetRef, - apiVersion string, -) error { - if ref == nil || ref.Name == "" { - return nil - } - - dest := datasetArtifactPath(projectRoot, ref) - dir := filepath.Dir(dest) - if err := os.MkdirAll(dir, 0750); err != nil { - return fmt.Errorf("creating dataset artifact dir: %w", err) - } - - // Attempt full download via the dataset API. - cred, credErr := client.GetDatasetCredential(ctx, ref.Name, ref.Version, apiVersion) - if credErr != nil { - log.Printf("[debug] dataset credential fetch failed: %v", credErr) +// resolvePortalPrefix reads AZURE_AI_PROJECT_ID from the azd environment and +// returns a PortalPrefix for building Foundry portal URLs. +// Returns nil on any failure. 
+func resolvePortalPrefix(ctx context.Context, azdClient *azdext.AzdClient, envName string) *eval_api.PortalPrefix { + if azdClient == nil || envName == "" { return nil } - - downloadURL := cred.ResolvedDownloadURI() - if downloadURL == "" { - return nil - } - - data, dlErr := client.DownloadDataset(ctx, downloadURL) - if dlErr != nil { - log.Printf("[debug] dataset download failed: %v", dlErr) - return nil - } - - return os.WriteFile(dest, data, 0600) -} - -// datasetArtifactPath returns the local filesystem path for a downloaded dataset. -func datasetArtifactPath(projectRoot string, ref *opteval.DatasetRef) string { - if ref == nil || ref.Name == "" { - return "" - } - name := ref.Name - if ref.Version != "" { - name = name + "-" + ref.Version - } - return filepath.Join(projectRoot, ".azure", ".foundry", "datasets", name+".jsonl") -} - -// saveEvaluatorResult saves the raw evaluator generation result. -func saveEvaluatorResult(projectRoot, evaluatorName string, result json.RawMessage) { - if evaluatorName == "" || len(result) == 0 { - return - } - dir := filepath.Join(projectRoot, ".azure", ".foundry", "evaluators") - if err := os.MkdirAll(dir, 0750); err != nil { - log.Printf("[debug] failed to create evaluator dir: %v", err) - return - } - var pretty json.RawMessage - if err := json.Unmarshal(result, &pretty); err == nil { - if formatted, err := json.MarshalIndent(pretty, "", " "); err == nil { - result = formatted - } - } - path := filepath.Join(dir, evaluatorName+".json") - if err := os.WriteFile(path, result, 0600); err != nil { - log.Printf("[debug] failed to save evaluator result: %v", err) - } -} - -// writeEvalReviewArtifacts writes human-readable review artifacts for evaluators. -// It writes a stub YAML file for each evaluator unless a result JSON already exists. 
-func writeEvalReviewArtifacts(projectRoot string, cfg *eval_api.EvalConfig) { - if cfg == nil { - return - } - dir := filepath.Join(projectRoot, ".azure", ".foundry", "evaluators") - if err := os.MkdirAll(dir, 0750); err != nil { - log.Printf("[debug] failed to create evaluator review dir: %v", err) - return - } - for _, evaluator := range cfg.Evaluators { - if evaluator == "" || eval_api.IsBuiltinEvaluator(evaluator) { - continue - } - // Skip if a result JSON already exists. - jsonPath := filepath.Join(dir, evaluator+".json") - if _, err := os.Stat(jsonPath); err == nil { - continue - } - yamlPath := filepath.Join(dir, evaluator+".yaml") - stub := fmt.Sprintf("# Evaluator stub: %s\nname: %s\n", evaluator, evaluator) - if err := os.WriteFile(yamlPath, []byte(stub), 0600); err != nil { - log.Printf("[debug] failed to write evaluator stub: %v", err) - } - } - - // Print artifact paths for user review. - artifactsDir := filepath.Join(projectRoot, ".azure", ".foundry") - fmt.Printf("\n Artifacts: %s\n", artifactsDir) - if cfg.DatasetReference != nil && cfg.DatasetReference.Name != "" { - name := cfg.DatasetReference.Name - if cfg.DatasetReference.Version != "" { - name += "-" + cfg.DatasetReference.Version - } - fmt.Printf(" datasets/%s.jsonl\n", name) - } - for _, evaluator := range cfg.Evaluators { - if evaluator != "" && !eval_api.IsBuiltinEvaluator(evaluator) { - fmt.Printf(" evaluators/%s.json\n", evaluator) - } - } -} - -// writeJSONFile writes a value as formatted JSON to the specified path. -func writeJSONFile(path string, v any) error { - if err := os.MkdirAll(filepath.Dir(path), 0750); err != nil { - return fmt.Errorf("creating output directory: %w", err) - } - data, err := json.MarshalIndent(v, "", " ") - if err != nil { - return fmt.Errorf("marshalling JSON: %w", err) - } - return os.WriteFile(path, data, 0600) -} - -// buildEvalReportURL constructs the Foundry portal URL for an eval run report. 
-// It reads AZURE_AI_PROJECT_ID from the azd environment and encodes the subscription ID. -// Returns empty string on any failure. -func buildEvalReportURL(ctx context.Context, azdClient *azdext.AzdClient, envName, evalID, runID string) string { - if azdClient == nil || envName == "" || evalID == "" || runID == "" { - return "" - } v, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ EnvName: envName, Key: "AZURE_AI_PROJECT_ID", }) if err != nil || v.Value == "" { log.Printf("[debug] could not read AZURE_AI_PROJECT_ID: %v", err) - return "" + return nil } - reportURL, err := evalReportURL(v.Value, evalID, runID) + prefix, err := eval_api.NewPortalPrefix(v.Value) if err != nil { - log.Printf("[debug] failed to build eval report URL: %v", err) - return "" + log.Printf("[debug] failed to build portal prefix: %v", err) + return nil } - return reportURL + return prefix } -// evalReportURL constructs a URL to the eval run report in the Foundry portal. -// It parses the ARM resource ID to extract subscription, resource group, account, and project info. -func evalReportURL(projectResourceID, evalID, runID string) (string, error) { - resourceID, err := arm.ParseResourceID(projectResourceID) - if err != nil { - return "", fmt.Errorf("failed to parse project resource ID: %w", err) - } - - encodedSub, err := encodeSubscriptionForURL(resourceID.SubscriptionID) - if err != nil { - return "", fmt.Errorf("failed to encode subscription ID: %w", err) - } - - if resourceID.Parent == nil || - !strings.Contains(string(resourceID.ResourceType.Type), "/") { - return "", fmt.Errorf( - "resource ID does not represent a Foundry project (missing parent account): %s", - projectResourceID, - ) +// buildEvalReportURL constructs the Foundry portal URL for an eval run report. +// Returns empty string on any failure. 
+func buildEvalReportURL(ctx context.Context, azdClient *azdext.AzdClient, envName, evalID, runID string) string { + if evalID == "" || runID == "" { + return "" } - - return fmt.Sprintf( - "https://ai.azure.com/nextgen/r/%s,%s,,%s,%s/build/evaluations/%s/run/%s", - encodedSub, resourceID.ResourceGroupName, - resourceID.Parent.Name, resourceID.Name, - evalID, runID, - ), nil -} - -// encodeSubscriptionForURL encodes a subscription ID GUID as base64 without padding. -func encodeSubscriptionForURL(subscriptionID string) (string, error) { - guid, err := uuid.Parse(subscriptionID) - if err != nil { - return "", fmt.Errorf("invalid subscription ID format: %w", err) + prefix := resolvePortalPrefix(ctx, azdClient, envName) + if prefix == nil { + return "" } - guidBytes, _ := guid.MarshalBinary() - return strings.TrimRight(base64.URLEncoding.EncodeToString(guidBytes), "="), nil + return prefix.EvalRunURL(evalID, runID) } // formatAny converts any value to a string for display. diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go index 771cbf3ab46..b125788fd38 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -12,6 +12,7 @@ import ( "azureaiagent/internal/pkg/agents/agent_yaml" "azureaiagent/internal/pkg/agents/eval_api" + "azureaiagent/internal/pkg/agents/opteval" "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/fatih/color" @@ -23,21 +24,21 @@ const DataGenerationAPIVersion = "v1" // EvalInitFlags defines the customized flags for the eval init command. 
type evalInitFlags struct { - name string - agent string - projectEndpoint string - systemPrompt string - systemPromptFile string - evalModel string - dataset string - output string - maxSamples int - evaluators []string - noWait bool - resetDefaults bool - evalModelSet bool - maxSamplesSet bool - traceDays int + name string + agent string + projectEndpoint string + instruction string + instructionFile string + evalModel string + dataset string + output string + maxSamples int + evaluators []string + noWait bool + resetDefaults bool + evalModelSet bool + maxSamplesSet bool + traceDays int // Internal flags set during interactive prompts. regenerateDataset bool regenerateEvaluator bool @@ -51,11 +52,11 @@ func newEvalInitCommand(extCtx *azdext.ExtensionContext) *cobra.Command { Long: `Generate a local eval suite for a deployed agent. By default, this command submits dataset and evaluator generation jobs, waits for -completion, downloads review artifacts under .azure/.foundry, and writes eval.yaml at +completion, downloads review artifacts, and writes eval.yaml at the agent project root. Use --no-wait to write pending operation IDs and return.`, Example: ` azd ai agent eval init - azd ai agent eval init --system-prompt "This agent handles restaurant reservations." --eval-model gpt-4o --max-samples 50 - azd ai agent eval init --system-prompt-file ./instructions.md --eval-model gpt-4o + azd ai agent eval init --gen-instruction "This agent handles restaurant reservations." --eval-model gpt-4o --max-samples 50 + azd ai agent eval init --gen-instruction-file ./instructions.md --eval-model gpt-4o azd ai agent eval init --dataset ./tests/golden.jsonl --evaluator builtin.intent_resolution`, Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { @@ -72,8 +73,8 @@ the agent project root. Use --no-wait to write pending operation IDs and return. 
cmd.Flags().BoolVar(&flags.noWait, "no-wait", false, "Submit generation jobs and return immediately") cmd.Flags().StringVar(&flags.agent, "agent", "", "Target agent name") cmd.Flags().StringVarP(&flags.projectEndpoint, "project-endpoint", "p", "", "Microsoft Foundry project endpoint URL") - cmd.Flags().StringVarP(&flags.systemPrompt, "system-prompt", "g", "", "Agent system prompt used for dataset and evaluator generation") - cmd.Flags().StringVarP(&flags.systemPromptFile, "system-prompt-file", "G", "", "Path to a file containing the agent system prompt") + cmd.Flags().StringVarP(&flags.instruction, "gen-instruction", "g", "", "Agent instruction used for dataset and evaluator generation") + cmd.Flags().StringVarP(&flags.instructionFile, "gen-instruction-file", "G", "", "Path to a file containing the agent instruction") cmd.Flags().StringVar(&flags.evalModel, "eval-model", defaultEvalModel, "Model used for evaluation and generation, and also as the default model for evaluation") cmd.Flags().StringVar(&flags.dataset, "dataset", "", "Existing local file or registered dataset name to use for evaluation (instead of generating a new dataset)") cmd.Flags().IntVar(&flags.maxSamples, "max-samples", defaultEvalSamples, "Number of samples to generate (15-1000)") @@ -87,9 +88,17 @@ the agent project root. Use --no-wait to write pending operation IDs and return. // runEvalInit executes the eval init command logic. It resolves context, prompts for missing options, submits generation jobs, polls for completion (unless --no-wait), writes the eval config, and prints next steps. 
func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error { - if flags.systemPrompt != "" && flags.systemPromptFile != "" { - return fmt.Errorf("cannot use both --system-prompt and --system-prompt-file; provide one or the other") + if flags.instruction != "" && flags.instructionFile != "" { + return fmt.Errorf("cannot use both --gen-instruction and --gen-instruction-file; provide one or the other") } + + // Validate instruction file early when the path won't be resolved relative to a project. + if flags.instructionFile != "" { + if _, err := os.Stat(flags.instructionFile); err != nil && filepath.IsAbs(flags.instructionFile) { + return fmt.Errorf("instruction file %q is not accessible: %w", flags.instructionFile, err) + } + } + resolved, err := resolveEvalContext(ctx, evalContextOptions{ agent: flags.agent, projectEndpoint: flags.projectEndpoint, @@ -101,13 +110,13 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error } defer resolved.azdClient.Close() - // Resolve relative system_prompt_file paths against the agent project directory. - if flags.systemPromptFile != "" { - if !filepath.IsAbs(flags.systemPromptFile) && resolved.projectRoot != "" { - flags.systemPromptFile = filepath.Join(resolved.projectRoot, flags.systemPromptFile) + // Resolve relative instruction file paths against the agent project directory. 
+ if flags.instructionFile != "" && !filepath.IsAbs(flags.instructionFile) { + if resolved.projectRoot != "" { + flags.instructionFile = filepath.Join(resolved.projectRoot, flags.instructionFile) } - if _, err := os.Stat(flags.systemPromptFile); err != nil { - return fmt.Errorf("system prompt file %q is not accessible: %w", flags.systemPromptFile, err) + if _, err := os.Stat(flags.instructionFile); err != nil { + return fmt.Errorf("instruction file %q is not accessible: %w", flags.instructionFile, err) } } @@ -117,7 +126,7 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error // When eval.yaml exists, decide whether to regenerate or create fresh. existingCfg, hasExisting := tryLoadExistingEvalConfig(configPath) isRegenerate := false - var builtinEvals []string + var builtinEvals opteval.EvaluatorList if flags.resetDefaults && resolved.envName != "" { clearEvalState(ctx, resolved.azdClient, resolved.envName) @@ -146,9 +155,9 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error if existingCfg.Options != nil && !flags.evalModelSet { flags.evalModel = existingCfg.Options.EvalModel } - if flags.systemPrompt == "" && flags.systemPromptFile == "" { - flags.systemPrompt = existingCfg.Agent.SystemPrompt - flags.systemPromptFile = existingCfg.Agent.SystemPromptFile + if flags.instruction == "" && flags.instructionFile == "" { + flags.instruction = existingCfg.Agent.Instruction.Value + flags.instructionFile = existingCfg.Agent.Instruction.File } if !flags.maxSamplesSet && existingCfg.MaxSamples > 0 { flags.maxSamples = existingCfg.MaxSamples @@ -176,24 +185,21 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error return err } - // Finalize the eval suite name with a random suffix to avoid collisions. - flags.name = resolveEvalName(flags) + "-" + randomSuffix() + // Finalize the eval suite name. On fresh init, add a random suffix to + // avoid collisions. 
On regeneration, keep the existing name. + if !isRegenerate { + flags.name = resolveEvalName(flags) + "-" + randomSuffix() + } - // Prompt agents use the agent source directly; hosted agents require a system-prompt. + // Prompt agents use the agent source directly; hosted agents require an instruction. if resolved.agentKind != agent_yaml.AgentKindPrompt && - flags.systemPrompt == "" && flags.systemPromptFile == "" && (flags.dataset == "" || len(flags.evaluators) == 0) { - return fmt.Errorf("--system-prompt is required when generating eval assets for a hosted agent") + flags.instruction == "" && flags.instructionFile == "" && (flags.dataset == "" || len(flags.evaluators) == 0) { + return fmt.Errorf("--gen-instruction is required when generating eval assets for a hosted agent") } if flags.maxSamples < 15 || flags.maxSamples > 1000 { return fmt.Errorf("--max-samples must be between 15 and 1000") } - if resolved.hasProject { - if err := ensureFoundryDirs(resolved.projectRoot); err != nil { - return err - } - } - evalCfg := newEvalConfig(flags, resolved) state := &evalState{} @@ -252,7 +258,8 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error return writePendingEvalInit(ctx, resolved, configPath, evalCfg, state) } - if err := pollAndFinalizeJobs(ctx, resolved, evalCfg, state, builtinEvals); err != nil { + pollRes, err := pollAndFinalizeJobs(ctx, resolved, evalCfg, state, builtinEvals) + if err != nil { if _, ok := errors.AsType[*initTimeoutError](err); ok { return writeTimedOutEvalInit(ctx, resolved, configPath, evalCfg, state) } @@ -266,22 +273,81 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error } if resolved.hasProject { - writeEvalReviewArtifacts(resolved.projectRoot, evalCfg) + eval_api.WriteEvalReviewArtifacts(resolved.agentProject, evalCfg) } if isRegenerate { - fmt.Println(color.GreenString("Eval suite regenerated")) + fmt.Println(color.GreenString("\nEval suite regenerated")) } else { - 
fmt.Println(color.GreenString("Eval suite created")) + fmt.Println(color.GreenString("\nEval suite created")) } fmt.Printf(" Config: %s\n", configPath) if evalCfg.DatasetFile != "" { fmt.Printf(" Dataset: %s\n", evalCfg.DatasetFile) + } else if evalCfg.DatasetReference != nil && evalCfg.DatasetReference.Name != "" { + ds := evalCfg.DatasetReference.Name + if evalCfg.DatasetReference.Version != "" { + ds += " (" + evalCfg.DatasetReference.Version + ")" + } + fmt.Printf(" Dataset: %s\n", ds) + if resolved.hasProject { + fmt.Printf(" %s\n", eval_api.DatasetArtifactPath(resolved.agentProject, evalCfg.DatasetReference)) + } } for _, evaluator := range evalCfg.Evaluators { - if evaluator != "" { - fmt.Printf(" Evaluator: %s\n", evaluator) + if evaluator.Name != "" { + ev := evaluator.Name + if evaluator.Version != "" { + ev += " (" + evaluator.Version + ")" + } + fmt.Printf(" Evaluator: %s\n", ev) + if resolved.hasProject && !eval_api.IsBuiltinEvaluator(evaluator.Name) { + fmt.Printf(" %s\n", + filepath.Join(resolved.agentProject, eval_api.EvaluatorLocalURI(evaluator.Name))) + } } } + + // Print evaluator rubric dimensions if available. + printEvalDimensions(pollRes) + + // Print portal links. + printEvalPortalLinks(ctx, resolved, evalCfg) + fmt.Printf("\n Review the generated assets, then run:\n %s\n", color.CyanString("azd ai agent eval run")) return nil } + +// printEvalDimensions prints rubric dimensions from the poll results if available. +func printEvalDimensions(results *pollResults) { + if results == nil || results.EvaluatorResult == nil { + return + } + if len(results.EvaluatorResult.Definition.Dimensions) == 0 { + return + } + eval_api.PrintEvaluatorDimensions(results.EvaluatorResult) +} + +// printEvalPortalLinks prints Foundry portal links for the generated dataset and evaluator. 
+func printEvalPortalLinks(ctx context.Context, resolved *evalResolvedContext, evalCfg *evalConfig) { + prefix := resolvePortalPrefix(ctx, resolved.azdClient, resolved.envName) + if prefix == nil { + return + } + hasLink := false + if evalCfg.DatasetReference != nil && evalCfg.DatasetReference.Name != "" { + fmt.Printf("\n "+color.HiBlackString("Portal:")+"\n Dataset: %s\n", + color.CyanString(prefix.DatasetURL(evalCfg.DatasetReference.Name, evalCfg.DatasetReference.Version))) + hasLink = true + } + for _, evaluator := range evalCfg.Evaluators { + if evaluator.Name != "" && !eval_api.IsBuiltinEvaluator(evaluator.Name) { + if !hasLink { + fmt.Println("\n " + color.HiBlackString("Portal:")) + hasLink = true + } + fmt.Printf(" Evaluator: %s\n", + color.CyanString(prefix.EvaluatorURL(evaluator.Name, evaluator.Version))) + } + } +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go index cc38e434293..c9d93a29d82 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -37,17 +37,17 @@ func randomSuffix() string { return hex.EncodeToString(b) } -// resolvedSystemPrompt returns the system prompt content from flags, reading -// from file if systemPromptFile is set. -func resolvedSystemPrompt(flags *evalInitFlags) string { - if flags.systemPromptFile != "" { - data, err := os.ReadFile(flags.systemPromptFile) //nolint:gosec // user-provided path validated earlier +// resolvedInstruction returns the instruction content from flags, reading +// from file if instructionFile is set. 
+func resolvedInstruction(flags *evalInitFlags) string { + if flags.instructionFile != "" { + data, err := os.ReadFile(flags.instructionFile) //nolint:gosec // user-provided path validated earlier if err != nil { - return flags.systemPrompt + return flags.instruction } return string(data) } - return flags.systemPrompt + return flags.instruction } func newEvalConfig(flags *evalInitFlags, resolved *evalResolvedContext) *evalConfig { @@ -56,10 +56,10 @@ func newEvalConfig(flags *evalInitFlags, resolved *evalResolvedContext) *evalCon Kind: resolved.agentKind, Version: resolved.version, } - if flags.systemPromptFile != "" { - agent.SystemPromptFile = flags.systemPromptFile - } else { - agent.SystemPrompt = flags.systemPrompt + if flags.instructionFile != "" { + agent.Instruction = opteval.InstructionRef{File: flags.instructionFile} + } else if flags.instruction != "" { + agent.Instruction = opteval.InstructionRef{Value: flags.instruction} } return &evalConfig{ Config: opteval.Config{ @@ -81,7 +81,7 @@ func submitDatasetGeneration( flags *evalInitFlags, ) (*eval_api.GenerationJob, error) { // Traces are only supported for evaluator generation, not dataset generation. 
- prompt := resolvedSystemPrompt(flags) + prompt := resolvedInstruction(flags) sources := eval_api.BuildGenerationSources( string(resolved.agentKind), resolved.agentName, resolved.version, prompt, nil, ) @@ -103,7 +103,7 @@ func submitEvaluatorGeneration( if flags.traceDays > 0 { traces = &eval_api.TraceOptions{Days: flags.traceDays} } - prompt := resolvedSystemPrompt(flags) + prompt := resolvedInstruction(flags) sources := eval_api.BuildGenerationSources( string(resolved.agentKind), resolved.agentName, resolved.version, prompt, traces, ) @@ -134,18 +134,26 @@ func resolveLocalDatasetFile(dataset string, agentProject string) (string, error } func datasetFromJob(job *eval_api.GenerationJob) *evalDatasetRef { + name, version := job.ResolvedNameVersion() + if name == "" { + return nil + } return &evalDatasetRef{ - Name: job.ResolvedDatasetName(), - Version: job.ResolvedDatasetVersion(), + Name: name, + Version: version, } } -func evaluatorFromJob(job *eval_api.GenerationJob) string { - return job.ResolvedEvaluatorName() +func evaluatorFromJob(job *eval_api.GenerationJob) (string, string) { + return job.ResolvedNameVersion() } -func evaluatorsFromFlags(values []string) []string { - return values +func evaluatorsFromFlags(values []string) opteval.EvaluatorList { + refs := make(opteval.EvaluatorList, len(values)) + for i, v := range values { + refs[i] = opteval.EvaluatorRef{Name: v} + } + return refs } func buildOpenAIEvalRequest(evalCfg *evalConfig) *eval_api.CreateOpenAIEvalRequest { @@ -159,7 +167,7 @@ func resumeEvalInit( evalCfg *evalConfig, state *evalState, ) error { - if err := pollAndFinalizeJobs(ctx, resolved, evalCfg, state, nil); err != nil { + if _, err := pollAndFinalizeJobs(ctx, resolved, evalCfg, state, nil); err != nil { if _, ok := errors.AsType[*initTimeoutError](err); ok { return writeTimedOutEvalInit(ctx, resolved, configPath, evalCfg, state) } @@ -168,11 +176,17 @@ func resumeEvalInit( state.InitStatus = "completed" clearEvalState(ctx, 
resolved.azdClient, resolved.envName) if resolved.hasProject { - writeEvalReviewArtifacts(resolved.projectRoot, evalCfg) + eval_api.WriteEvalReviewArtifacts(resolved.agentProject, evalCfg) } return writeEvalConfig(configPath, evalCfg) } +// pollResults carries parsed outputs from completed generation jobs so that +// the caller can display them after both jobs finish. +type pollResults struct { + EvaluatorResult *eval_api.EvaluatorResult +} + // pollAndFinalizeJobs polls pending dataset and evaluator generation jobs in // parallel, saves artifacts when an azd project exists, and updates state and // evalCfg. Jobs whose status is already terminal are skipped (safe for resume). @@ -183,8 +197,9 @@ func pollAndFinalizeJobs( resolved *evalResolvedContext, evalCfg *evalConfig, state *evalState, - builtinEvals []string, -) error { + builtinEvals opteval.EvaluatorList, +) (*pollResults, error) { + results := &pollResults{} // Each goroutine writes to distinct fields of evalCfg and state, so no // mutex is needed for those. Only the error variables are shared across // both goroutines and guarded by wg.Wait() (written before Wait, read after). @@ -226,14 +241,22 @@ func pollAndFinalizeJobs( // Dataset goroutine owns: state.DatasetGenStatus, evalCfg.DatasetReference, evalCfg.DatasetFile. state.DatasetGenStatus = completed.NormalizedStatus() dsRef := datasetFromJob(completed) + if dsRef == nil { + return + } evalCfg.DatasetReference = dsRef + if resolved.hasProject { - if err := downloadDatasetArtifact( - ctx, resolved.datasetClient, resolved.projectRoot, dsRef, DefaultAgentAPIVersion, - ); err != nil { + localURI, err := eval_api.DownloadDatasetArtifact( + ctx, resolved.datasetClient, resolved.agentProject, dsRef, DefaultAgentAPIVersion, + ) + if err != nil { datasetPollErr = err return } + if localURI != "" { + dsRef.LocalURI = localURI + } } }() } @@ -252,11 +275,19 @@ func pollAndFinalizeJobs( return } // Evaluator goroutine owns: state.EvalGenStatus, evalCfg.Evaluators. 
- evalName := evaluatorFromJob(completed) + evalName, evalVersion := evaluatorFromJob(completed) state.EvalGenStatus = completed.NormalizedStatus() - evalCfg.Evaluators = append(builtinEvals, evalName) + evalRef := opteval.EvaluatorRef{ + Name: evalName, + Version: evalVersion, + LocalURI: eval_api.EvaluatorLocalURI(evalName), + } + evalCfg.Evaluators = append(builtinEvals, evalRef) + + results.EvaluatorResult = eval_api.ParseEvaluatorResult(completed.Result) + if resolved.hasProject { - saveEvaluatorResult(resolved.projectRoot, evalName, completed.Result) + eval_api.SaveEvaluatorResult(resolved.agentProject, evalName, completed.Result) } }() } @@ -269,7 +300,7 @@ func pollAndFinalizeJobs( dsTimeout := isPollerTimeout(datasetPollErr) evalTimeout := isPollerTimeout(evalPollErr) if dsTimeout || evalTimeout { - return &initTimeoutError{ + return results, &initTimeoutError{ datasetOpID: state.DatasetGenOpID, evaluatorOpID: state.EvalGenOpID, datasetTimedOut: dsTimeout, @@ -278,9 +309,9 @@ func pollAndFinalizeJobs( } if datasetPollErr != nil { - return datasetPollErr + return results, datasetPollErr } - return evalPollErr + return results, evalPollErr } // isPollerTimeout returns true when the error is a *eval_api.PollerTimeoutError. 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go index 97e6ab4d269..22c9ea6cac4 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go @@ -46,7 +46,7 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f needsGeneration := true // adaptive evaluator is always generated needsEvalGen := true - if flags.systemPrompt == "" && flags.systemPromptFile == "" && needsGeneration && resolved.agentKind != agent_yaml.AgentKindPrompt { + if flags.instruction == "" && flags.instructionFile == "" && needsGeneration && resolved.agentKind != agent_yaml.AgentKindPrompt { // Let the user choose between inline text or loading from a file. inputChoices := []*azdext.SelectChoice{ {Label: "Type inline", Value: "inline"}, @@ -55,25 +55,25 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f defaultIdx := int32(0) selResp, err := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ Options: &azdext.SelectOptions{ - Message: "How would you like to provide the system prompt?", + Message: "How would you like to provide the agent instruction?", Choices: inputChoices, SelectedIndex: &defaultIdx, }, }) if err != nil { - return fmt.Errorf("prompting for system prompt input method: %w", err) + return fmt.Errorf("prompting for instruction input method: %w", err) } if inputChoices[int(*selResp.Value)].Value == "file" { // Prompt for the file path. 
pathResp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ Options: &azdext.PromptOptions{ - Message: "Path to system prompt file", + Message: "Path to agent instruction file", IgnoreHintKeys: true, }, }) if err != nil { - return fmt.Errorf("prompting for system prompt file path: %w", err) + return fmt.Errorf("prompting for instruction file path: %w", err) } filePath := strings.TrimSpace(pathResp.Value) // Resolve relative paths against the agent project directory. @@ -81,9 +81,9 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f filePath = filepath.Join(resolved.projectRoot, filePath) } if _, err := os.Stat(filePath); err != nil { - return fmt.Errorf("system prompt file %q is not accessible: %w", filePath, err) + return fmt.Errorf("instruction file %q is not accessible: %w", filePath, err) } - flags.systemPromptFile = filePath + flags.instructionFile = filePath } else { // Inline text input. resp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ @@ -93,9 +93,9 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f }, }) if err != nil { - return fmt.Errorf("prompting for system prompt: %w", err) + return fmt.Errorf("prompting for instruction: %w", err) } - flags.systemPrompt = strings.TrimSpace(resp.Value) + flags.instruction = strings.TrimSpace(resp.Value) } } @@ -300,12 +300,12 @@ func promptRegenerateChoices( // Ask about evaluator. generated, builtin := eval_api.SplitEvaluators(existingCfg.Evaluators) if len(generated) > 0 { - generatedLabel := strings.Join(generated, ", ") + generatedLabel := strings.Join(generated.Names(), ", ") msg := fmt.Sprintf("Existing evaluator: %s. Do you want to regenerate?", generatedLabel) if len(builtin) > 0 { msg = fmt.Sprintf( "Existing evaluator: %s (built-in evaluators %s will be kept). 
Do you want to regenerate?", - generatedLabel, strings.Join(builtin, ", "), + generatedLabel, strings.Join(builtin.Names(), ", "), ) } resp, err := prompt.Confirm(ctx, &azdext.ConfirmRequest{ diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go index 975db965b81..804423cce87 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go @@ -34,11 +34,11 @@ func TestNewEvalInitCommand_Flags(t *testing.T) { {"no-wait", "false"}, {"agent", ""}, {"project-endpoint", ""}, - {"system-prompt", ""}, - {"system-prompt-file", ""}, + {"gen-instruction", ""}, + {"gen-instruction-file", ""}, {"eval-model", defaultEvalModel}, {"dataset", ""}, - {"max-samples", "100"}, + {"max-samples", "15"}, {"out-file", defaultEvalConfigName}, {"reset-defaults", "false"}, } @@ -68,42 +68,45 @@ func TestNewEvalInitCommand_ShortOutFile(t *testing.T) { } // --------------------------------------------------------------------------- -// system-prompt / system-prompt-file mutual exclusion +// --agent-instruction / --agent-instruction-file mutual exclusion // --------------------------------------------------------------------------- func TestRunEvalInit_MutualExclusion(t *testing.T) { t.Parallel() flags := &evalInitFlags{ - systemPrompt: "inline text", - systemPromptFile: "some-file.txt", + instruction: "inline text", + instructionFile: "some-file.txt", } err := runEvalInit(t.Context(), flags, true) require.Error(t, err) - assert.Contains(t, err.Error(), "cannot use both --system-prompt and --system-prompt-file") + assert.Contains(t, err.Error(), "cannot use both --gen-instruction and --gen-instruction-file") } -func TestRunEvalInit_SystemPromptFile(t *testing.T) { +func TestRunEvalInit_InstructionFile(t *testing.T) { t.Parallel() tmpDir := t.TempDir() instrFile := filepath.Join(tmpDir, "instruction.md") 
require.NoError(t, os.WriteFile(instrFile, []byte(" Test booking agent \n"), 0600)) flags := &evalInitFlags{ - systemPromptFile: instrFile, - evalModel: defaultEvalModel, - maxSamples: 10, + instructionFile: instrFile, + evalModel: defaultEvalModel, + maxSamples: 10, } // runEvalInit will fail later (no azd client), but file validation should pass. _ = runEvalInit(t.Context(), flags, true) // File path remains on the flag — content is NOT inlined. - assert.Equal(t, instrFile, flags.systemPromptFile) - assert.Empty(t, flags.systemPrompt) + assert.Equal(t, instrFile, flags.instructionFile) + assert.Empty(t, flags.instruction) } -func TestRunEvalInit_SystemPromptFileMissing(t *testing.T) { +func TestRunEvalInit_InstructionFileMissing(t *testing.T) { t.Parallel() + // Use filepath.Join with TempDir to get a proper absolute path that doesn't exist. + missingFile := filepath.Join(t.TempDir(), "nonexistent", "instruction.txt") flags := &evalInitFlags{ - systemPromptFile: "/nonexistent/path/instruction.txt", + instructionFile: missingFile, + projectEndpoint: "https://example.ai.azure.com/", } err := runEvalInit(t.Context(), flags, true) require.Error(t, err) @@ -120,9 +123,9 @@ func TestNewEvalConfig(t *testing.T) { t.Run("uses default name", func(t *testing.T) { t.Parallel() flags := &evalInitFlags{ - systemPrompt: "Test the booking agent", - evalModel: "gpt-4.1", - maxSamples: 50, + instruction: "Test the booking agent", + evalModel: "gpt-4.1", + maxSamples: 50, } resolved := &evalResolvedContext{ agentName: "booking-agent", @@ -137,7 +140,7 @@ func TestNewEvalConfig(t *testing.T) { assert.Equal(t, agent_yaml.AgentKindHosted, cfg.Agent.Kind) assert.Equal(t, "v2", cfg.Agent.Version) assert.Equal(t, "gpt-4.1", cfg.Options.EvalModel) - assert.Equal(t, "Test the booking agent", cfg.Agent.SystemPrompt) + assert.Equal(t, "Test the booking agent", cfg.Agent.Instruction.Value) assert.Equal(t, 50, cfg.MaxSamples) }) @@ -152,12 +155,12 @@ func TestNewEvalConfig(t *testing.T) { 
assert.Equal(t, "my-suite", cfg.Name) }) - t.Run("stores system_prompt_file when file provided", func(t *testing.T) { + t.Run("stores instruction_file when file provided", func(t *testing.T) { t.Parallel() flags := &evalInitFlags{ - systemPromptFile: "./prompts/system.md", - evalModel: "gpt-4o", - maxSamples: 20, + instructionFile: "./prompts/system.md", + evalModel: "gpt-4o", + maxSamples: 20, } resolved := &evalResolvedContext{ agentName: "my-agent", @@ -167,8 +170,8 @@ func TestNewEvalConfig(t *testing.T) { cfg := newEvalConfig(flags, resolved) - assert.Empty(t, cfg.Agent.SystemPrompt) - assert.Equal(t, "./prompts/system.md", cfg.Agent.SystemPromptFile) + assert.Empty(t, cfg.Agent.Instruction.Value) + assert.Equal(t, "./prompts/system.md", cfg.Agent.Instruction.File) }) } @@ -184,42 +187,46 @@ func TestDatasetFromJob(t *testing.T) { job *eval_api.GenerationJob expectedName string expectedVersion string + expectedNil bool }{ { "result fields", &eval_api.GenerationJob{ Result: json.RawMessage(`{"name":"ds-1","version":"v2"}`), }, - "ds-1", "v2", + "ds-1", "v2", false, }, { - "result name", + "result name defaults version to latest", &eval_api.GenerationJob{ Result: json.RawMessage(`{"outputs":[{"name":"ds-2"}]}`), }, - "ds-2", "v1", + "ds-2", "latest", false, }, { "nested outputs format", &eval_api.GenerationJob{ Result: json.RawMessage(`{"outputs":[{"name":"ds-3","version":"v3"}]}`), }, - "ds-3", "v3", + "ds-3", "v3", false, }, { - "empty defaults version to v1", - &eval_api.GenerationJob{ - Result: json.RawMessage(`{"outputs":[{"name":"ds-4"}]}`), - }, - "ds-4", "v1", + "empty result returns nil", + &eval_api.GenerationJob{}, + "", "", true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() ref := datasetFromJob(tt.job) - assert.Equal(t, tt.expectedName, ref.Name) - assert.Equal(t, tt.expectedVersion, ref.Version) + if tt.expectedNil { + assert.Nil(t, ref) + } else { + require.NotNil(t, ref) + assert.Equal(t, tt.expectedName, 
ref.Name) + assert.Equal(t, tt.expectedVersion, ref.Version) + } }) } } @@ -299,29 +306,32 @@ func TestBuildModelChoices(t *testing.T) { func TestEvaluatorFromJob(t *testing.T) { t.Parallel() - t.Run("extracts name from result", func(t *testing.T) { + t.Run("extracts name and version from result", func(t *testing.T) { t.Parallel() job := &eval_api.GenerationJob{ - Result: json.RawMessage(`{"name":"quality-eval"}`), + Result: json.RawMessage(`{"name":"quality-eval","version":"v2"}`), } - name := evaluatorFromJob(job) + name, version := evaluatorFromJob(job) assert.Equal(t, "quality-eval", name) + assert.Equal(t, "v2", version) }) - t.Run("extracts name from result display_name", func(t *testing.T) { + t.Run("defaults version to latest", func(t *testing.T) { t.Parallel() job := &eval_api.GenerationJob{ Result: json.RawMessage(`{"name":"smoke-core","display_name":"smoke-core"}`), } - name := evaluatorFromJob(job) + name, version := evaluatorFromJob(job) assert.Equal(t, "smoke-core", name) + assert.Equal(t, "latest", version) }) - t.Run("returns empty when no name", func(t *testing.T) { + t.Run("returns empty name when no result", func(t *testing.T) { t.Parallel() job := &eval_api.GenerationJob{} - name := evaluatorFromJob(job) + name, version := evaluatorFromJob(job) assert.Empty(t, name) + assert.Empty(t, version) }) } @@ -397,14 +407,14 @@ func TestEvaluatorsFromFlags(t *testing.T) { t.Parallel() result := evaluatorsFromFlags([]string{"builtin.task_adherence", "my-custom"}) require.Len(t, result, 2) - assert.Equal(t, "builtin.task_adherence", result[0]) - assert.Equal(t, "my-custom", result[1]) + assert.Equal(t, "builtin.task_adherence", result[0].Name) + assert.Equal(t, "my-custom", result[1].Name) }) t.Run("nil returns nil", func(t *testing.T) { t.Parallel() result := evaluatorsFromFlags(nil) - assert.Nil(t, result) + assert.Empty(t, result) }) } @@ -423,7 +433,7 @@ func TestBuildOpenAIEvalRequest(t *testing.T) { Version: "v1", }, DatasetReference: 
&evalDatasetRef{Name: "ds", Version: "v1"}, - Evaluators: []string{"builtin.quality"}, + Evaluators: opteval.EvaluatorList{{Name: "builtin.quality"}}, }, Options: &opteval.Options{EvalModel: "gpt-4o"}, } @@ -509,7 +519,7 @@ func TestTryLoadExistingEvalConfig_Found(t *testing.T) { Name: "my-agent", }, DatasetFile: "data.jsonl", - Evaluators: []string{"quality"}, + Evaluators: opteval.EvaluatorList{{Name: "quality"}}, }, } require.NoError(t, writeEvalConfig(cfgPath, cfg)) @@ -518,7 +528,7 @@ func TestTryLoadExistingEvalConfig_Found(t *testing.T) { require.True(t, ok) assert.Equal(t, "smoke-core", loaded.Name) assert.Equal(t, "my-agent", loaded.Agent.Name) - assert.Equal(t, []string{"quality"}, loaded.Evaluators) + assert.Equal(t, opteval.EvaluatorList{{Name: "quality"}}, loaded.Evaluators) } func TestTryLoadExistingEvalConfig_NotFound(t *testing.T) { @@ -567,26 +577,26 @@ func TestSplitEvaluators(t *testing.T) { t.Parallel() tests := []struct { name string - input []string - expectedGenerated []string - expectedBuiltin []string + input opteval.EvaluatorList + expectedGenerated opteval.EvaluatorList + expectedBuiltin opteval.EvaluatorList }{ { "mixed list", - []string{"builtin.task_adherence", "my-quality", "builtin.safety"}, - []string{"my-quality"}, - []string{"builtin.task_adherence", "builtin.safety"}, + opteval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "my-quality"}, {Name: "builtin.safety"}}, + opteval.EvaluatorList{{Name: "my-quality"}}, + opteval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "builtin.safety"}}, }, { "all builtin", - []string{"builtin.quality", "builtin.safety"}, + opteval.EvaluatorList{{Name: "builtin.quality"}, {Name: "builtin.safety"}}, nil, - []string{"builtin.quality", "builtin.safety"}, + opteval.EvaluatorList{{Name: "builtin.quality"}, {Name: "builtin.safety"}}, }, { "all generated", - []string{"smoke-core", "custom-1"}, - []string{"smoke-core", "custom-1"}, + opteval.EvaluatorList{{Name: "smoke-core"}, {Name: 
"custom-1"}}, + opteval.EvaluatorList{{Name: "smoke-core"}, {Name: "custom-1"}}, nil, }, { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go index 60a7277cf9e..b981f6bd6e9 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go @@ -85,12 +85,16 @@ func (p *evalProgress) clearSpinnerLine() { } } -func (p *evalProgress) setRunning(label string) { +func (p *evalProgress) setRunning(label string, detail string) { p.mu.Lock() defer p.mu.Unlock() p.starts[label] = time.Now() p.clearSpinnerLine() - fmt.Printf(" %s %s\n", color.BlueString("(–) Running"), label) + if detail != "" { + fmt.Printf(" %s %s %s\n", color.BlueString("(\u2013) Running"), label, color.HiBlackString("(%s)", detail)) + } else { + fmt.Printf(" %s %s\n", color.BlueString("(\u2013) Running"), label) + } } func (p *evalProgress) setDone(label string) { @@ -101,6 +105,14 @@ func (p *evalProgress) setDone(label string) { fmt.Printf(" %s %s (%s)\n", color.GreenString("(✓) Done"), label, elapsed) } +// printDetail prints an indented detail line (e.g. a portal link) safely +// without conflicting with the spinner. 
+func (p *evalProgress) printDetail(text string) { + p.mu.Lock() + defer p.mu.Unlock() + p.clearSpinnerLine() + fmt.Printf(" · %s\n", text) +} func (p *evalProgress) setFailed(label string) { p.mu.Lock() defer p.mu.Unlock() diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go index 52ad30b5f1e..8cfd67b9f9d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go @@ -11,6 +11,7 @@ import ( "net/url" "os" "strings" + "time" "azureaiagent/internal/pkg/agents/eval_api" "azureaiagent/internal/pkg/agents/opteval" @@ -22,6 +23,8 @@ import ( type evalRunFlags struct { config string + name string + noWait bool } func newEvalRunCommand(extCtx *azdext.ExtensionContext) *cobra.Command { @@ -38,6 +41,8 @@ func newEvalRunCommand(extCtx *azdext.ExtensionContext) *cobra.Command { }, } cmd.Flags().StringVar(&flags.config, "config", defaultEvalConfigName, "Local eval config YAML") + cmd.Flags().StringVar(&flags.name, "name", "", "Name for the eval run (defaults to eval config name)") + cmd.Flags().BoolVar(&flags.noWait, "no-wait", false, "Start the run and return immediately without waiting for results") return cmd } @@ -100,7 +105,7 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { } runReq := &eval_api.CreateOpenAIEvalRunRequest{ - Name: evalCfg.Name, + Name: resolveRunName(ctx, resolved.azdClient, flags.name, evalCfg.Name, noPrompt), Metadata: map[string]string{"azd_agent": evalCfg.Agent.Name}, } @@ -145,11 +150,113 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { if reportURL != "" { fmt.Printf(" Report: %s\n", color.CyanString(reportURL)) } - fmt.Printf("\n To view result summary, run:\n %s\n %s\n", - color.CyanString("azd ai agent eval list"), - color.CyanString("azd ai agent eval show"), + + if flags.noWait { + fmt.Printf("\n To view result 
summary, run:\n %s\n %s\n", + color.CyanString("azd ai agent eval list"), + color.CyanString("azd ai agent eval show"), + ) + return nil + } + + // Poll until the eval run reaches a terminal state. + completed, err := pollEvalRun(ctx, resolved.evalClient, evalID, run.ID) + if err != nil { + return err + } + + // Report URL was already printed above; clear it to avoid duplication. + completed.ReportURL = "" + + fmt.Println() + return printEvalRunSummary(evalID, completed) +} + +// resolveRunName determines the eval run name from the flag, interactive +// prompt, or config default (in that priority order). +func resolveRunName( + ctx context.Context, + azdClient *azdext.AzdClient, + flagName, configName string, + noPrompt bool, +) string { + if flagName != "" { + return flagName + } + + defaultName := configName + if defaultName == "" { + defaultName = defaultEvalName + } + + if !noPrompt { + resp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Eval run name", + DefaultValue: defaultName, + IgnoreHintKeys: true, + }, + }) + if err == nil { + if value := strings.TrimSpace(resp.Value); value != "" { + return value + } + } + } + + return defaultName +} + +// pollEvalRun polls an eval run until it reaches a terminal status. +// Terminal statuses: "completed", "failed", "canceled". 
+func pollEvalRun( + ctx context.Context, + client *eval_api.EvalClient, + evalID, runID string, +) (*eval_api.OpenAIEvalRun, error) { + const ( + interval = 5 * time.Second + maxAttempts = 360 // ~30 minutes ) - return nil + + progress := newEvalProgress() + progress.Start() + defer progress.Stop() + + progress.setRunning("Eval run", runID) + + for range maxAttempts { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(interval): + } + + run, err := client.GetOpenAIEvalRun(ctx, evalID, runID, DefaultAgentAPIVersion) + if err != nil { + progress.setFailed("Eval run") + return nil, fmt.Errorf("failed to poll eval run: %w", err) + } + + switch run.Status { + case "completed": + progress.setDone("Eval run") + return run, nil + case "failed": + progress.setFailed("Eval run") + errMsg := "eval run failed" + if run.Error != nil { + errMsg = fmt.Sprintf("eval run failed: %v", run.Error) + } + return nil, fmt.Errorf("%s", errMsg) + case "canceled", "cancelled": + progress.setFailed("Eval run") + return nil, fmt.Errorf("eval run was canceled") + } + } + + progress.setTimedOut("Eval run") + return nil, fmt.Errorf("eval run %s did not complete within %d attempts", runID, maxAttempts) } // loadEvalDatasetFile reads a JSONL file and returns each line as a map. 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go index e50479be7f7..ebcf66f6881 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go @@ -20,7 +20,7 @@ import ( func TestNewEvalRunCommand_Flags(t *testing.T) { t.Parallel() - cmd := newEvalRunCommand() + cmd := newEvalRunCommand(nil) f := cmd.Flags().Lookup("config") require.NotNil(t, f) @@ -29,14 +29,14 @@ func TestNewEvalRunCommand_Flags(t *testing.T) { func TestNewEvalRunCommand_NoArgs(t *testing.T) { t.Parallel() - cmd := newEvalRunCommand() + cmd := newEvalRunCommand(nil) assert.NoError(t, cmd.Args(cmd, nil)) assert.Error(t, cmd.Args(cmd, []string{"extra"})) } func TestNewEvalRunCommand_UseString(t *testing.T) { t.Parallel() - cmd := newEvalRunCommand() + cmd := newEvalRunCommand(nil) assert.Equal(t, "run", cmd.Use) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go index cf59343b8d0..a3b34cc25b3 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go @@ -71,7 +71,7 @@ func runEvalShow(ctx context.Context, evalID string, flags *evalShowFlags) error return fmt.Errorf("failed to get eval run: %w", err) } if flags.output != "" { - return writeJSONFile(flags.output, run) + return eval_api.WriteJSONFile(flags.output, run) } return printEvalRunSummary(evalID, run) } @@ -85,7 +85,7 @@ func runEvalShow(ctx context.Context, evalID string, flags *evalShowFlags) error return fmt.Errorf("failed to list eval runs: %w", err) } if flags.output != "" { - return writeJSONFile(flags.output, map[string]any{ + return eval_api.WriteJSONFile(flags.output, map[string]any{ "eval": evalObj, "runs": runs.Data, }) diff --git 
a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go index 6addde4266d..cbf7d5d427c 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go @@ -76,49 +76,37 @@ func TestGenerationJob_NormalizedStatus(t *testing.T) { assert.Equal(t, "running", (&eval_api.GenerationJob{}).NormalizedStatus()) } -func TestGenerationJob_ResolvedDatasetName(t *testing.T) { +func TestGenerationJob_ResolvedNameVersion(t *testing.T) { t.Parallel() - assert.Equal(t, "", (&eval_api.GenerationJob{}).ResolvedDatasetName()) - // Extracts name from the result JSON. + // Empty job returns empty name and empty version. + name, version := (&eval_api.GenerationJob{}).ResolvedNameVersion() + assert.Equal(t, "", name) + assert.Equal(t, "", version) + + // Extracts name and version from the result JSON. job := &eval_api.GenerationJob{ Result: json.RawMessage(`{"name":"generated-ds","version":"v2"}`), } - assert.Equal(t, "generated-ds", job.ResolvedDatasetName()) + name, version = job.ResolvedNameVersion() + assert.Equal(t, "generated-ds", name) + assert.Equal(t, "v2", version) - // Extracts name from result.outputs[0] (nested API response format). + // Extracts from result.outputs[0] (nested API response format). jobNested := &eval_api.GenerationJob{ Result: json.RawMessage(`{"outputs":[{"type":"dataset","name":"nested-ds","version":"36735"}]}`), } - assert.Equal(t, "nested-ds", jobNested.ResolvedDatasetName()) -} - -func TestGenerationJob_ResolvedDatasetVersion(t *testing.T) { - t.Parallel() - assert.Equal(t, "v1", (&eval_api.GenerationJob{}).ResolvedDatasetVersion()) - - // Extracts version from the result JSON. - job := &eval_api.GenerationJob{ - Result: json.RawMessage(`{"name":"ds","version":"v5"}`), - } - assert.Equal(t, "v5", job.ResolvedDatasetVersion()) - - // Extracts version from result.outputs[0] (nested API response format). 
- jobNested := &eval_api.GenerationJob{ - Result: json.RawMessage(`{"outputs":[{"type":"dataset","name":"ds","version":"36735"}]}`), - } - assert.Equal(t, "36735", jobNested.ResolvedDatasetVersion()) -} - -func TestGenerationJob_ResolvedEvaluatorName(t *testing.T) { - t.Parallel() - assert.Equal(t, "", (&eval_api.GenerationJob{}).ResolvedEvaluatorName()) + name, version = jobNested.ResolvedNameVersion() + assert.Equal(t, "nested-ds", name) + assert.Equal(t, "36735", version) - // Extracts name from the result JSON. - job := &eval_api.GenerationJob{ - Result: json.RawMessage(`{"name":"smoke-core","display_name":"smoke-core"}`), + // Defaults version to "latest" when missing. + jobNoVer := &eval_api.GenerationJob{ + Result: json.RawMessage(`{"name":"smoke-core"}`), } - assert.Equal(t, "smoke-core", job.ResolvedEvaluatorName()) + name, version = jobNoVer.ResolvedNameVersion() + assert.Equal(t, "smoke-core", name) + assert.Equal(t, "latest", version) } func TestOpenAIEval_ResolvedID(t *testing.T) { @@ -239,33 +227,6 @@ func TestDetectEvalAgentKind(t *testing.T) { }) } -// --------------------------------------------------------------------------- -// ensureFoundryDirs -// --------------------------------------------------------------------------- - -func TestEnsureFoundryDirs(t *testing.T) { - t.Parallel() - dir := t.TempDir() - - err := ensureFoundryDirs(dir) - require.NoError(t, err) - - for _, sub := range []string{"datasets", "evaluators", "results"} { - path := filepath.Join(dir, ".azure", ".foundry", sub) - info, err := os.Stat(path) - require.NoError(t, err, "expected %s to exist", sub) - assert.True(t, info.IsDir()) - } -} - -func TestEnsureFoundryDirs_Idempotent(t *testing.T) { - t.Parallel() - dir := t.TempDir() - - require.NoError(t, ensureFoundryDirs(dir)) - require.NoError(t, ensureFoundryDirs(dir)) -} - // --------------------------------------------------------------------------- // evalState — stored in azd environment (integration-tested via eval 
init/run) // --------------------------------------------------------------------------- @@ -277,31 +238,24 @@ func TestEnsureFoundryDirs_Idempotent(t *testing.T) { func TestWriteEvalReviewArtifacts(t *testing.T) { t.Parallel() dir := t.TempDir() - require.NoError(t, ensureFoundryDirs(dir)) cfg := &evalConfig{} cfg.DatasetReference = &evalDatasetRef{Name: "test-data", Version: "v1"} - cfg.Evaluators = []string{"quality"} + cfg.Evaluators = opteval.EvaluatorList{{Name: "quality"}} - writeEvalReviewArtifacts(dir, cfg) + eval_api.WriteEvalReviewArtifacts(dir, cfg) - // writeEvalReviewArtifacts only writes evaluator stubs; dataset download - // is handled separately by downloadDatasetArtifact. - dsPath := filepath.Join(dir, ".azure", ".foundry", "datasets", "test-data-v1.jsonl") - assert.NoFileExists(t, dsPath) - - evPath := filepath.Join(dir, ".azure", ".foundry", "evaluators", "quality.yaml") + evPath := filepath.Join(dir, "evaluators", "quality", "quality.yaml") assert.FileExists(t, evPath) } func TestWriteEvalReviewArtifacts_NilDataset(t *testing.T) { t.Parallel() dir := t.TempDir() - require.NoError(t, ensureFoundryDirs(dir)) cfg := &evalConfig{} // No dataset reference — should not panic. 
- writeEvalReviewArtifacts(dir, cfg) + eval_api.WriteEvalReviewArtifacts(dir, cfg) } // --------------------------------------------------------------------------- @@ -311,54 +265,63 @@ func TestWriteEvalReviewArtifacts_NilDataset(t *testing.T) { func TestSaveEvaluatorResult(t *testing.T) { t.Parallel() dir := t.TempDir() - require.NoError(t, ensureFoundryDirs(dir)) - result := json.RawMessage(`{"name":"smoke-core","description":"An evaluator"}`) - saveEvaluatorResult(dir, "smoke-core", result) + result := json.RawMessage(`{"name":"smoke-core","definition":{"type":"rubric","dimensions":[{"id":"quality","weight":10}]}}`) + eval_api.SaveEvaluatorResult(dir, "smoke-core", result) - path := filepath.Join(dir, ".azure", ".foundry", "evaluators", "smoke-core.json") + path := filepath.Join(dir, "evaluators", "smoke-core", "rubric_dimensions.json") assert.FileExists(t, path) data, err := os.ReadFile(path) require.NoError(t, err) - assert.Contains(t, string(data), `"name": "smoke-core"`) - assert.Contains(t, string(data), `"description": "An evaluator"`) + // Only the dimensions array is saved, not the outer fields. 
+ assert.Contains(t, string(data), `"id": "quality"`) + assert.Contains(t, string(data), `"weight": 10`) + assert.NotContains(t, string(data), `"name": "smoke-core"`) +} + +func TestSaveEvaluatorResult_WithVersion(t *testing.T) { + t.Parallel() + dir := t.TempDir() + + result := json.RawMessage(`{"name":"custom","definition":{"type":"rubric","dimensions":[{"id":"d1","weight":5}]}}`) + eval_api.SaveEvaluatorResult(dir, "custom", result) + + path := filepath.Join(dir, "evaluators", "custom", "rubric_dimensions.json") + assert.FileExists(t, path) } func TestSaveEvaluatorResult_NilResult(t *testing.T) { t.Parallel() dir := t.TempDir() - require.NoError(t, ensureFoundryDirs(dir)) - saveEvaluatorResult(dir, "test", nil) - path := filepath.Join(dir, ".azure", ".foundry", "evaluators", "test.json") + eval_api.SaveEvaluatorResult(dir, "test", nil) + path := filepath.Join(dir, "evaluators", "test", "rubric_dimensions.json") assert.NoFileExists(t, path) } func TestSaveEvaluatorResult_EmptyName(t *testing.T) { t.Parallel() dir := t.TempDir() - require.NoError(t, ensureFoundryDirs(dir)) - saveEvaluatorResult(dir, "", json.RawMessage(`{"name":"x"}`)) + eval_api.SaveEvaluatorResult(dir, "", json.RawMessage(`{"name":"x"}`)) // Should not create any file. - matches, _ := filepath.Glob(filepath.Join(dir, ".azure", ".foundry", "evaluators", "*.json")) + matches, _ := filepath.Glob(filepath.Join(dir, "evaluators", "*.json")) assert.Empty(t, matches) } func TestWriteEvalReviewArtifacts_SkipsWhenResultExists(t *testing.T) { t.Parallel() dir := t.TempDir() - require.NoError(t, ensureFoundryDirs(dir)) // Pre-save a result file. 
- saveEvaluatorResult(dir, "quality", json.RawMessage(`{"name":"quality"}`)) + eval_api.SaveEvaluatorResult(dir, "quality", json.RawMessage(`{"name":"quality","definition":{"type":"rubric","dimensions":[{"id":"q","weight":1}]}}`)) cfg := &evalConfig{} - cfg.Evaluators = []string{"quality"} - writeEvalReviewArtifacts(dir, cfg) + cfg.Evaluators = opteval.EvaluatorList{{Name: "quality"}} + eval_api.WriteEvalReviewArtifacts(dir, cfg) // Should NOT create a .yaml stub since .json result already exists. - yamlPath := filepath.Join(dir, ".azure", ".foundry", "evaluators", "quality.yaml") + yamlPath := filepath.Join(dir, "evaluators", "quality", "quality.yaml") assert.NoFileExists(t, yamlPath) } @@ -368,7 +331,7 @@ func TestWriteEvalReviewArtifacts_SkipsWhenResultExists(t *testing.T) { func TestDownloadDatasetArtifact_NilDataset(t *testing.T) { t.Parallel() - err := downloadDatasetArtifact(t.Context(), nil, t.TempDir(), nil, "2025-11-15-preview") + _, err := eval_api.DownloadDatasetArtifact(t.Context(), nil, t.TempDir(), nil, "2025-11-15-preview") require.NoError(t, err) } @@ -376,8 +339,7 @@ func TestDownloadDatasetArtifact_WritesBlob(t *testing.T) { t.Parallel() // The Azure SDK bearer token policy rejects non-TLS test servers, so the - // credential call will fail. downloadDatasetArtifact gracefully writes a - // placeholder in that case — verify the placeholder is created. + // credential call will fail. downloadDatasetArtifact gracefully returns nil. 
apiServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) @@ -387,18 +349,14 @@ func TestDownloadDatasetArtifact_WritesBlob(t *testing.T) { client := dataset_api.NewDatasetClient(apiServer.URL, &fakeTokenCredential{}) dir := t.TempDir() - require.NoError(t, ensureFoundryDirs(dir)) ref := &evalDatasetRef{Name: "test-ds", Version: "v1"} - err := downloadDatasetArtifact(t.Context(), client, dir, ref, "2025-11-15-preview") + _, err := eval_api.DownloadDatasetArtifact(t.Context(), client, dir, ref, "2025-11-15-preview") require.NoError(t, err) - // Placeholder is written when credential fetch fails (non-TLS test server). - dest := datasetArtifactPath(dir, ref) - assert.FileExists(t, dest) - data, err := os.ReadFile(dest) - require.NoError(t, err) - assert.Equal(t, "{}\n", string(data)) + // No file written when credential fetch fails (non-TLS test server). + dest := eval_api.DatasetArtifactPath(dir, ref) + assert.NoDirExists(t, dest) } // --------------------------------------------------------------------------- @@ -408,8 +366,13 @@ func TestDownloadDatasetArtifact_WritesBlob(t *testing.T) { func TestDatasetArtifactPath(t *testing.T) { t.Parallel() ref := &evalDatasetRef{Name: "golden", Version: "v2"} - result := datasetArtifactPath("/project", ref) - assert.Equal(t, filepath.Join("/project", ".azure", ".foundry", "datasets", "golden-v2.jsonl"), result) + result := eval_api.DatasetArtifactPath("/project", ref) + assert.Equal(t, filepath.Join("/project", "datasets", "golden"), result) + + // No version — same path + refNoVer := &evalDatasetRef{Name: "golden", Version: ""} + resultNoVer := eval_api.DatasetArtifactPath("/project", refNoVer) + assert.Equal(t, filepath.Join("/project", "datasets", "golden"), resultNoVer) } // --------------------------------------------------------------------------- @@ -421,7 +384,7 @@ func TestWriteJSONFile(t *testing.T) { 
dir := t.TempDir() path := filepath.Join(dir, "result.json") - err := writeJSONFile(path, map[string]string{"hello": "world"}) + err := eval_api.WriteJSONFile(path, map[string]string{"hello": "world"}) require.NoError(t, err) data, err := os.ReadFile(path) @@ -481,13 +444,13 @@ func TestEvalConfigRoundTrip(t *testing.T) { Config: opteval.Config{ Name: "smoke-core", Agent: evalAgentRef{ - Name: "my-agent", - Kind: agent_yaml.AgentKindHosted, - Version: "v1", - SystemPrompt: "Test this agent", + Name: "my-agent", + Kind: agent_yaml.AgentKindHosted, + Version: "v1", + Instruction: opteval.InstructionRef{Value: "Test this agent"}, }, DatasetReference: &evalDatasetRef{Name: "ds", Version: "v1"}, - Evaluators: []string{"builtin.task_adherence"}, + Evaluators: opteval.EvaluatorList{{Name: "builtin.task_adherence"}}, }, Options: &opteval.Options{ EvalModel: "gpt-4o", @@ -506,12 +469,12 @@ func TestEvalConfigRoundTrip(t *testing.T) { assert.Equal(t, original.Agent.Kind, loaded.Agent.Kind) assert.Equal(t, original.Agent.Version, loaded.Agent.Version) assert.Equal(t, "gpt-4o", loaded.Options.EvalModel) - assert.Equal(t, original.Agent.SystemPrompt, loaded.Agent.SystemPrompt) + assert.Equal(t, original.Agent.Instruction.Value, loaded.Agent.Instruction.Value) assert.Equal(t, original.MaxSamples, loaded.MaxSamples) require.NotNil(t, loaded.DatasetReference) assert.Equal(t, "ds", loaded.DatasetReference.Name) require.Len(t, loaded.Evaluators, 1) - assert.Equal(t, "builtin.task_adherence", loaded.Evaluators[0]) + assert.Equal(t, "builtin.task_adherence", loaded.Evaluators[0].Name) } func TestReadEvalConfig_MissingFile(t *testing.T) { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go new file mode 100644 index 00000000000..5f8ebc2dd67 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go @@ -0,0 +1,240 @@ +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + + "azureaiagent/internal/pkg/agents/dataset_api" + "azureaiagent/internal/pkg/agents/eval_api" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +type evalUpdateFlags struct { + config string + datasetOnly bool + evaluatorOnly bool +} + +func newEvalUpdateCommand(extCtx *azdext.ExtensionContext) *cobra.Command { + flags := &evalUpdateFlags{config: defaultEvalConfigName} + cmd := &cobra.Command{ + Use: "update", + Short: "Update evaluators and datasets from local files.", + Long: `Reads the eval config and uploads new versions for: + - Evaluators with a local_uri (rubric dimensions file) + - Datasets with a local_uri (JSONL data directory) +The version fields in the config are updated after successful uploads. + +In interactive mode, you will be prompted for each asset type that has +local changes. 
Use --dataset-only or --evaluator-only to skip prompts.`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := azdext.WithAccessToken(cmd.Context()) + logCleanup := setupDebugLogging(cmd.Flags()) + defer logCleanup() + return runEvalUpdate(ctx, flags, extCtx.NoPrompt) + }, + } + cmd.Flags().StringVar(&flags.config, "config", defaultEvalConfigName, "Local eval config YAML") + cmd.Flags().BoolVar(&flags.datasetOnly, "dataset-only", false, "Only update the dataset") + cmd.Flags().BoolVar(&flags.evaluatorOnly, "evaluator-only", false, "Only update evaluators") + return cmd +} + +func runEvalUpdate(ctx context.Context, flags *evalUpdateFlags, noPrompt bool) error { + resolved, err := resolveEvalContext(ctx, evalContextOptions{}) + if err != nil { + return err + } + defer resolved.azdClient.Close() + + configPath := eval_api.ResolveEvalConfigPath(flags.config, resolved.agentProject) + evalCfg, err := readEvalConfig(configPath) + if err != nil { + return fmt.Errorf("failed to load eval config: %w", err) + } + + // Detect what has local changes. + hasDataset := evalCfg.DatasetReference != nil && + evalCfg.DatasetReference.Name != "" && + evalCfg.DatasetReference.LocalURI != "" + hasEvaluators := len(evalCfg.Evaluators.FindByLocalURI()) > 0 + + // Determine what to update based on flags and interactive prompts. + updateDS := hasDataset && !flags.evaluatorOnly + updateEval := hasEvaluators && !flags.datasetOnly + + // In interactive mode (no exclusive flags), prompt for each detected type. + if !noPrompt && !flags.datasetOnly && !flags.evaluatorOnly { + if hasDataset { + updateDS = confirmUpdate(ctx, resolved, fmt.Sprintf( + "Dataset %s has local changes. Upload new version?", + evalCfg.DatasetReference.Name, + )) + } + if hasEvaluators { + updateEval = confirmUpdate(ctx, resolved, "Evaluator(s) have local changes. 
Upload new version(s)?") + } + } + + var totalUpdated int + + if updateDS { + dsUpdated, err := updateDataset(ctx, resolved.datasetClient, evalCfg, configPath) + if err != nil { + return err + } + totalUpdated += dsUpdated + } + + if updateEval { + evalUpdated, err := updateEvaluators(ctx, resolved.evalClient, evalCfg, configPath) + if err != nil { + return err + } + totalUpdated += evalUpdated + } + + if totalUpdated > 0 { + if err := writeEvalConfig(configPath, evalCfg); err != nil { + return fmt.Errorf("failed to save updated config: %w", err) + } + fmt.Printf("\n%s Updated config saved to %s\n", color.GreenString("Done."), flags.config) + } else { + fmt.Println("\nNo updates were made.") + } + + return nil +} + +// confirmUpdate prompts the user with a yes/no question, defaulting to yes. +func confirmUpdate(ctx context.Context, resolved *evalResolvedContext, message string) bool { + resp, err := resolved.azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: message, + DefaultValue: new(true), + }, + }) + if err != nil { + return true // on error, default to updating + } + return resp.Value != nil && *resp.Value +} + +// updateDataset uploads local dataset files as a new dataset version. +// Returns the number of datasets updated (0 or 1). 
+func updateDataset( + ctx context.Context, + client *dataset_api.DatasetClient, + evalCfg *evalConfig, + configPath string, +) (int, error) { + ref := evalCfg.DatasetReference + if ref == nil || ref.Name == "" || ref.LocalURI == "" { + return 0, nil + } + + localDir := ref.LocalURI + if !filepath.IsAbs(localDir) { + localDir = filepath.Join(filepath.Dir(configPath), localDir) + } + + resp, err := client.UploadNewVersion(ctx, ref.Name, ref.Version, localDir, DefaultAgentAPIVersion) + if err != nil { + fmt.Printf(" %s Failed to update dataset %s: %v\n", color.RedString("x"), ref.Name, err) + return 0, nil + } + + ref.Version = resp.Version + fmt.Printf(" %s Dataset %s → version %s\n", color.GreenString("✓"), ref.Name, resp.Version) + return 1, nil +} + +// updateEvaluators uploads local evaluator dimensions as new evaluator versions. +// Returns the number of evaluators updated. +func updateEvaluators( + ctx context.Context, + client *eval_api.EvalClient, + evalCfg *evalConfig, + configPath string, +) (int, error) { + localEvals := evalCfg.Evaluators.FindByLocalURI() + if len(localEvals) == 0 { + return 0, nil + } + + var updated int + for _, ref := range localEvals { + localPath := ref.LocalURI + if !filepath.IsAbs(localPath) { + localPath = filepath.Join(filepath.Dir(configPath), localPath) + } + + data, err := os.ReadFile(localPath) //nolint:gosec // user-provided local config path + if err != nil { + fmt.Printf(" %s Skipping %s: %v\n", color.YellowString("!"), ref.Name, err) + continue + } + + if !json.Valid(data) { + fmt.Printf(" %s Skipping %s: file is not valid JSON\n", color.YellowString("!"), ref.Name) + continue + } + + current, err := client.GetEvaluatorRaw(ctx, ref.Name, ref.Version, DefaultAgentAPIVersion) + if err != nil { + fmt.Printf(" %s Failed to get evaluator %s: %v\n", color.RedString("x"), ref.Name, err) + continue + } + + var obj map[string]json.RawMessage + if err := json.Unmarshal(current, &obj); err != nil { + fmt.Printf(" %s Failed to parse 
evaluator %s: %v\n", color.RedString("x"), ref.Name, err) + continue + } + + // Patch dimensions into the existing definition. + var defObj map[string]json.RawMessage + if raw, ok := obj["definition"]; ok { + if err := json.Unmarshal(raw, &defObj); err != nil { + defObj = make(map[string]json.RawMessage) + } + } else { + defObj = make(map[string]json.RawMessage) + } + defObj["dimensions"] = json.RawMessage(data) + updatedDef, err := json.Marshal(defObj) + if err != nil { + fmt.Printf(" %s Failed to build definition for %s: %v\n", color.RedString("x"), ref.Name, err) + continue + } + obj["definition"] = json.RawMessage(updatedDef) + + body, err := json.Marshal(obj) + if err != nil { + fmt.Printf(" %s Failed to build request for %s: %v\n", color.RedString("x"), ref.Name, err) + continue + } + + resp, err := client.CreateEvaluatorVersion(ctx, ref.Name, body, DefaultAgentAPIVersion) + if err != nil { + fmt.Printf(" %s Failed to update %s: %v\n", color.RedString("x"), ref.Name, err) + continue + } + + evalCfg.Evaluators.SetVersion(ref.Name, resp.Version) + updated++ + fmt.Printf(" %s Evaluator %s → version %s\n", color.GreenString("✓"), ref.Name, resp.Version) + } + + return updated, nil +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go index fe32b61287d..f4e7e0f9e90 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -5,8 +5,10 @@ package cmd import ( "context" + "encoding/json" "fmt" "io" + "log" "os" "path/filepath" "strings" @@ -217,8 +219,8 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { cfg.Agent.SkillDir = filepath.Join(agentProject, cfg.Agent.SkillDir) } - // Resolve system prompt using a well-defined lifecycle: - // 1. 
Config file (eval.yaml / --config) — system_prompt or system_prompt_file in agent section + // Resolve agent instruction using a well-defined lifecycle: + // 1. Config file (eval.yaml / --config) — instruction in the agent section (inline or file reference) // 2. Baseline config — .agent_optimization/baseline/config.json from a prior optimize run // 3. Interactive prompt — ask the user to provide inline text or a file path if err := resolveOptimizeSystemPrompt(ctx, cfg, agentProject, hasProject, a.noPrompt); err != nil { @@ -261,6 +263,10 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { return fmt.Errorf("failed to build optimization request: %w", err) } + if body, jsonErr := json.MarshalIndent(optimizeReq, "", " "); jsonErr == nil { + log.Printf("[debug] optimization request:\n%s", body) + } + // Save baseline config before starting optimization. if hasProject { if err := saveBaselineConfig(agentProject, cfg.Agent.SkillDir, optimizeReq); err != nil { @@ -295,11 +301,11 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { // resolveOptimizeSystemPrompt resolves the agent's system prompt using a well-defined lifecycle: // -// 1. Config (eval.yaml / --config): system_prompt or system_prompt_file in the agent section. +// 1. Config (eval.yaml / --config): instruction in the agent section (inline or file). // 2. Baseline: .agent_optimization/baseline/config.json from a prior optimization run. // 3. Interactive prompt: ask the user to provide inline text or a file path. // -// Relative file paths in system_prompt_file are resolved against agentProject. +// Relative file paths are resolved against agentProject. func resolveOptimizeSystemPrompt( ctx context.Context, cfg *OptimizeConfig, @@ -307,22 +313,22 @@ func resolveOptimizeSystemPrompt( hasProject bool, noPrompt bool, ) error { - // Resolve relative system_prompt_file paths against the agent project directory. 
- if cfg.Agent.SystemPromptFile != "" && hasProject && !filepath.IsAbs(cfg.Agent.SystemPromptFile) { - cfg.Agent.SystemPromptFile = filepath.Join(agentProject, cfg.Agent.SystemPromptFile) + // Resolve relative instruction file paths against the agent project directory. + if cfg.Agent.Instruction.File != "" && hasProject && !filepath.IsAbs(cfg.Agent.Instruction.File) { + cfg.Agent.Instruction.File = filepath.Join(agentProject, cfg.Agent.Instruction.File) } - // Step 1: Config explicitly declares a system_prompt_file — validate it's readable. - if cfg.Agent.SystemPromptFile != "" { - if _, err := os.Stat(cfg.Agent.SystemPromptFile); err != nil { - return fmt.Errorf("system_prompt_file %q from config is not accessible: %w", - cfg.Agent.SystemPromptFile, err) + // Step 1: Config explicitly declares a file reference — validate it's readable. + if cfg.Agent.Instruction.File != "" { + if _, err := os.Stat(cfg.Agent.Instruction.File); err != nil { + return fmt.Errorf("instruction file %q from config is not accessible: %w", + cfg.Agent.Instruction.File, err) } return nil } - // Step 1b: Config already has inline system_prompt — nothing to do. - if cfg.Agent.SystemPrompt != "" { + // Step 1b: Config already has inline instruction — nothing to do. + if cfg.Agent.Instruction.Value != "" { return nil } @@ -330,7 +336,7 @@ func resolveOptimizeSystemPrompt( if hasProject { if baseline, loadErr := loadBaselineConfig(agentProject); loadErr == nil && baseline.Instructions != "" { if noPrompt { - cfg.Agent.SystemPrompt = baseline.Instructions + cfg.Agent.Instruction.Value = baseline.Instructions return nil } @@ -339,13 +345,13 @@ func resolveOptimizeSystemPrompt( defer azdClient.Close() resp, promptErr := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ Options: &azdext.ConfirmOptions{ - Message: "No system prompt in config. " + + Message: "No instruction in config. " + "Found one in baseline (.agent_optimization/baseline/config.json). 
Use it?", DefaultValue: new(true), }, }) if promptErr == nil && resp.Value != nil && *resp.Value { - cfg.Agent.SystemPrompt = baseline.Instructions + cfg.Agent.Instruction.Value = baseline.Instructions return nil } } @@ -354,16 +360,16 @@ func resolveOptimizeSystemPrompt( // Step 3: Interactive prompt — ask user to provide inline text or a file path. if noPrompt { - return fmt.Errorf("system prompt is required for optimization.\n\n" + + return fmt.Errorf("instruction is required for optimization.\n\n" + "Provide it via one of:\n" + - " 1. system_prompt or system_prompt_file in eval.yaml (agent section)\n" + + " 1. instruction in eval.yaml (agent section): inline string or file reference\n" + " 2. Run a prior optimization to create a baseline (.agent_optimization/baseline/config.json)\n" + " 3. Run without --no-prompt to enter it interactively") } azdClient, clientErr := azdext.NewAzdClient() if clientErr != nil { - return fmt.Errorf("system prompt is required but could not open interactive prompt: %w", clientErr) + return fmt.Errorf("instruction is required but could not open interactive prompt: %w", clientErr) } defer azdClient.Close() @@ -374,25 +380,25 @@ func resolveOptimizeSystemPrompt( defaultIdx := int32(0) selResp, selErr := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ Options: &azdext.SelectOptions{ - Message: "No system prompt found in config or baseline. " + - "How would you like to provide the system prompt?", + Message: "No instruction found in config or baseline. 
" + + "How would you like to provide it?", Choices: inputChoices, SelectedIndex: &defaultIdx, }, }) if selErr != nil { - return fmt.Errorf("prompting for system prompt input method: %w", selErr) + return fmt.Errorf("prompting for instruction input method: %w", selErr) } if inputChoices[int(*selResp.Value)].Value == "file" { pathResp, pathErr := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ Options: &azdext.PromptOptions{ - Message: "Path to system prompt file", + Message: "Path to instruction file", IgnoreHintKeys: true, }, }) if pathErr != nil { - return fmt.Errorf("prompting for system prompt file path: %w", pathErr) + return fmt.Errorf("prompting for instruction file path: %w", pathErr) } filePath := strings.TrimSpace(pathResp.Value) // Resolve relative paths against the agent project directory. @@ -400,20 +406,20 @@ func resolveOptimizeSystemPrompt( filePath = filepath.Join(agentProject, filePath) } if _, err := os.Stat(filePath); err != nil { - return fmt.Errorf("system prompt file %q is not accessible: %w", filePath, err) + return fmt.Errorf("instruction file %q is not accessible: %w", filePath, err) } - cfg.Agent.SystemPromptFile = filePath + cfg.Agent.Instruction.File = filePath } else { resp, promptErr := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ Options: &azdext.PromptOptions{ - Message: "Enter the agent's system prompt instructions", + Message: "Enter the agent's instruction", IgnoreHintKeys: true, }, }) if promptErr != nil { - return fmt.Errorf("prompting for system prompt: %w", promptErr) + return fmt.Errorf("prompting for instruction: %w", promptErr) } - cfg.Agent.SystemPrompt = strings.TrimSpace(resp.Value) + cfg.Agent.Instruction.Value = strings.TrimSpace(resp.Value) } return nil diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go index 2fc3df5ef83..fa9a1db5e95 100644 --- 
a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go @@ -10,6 +10,7 @@ import ( "io" "os" "path/filepath" + "strings" "azureaiagent/internal/pkg/agents/optimize_api" @@ -174,6 +175,9 @@ func (a *OptimizeApplyAction) apply( fmt.Fprintf(out, " Run %s to deploy the optimized agent.\n", color.CyanString("azd deploy --service %s", svc.Name)) + // Show prompt diff (baseline → optimized). + printPromptDiff(out, serviceDir, a.flags.candidate, candidateConfig) + return nil } @@ -305,3 +309,78 @@ func cleanOtherCandidates(optimizeDir, currentCandidate string, out io.Writer) { } } } + +// maxDiffPreviewLines is the max lines shown per section in the prompt diff preview. +const maxDiffPreviewLines = 4 + +// printPromptDiff displays an abbreviated prompt diff (baseline → optimized) +// with a short preview and a suggested command for the full diff. +func printPromptDiff(out io.Writer, serviceDir, candidateID string, candidateConfig any) { + optimized := extractInstructions(candidateConfig) + if optimized == "" { + return + } + + baseline, err := loadBaselineConfig(serviceDir) + if err != nil || baseline.Instructions == "" { + return + } + + baselineText := baseline.Instructions + baselineLines := strings.Split(baselineText, "\n") + optimizedLines := strings.Split(optimized, "\n") + + fmt.Fprintf(out, "\n Prompt diff (baseline → optimized):\n\n") + + // Baseline preview (removed). + removed := color.New(color.FgRed) + removed.Fprintf(out, " — Baseline (%d lines, %d chars):\n", + len(baselineLines), len(baselineText)) + printPreviewLines(out, baselineLines, "- ", removed) + + fmt.Fprintln(out) + + // Optimized preview (added). + added := color.New(color.FgGreen) + added.Fprintf(out, " — Optimized (%d lines, %d chars):\n", + len(optimizedLines), len(optimized)) + printPreviewLines(out, optimizedLines, "+ ", added) + + // Suggest command to see the full diff. 
+ baselinePath := filepath.Join(optimizationDir, "baseline", "config.json") + candidatePath := filepath.Join(optimizationDir, candidateID, "config.json") + fmt.Fprintf(out, "\n To see the full diff:\n") + fmt.Fprintf(out, " %s\n", + color.CyanString("diff %s %s", baselinePath, candidatePath)) +} + +// printPreviewLines prints up to maxDiffPreviewLines with a prefix, then "..." if truncated. +func printPreviewLines(out io.Writer, lines []string, prefix string, c *color.Color) { + limit := min(len(lines), maxDiffPreviewLines) + for _, line := range lines[:limit] { + c.Fprintf(out, " %s%s\n", prefix, line) + } + if len(lines) > maxDiffPreviewLines { + c.Fprintf(out, " %s... (%d more lines)\n", prefix, len(lines)-maxDiffPreviewLines) + } +} + +// extractInstructions retrieves the system prompt string from a candidate config +// returned by the optimization service. +func extractInstructions(config any) string { + m, ok := config.(map[string]any) + if !ok { + return "" + } + if v, exists := m["systemPrompt"]; exists { + if s, ok := v.(string); ok { + return s + } + } + if v, exists := m["instructions"]; exists { + if s, ok := v.(string); ok { + return s + } + } + return "" +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go index 8b53edca6cd..c823f84aa29 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -80,7 +80,7 @@ func defaultOptimizeConfig(agentName string) *OptimizeConfig { return &OptimizeConfig{ Config: opteval.Config{ Agent: opteval.AgentRef{Name: agentName}, - Evaluators: []string{"builtin.task_adherence"}, + Evaluators: opteval.EvaluatorList{{Name: "builtin.task_adherence"}}, }, InlineDataset: defaultDataset, Options: &opteval.Options{ @@ -136,7 +136,7 @@ func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.Optimi Model: c.Agent.Model, 
SystemPrompt: c.Agent.ResolvedSystemPrompt(), }, - Evaluators: c.Evaluators, + Evaluators: c.Evaluators.Names(), Options: optimize_api.OptimizeOptions{ EvalModel: c.Options.EvalModel, Budget: c.Options.Budget, @@ -240,8 +240,10 @@ func loadDatasetFile(path string) ([]optimize_api.DatasetTask, error) { } // loadSkillsFromDir reads skill files from a directory and returns SkillDefinitions. -// Each file in the directory is treated as a skill: the filename (without extension) -// becomes the skill name, and the file content becomes the skill body. +// For markdown files (.md), YAML frontmatter is parsed to extract name and description; +// the content after the frontmatter becomes the skill body. +// For other files, the filename (without extension) is used as the name and the full +// content as the body. // Subdirectories are recursed into — each file within is also loaded as a skill. func loadSkillsFromDir(dir string) ([]optimize_api.SkillDefinition, error) { entries, err := os.ReadDir(dir) @@ -267,12 +269,83 @@ func loadSkillsFromDir(dir string) ([]optimize_api.SkillDefinition, error) { return nil, fmt.Errorf("reading skill file %s: %w", entry.Name(), err) } - name := strings.TrimSuffix(entry.Name(), filepath.Ext(entry.Name())) - skills = append(skills, optimize_api.SkillDefinition{ - Name: name, - Body: string(data), - }) + skill := parseSkillFile(entry.Name(), string(data)) + skills = append(skills, skill) } return skills, nil } + +// skillFrontmatter represents the YAML frontmatter in a skill markdown file. +type skillFrontmatter struct { + Name string `yaml:"name"` + Description string `yaml:"description"` +} + +// parseSkillFile parses a skill file. For .md files it attempts to extract +// YAML frontmatter (delimited by "---") for name and description; the body +// is the content after the frontmatter. For other files, the filename (sans +// extension) is the name and the full content is the body. 
+func parseSkillFile(filename, content string) optimize_api.SkillDefinition { + ext := filepath.Ext(filename) + baseName := strings.TrimSuffix(filename, ext) + + if !strings.EqualFold(ext, ".md") { + return optimize_api.SkillDefinition{ + Name: baseName, + Body: content, + } + } + + // Try to parse YAML frontmatter from markdown. + fm, body := splitFrontmatter(content) + skill := optimize_api.SkillDefinition{ + Name: baseName, + Body: body, + } + + if fm != "" { + var meta skillFrontmatter + if err := yaml.Unmarshal([]byte(fm), &meta); err == nil { + if meta.Name != "" { + skill.Name = meta.Name + } + skill.Description = meta.Description + } + } + + return skill +} + +// splitFrontmatter splits YAML frontmatter (between "---" delimiters) from +// the rest of the content. Returns (frontmatter, body). If no frontmatter is +// found, returns ("", original content). +func splitFrontmatter(content string) (string, string) { + const delimiter = "---" + + scanner := bufio.NewScanner(strings.NewReader(content)) + if !scanner.Scan() { + return "", content + } + if strings.TrimSpace(scanner.Text()) != delimiter { + return "", content + } + + var fmLines []string + for scanner.Scan() { + line := scanner.Text() + if strings.TrimSpace(line) == delimiter { + // Found closing delimiter — rest is the body. + var bodyLines []string + for scanner.Scan() { + bodyLines = append(bodyLines, scanner.Text()) + } + body := strings.Join(bodyLines, "\n") + return strings.Join(fmLines, "\n"), strings.TrimSpace(body) + } + fmLines = append(fmLines, line) + } + + // No closing delimiter found — treat entire content as body. 
+ return "", content +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go index e65a2714cb5..7ca6531af44 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go @@ -9,6 +9,7 @@ import ( "testing" "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/optimize_api" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -219,7 +220,7 @@ options: require.NotNil(t, cfg.Options) assert.Equal(t, "gpt-4o", cfg.Options.EvalModel) assert.Len(t, cfg.Evaluators, 1) - assert.Equal(t, "builtin.task_adherence", cfg.Evaluators[0]) + assert.Equal(t, "builtin.task_adherence", cfg.Evaluators[0].Name) require.NotNil(t, cfg.DatasetReference) assert.Equal(t, "eval-dataset", cfg.DatasetReference.Name) } @@ -276,7 +277,7 @@ options: // Evaluator — scalar string without builtin. prefix resolves as custom. require.Len(t, cfg.Evaluators, 1) - assert.Equal(t, "builtin.task_adherence", cfg.Evaluators[0]) + assert.Equal(t, "builtin.task_adherence", cfg.Evaluators[0].Name) // Options require.NotNil(t, cfg.Options) @@ -293,3 +294,87 @@ options: assert.Len(t, req.Dataset, 1) assert.Equal(t, []string{"builtin.task_adherence"}, req.Evaluators) } + +// --------------------------------------------------------------------------- +// parseSkillFile / loadSkillsFromDir +// --------------------------------------------------------------------------- + +func TestParseSkillFile_MarkdownWithFrontmatter(t *testing.T) { + t.Parallel() + content := `--- +name: policy-reviewer +description: Reviews a travel request against company travel policy. +--- + +# Policy Reviewer Skill + +Review travel requests and provide a friendly assessment. 
+` + skill := parseSkillFile("SKILL.md", content) + assert.Equal(t, "policy-reviewer", skill.Name) + assert.Equal(t, "Reviews a travel request against company travel policy.", skill.Description) + assert.Contains(t, skill.Body, "# Policy Reviewer Skill") + assert.Contains(t, skill.Body, "friendly assessment") + assert.NotContains(t, skill.Body, "---") +} + +func TestParseSkillFile_MarkdownWithoutFrontmatter(t *testing.T) { + t.Parallel() + content := "# Simple Skill\n\nDo something useful.\n" + skill := parseSkillFile("simple.md", content) + assert.Equal(t, "simple", skill.Name) + assert.Empty(t, skill.Description) + assert.Equal(t, content, skill.Body) +} + +func TestParseSkillFile_NonMarkdown(t *testing.T) { + t.Parallel() + content := "You are a helpful assistant." + skill := parseSkillFile("assistant.txt", content) + assert.Equal(t, "assistant", skill.Name) + assert.Empty(t, skill.Description) + assert.Equal(t, content, skill.Body) +} + +func TestParseSkillFile_FrontmatterNameOnly(t *testing.T) { + t.Parallel() + content := "---\nname: custom-name\n---\nBody content here.\n" + skill := parseSkillFile("ignored-filename.md", content) + assert.Equal(t, "custom-name", skill.Name) + assert.Empty(t, skill.Description) + assert.Equal(t, "Body content here.", skill.Body) +} + +func TestLoadSkillsFromDir_WithMarkdownSkills(t *testing.T) { + t.Parallel() + dir := t.TempDir() + + md := "---\nname: reviewer\ndescription: Reviews things\n---\n\nReview body.\n" + require.NoError(t, os.WriteFile(filepath.Join(dir, "SKILL.md"), []byte(md), 0600)) + + txt := "Plain text skill body." + require.NoError(t, os.WriteFile(filepath.Join(dir, "helper.txt"), []byte(txt), 0600)) + + skills, err := loadSkillsFromDir(dir) + require.NoError(t, err) + require.Len(t, skills, 2) + + // Find each skill by name. 
+ var mdSkill, txtSkill *optimize_api.SkillDefinition + for i := range skills { + switch skills[i].Name { + case "reviewer": + mdSkill = &skills[i] + case "helper": + txtSkill = &skills[i] + } + } + + require.NotNil(t, mdSkill) + assert.Equal(t, "Reviews things", mdSkill.Description) + assert.Contains(t, mdSkill.Body, "Review body.") + + require.NotNil(t, txtSkill) + assert.Empty(t, txtSkill.Description) + assert.Equal(t, txt, txtSkill.Body) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go index 79aacaf485b..6015b559c2b 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go @@ -3,6 +3,14 @@ package dataset_api +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "strings" +) + // CreateDatasetRequest is the request body for creating (uploading) a dataset. type CreateDatasetRequest struct { Name string `json:"name"` @@ -34,16 +42,48 @@ func (d *Dataset) ResolvedBlobURI() string { } // DatasetCredential is the response for dataset credential (SAS token) requests. +// The API returns a nested structure with blobReference and blobReferenceForConsumption. type DatasetCredential struct { + // Flat fields (legacy format). BlobURI string `json:"blob_uri,omitempty"` SAS string `json:"sas,omitempty"` - // SASUri is the full URI with SAS token appended, ready for download. - SASUri string `json:"sas_uri,omitempty"` + SASUri string `json:"sas_uri,omitempty"` + + // Nested fields (current API format). + BlobReference *BlobReference `json:"blobReference,omitempty"` + BlobReferenceConsumption *BlobReference `json:"blobReferenceForConsumption,omitempty"` +} + +// BlobReference represents a blob storage reference with credentials. 
+type BlobReference struct { + BlobURI string `json:"blobUri,omitempty"` + StorageAccountARM string `json:"storageAccountArmId,omitempty"` + Credential *BlobCredential `json:"credential,omitempty"` +} + +// BlobCredential holds SAS credential details for blob access. +type BlobCredential struct { + Type string `json:"type,omitempty"` + SASUri string `json:"sasUri,omitempty"` + SASPath string `json:"sas,omitempty"` } // ResolvedDownloadURI returns the URL to download the dataset. -// Prefers sas_uri (complete), falls back to blob_uri + sas query string. +// Prefers blobReferenceForConsumption.credential.sasUri (current API), +// then blobReference.credential.sasUri, then flat sas_uri, then blob_uri + sas. func (c *DatasetCredential) ResolvedDownloadURI() string { + // Current API format: nested blob references. + if c.BlobReferenceConsumption != nil && c.BlobReferenceConsumption.Credential != nil { + if uri := c.BlobReferenceConsumption.Credential.SASUri; uri != "" { + return uri + } + } + if c.BlobReference != nil && c.BlobReference.Credential != nil { + if uri := c.BlobReference.Credential.SASUri; uri != "" { + return uri + } + } + // Legacy flat format. if c.SASUri != "" { return c.SASUri } @@ -52,3 +92,97 @@ func (c *DatasetCredential) ResolvedDownloadURI() string { } return c.BlobURI } + +// PendingUploadResponse is returned by the startPendingUpload endpoint. +// It contains a SAS URI for uploading blob data and the blob container URI. +type PendingUploadResponse struct { + BlobReference *BlobReference `json:"blobReference,omitempty"` + BlobReferenceConsumption *BlobReference `json:"blobReferenceForConsumption,omitempty"` + PendingUploadID *string `json:"pendingUploadId,omitempty"` + PendingUploadType string `json:"pendingUploadType,omitempty"` + Version string `json:"version,omitempty"` +} + +// ResolvedUploadURI returns the SAS URI for uploading blobs. 
+func (p *PendingUploadResponse) ResolvedUploadURI() string { + if p.BlobReference != nil && p.BlobReference.Credential != nil { + if uri := p.BlobReference.Credential.SASUri; uri != "" { + return uri + } + } + return "" +} + +// ResolvedBlobURI returns the blob container URI (without SAS) for the finalize request. +func (p *PendingUploadResponse) ResolvedBlobURI() string { + if p.BlobReference != nil { + return p.BlobReference.BlobURI + } + return "" +} + +// FinalizeDatasetRequest is the request body for finalizing a dataset version +// after blob upload. +type FinalizeDatasetRequest struct { + Name string `json:"name"` + Version string `json:"version"` + Description string `json:"description"` + Type string `json:"type"` + IsReference bool `json:"isReference"` + DataURI string `json:"dataUri"` +} + +// NextVersion computes the next dataset version string. +// +// Rules: +// 1. Empty → "1.0" +// 2. Parseable as a decimal number → increment by 1, format as "N.0" +// 3. Ends with trailing digits → increment the trailing numeric part +// 4. Otherwise → append ".1" +func NextVersion(current string) string { + current = strings.TrimSpace(current) + if current == "" { + return "1.0" + } + + // Try parsing as a decimal number (e.g. "1", "1.0", "2.0"). + if f, err := strconv.ParseFloat(current, 64); err == nil { + return strconv.FormatFloat(f+1, 'f', 1, 64) + } + + // Find trailing digits and increment them. + i := len(current) - 1 + for i >= 0 && current[i] >= '0' && current[i] <= '9' { + i-- + } + if i < len(current)-1 { + prefix := current[:i+1] + n, err := strconv.Atoi(current[i+1:]) + if err == nil { + return prefix + strconv.Itoa(n+1) + } + } + + return current + ".1" +} + +// ReadFirstJSONLFile finds and reads the first .jsonl file in a directory. 
+func ReadFirstJSONLFile(dir string) (string, error) { + entries, err := os.ReadDir(dir) + if err != nil { + return "", fmt.Errorf("reading directory: %w", err) + } + for _, e := range entries { + if e.IsDir() { + continue + } + if filepath.Ext(e.Name()) == ".jsonl" { + data, err := os.ReadFile(filepath.Join(dir, e.Name())) //nolint:gosec // local artifact path + if err != nil { + return "", fmt.Errorf("reading %s: %w", e.Name(), err) + } + return string(data), nil + } + } + return "", fmt.Errorf("no .jsonl file found in %s", dir) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models_test.go index 2f3a083066d..ad764c9e44f 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models_test.go @@ -81,6 +81,29 @@ func TestDatasetCredential_ResolvedDownloadURI(t *testing.T) { cred: DatasetCredential{}, expected: "", }, + { + name: "prefers blobReferenceForConsumption", + cred: DatasetCredential{ + BlobReference: &BlobReference{Credential: &BlobCredential{SASUri: "https://blob.example/ref?sig=1"}}, + BlobReferenceConsumption: &BlobReference{Credential: &BlobCredential{SASUri: "https://blob.example/consumption?sig=2"}}, + }, + expected: "https://blob.example/consumption?sig=2", + }, + { + name: "falls back to blobReference", + cred: DatasetCredential{ + BlobReference: &BlobReference{Credential: &BlobCredential{SASUri: "https://blob.example/ref?sig=1"}}, + }, + expected: "https://blob.example/ref?sig=1", + }, + { + name: "nested takes priority over flat sas_uri", + cred: DatasetCredential{ + SASUri: "https://blob.example/flat?sig=flat", + BlobReference: &BlobReference{Credential: &BlobCredential{SASUri: "https://blob.example/nested?sig=nested"}}, + }, + expected: "https://blob.example/nested?sig=nested", + }, } for _, tc := range tests { diff 
--git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go index 9027bfd4ade..58da5cf7cf4 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go @@ -12,6 +12,7 @@ import ( "log" "net/http" "net/url" + "strings" "azureaiagent/internal/version" @@ -69,6 +70,114 @@ func (c *DatasetClient) CreateDataset( return doRequestTyped[Dataset](c, ctx, http.MethodPost, pathDatasets, nil, request, apiVersion) } +// UploadNewVersion reads the first JSONL file from localDir, computes the next +// version from currentVersion, and uploads it as a new dataset version using +// the 3-step pending upload flow: +// 1. startPendingUpload → get SAS URI +// 2. Upload blob to SAS URI +// 3. Finalize dataset version with dataUri +func (c *DatasetClient) UploadNewVersion( + ctx context.Context, + name string, + currentVersion string, + localDir string, + apiVersion string, +) (*Dataset, error) { + content, err := ReadFirstJSONLFile(localDir) + if err != nil { + return nil, fmt.Errorf("reading dataset from %s: %w", localDir, err) + } + + newVersion := NextVersion(currentVersion) + + // Step 1: Start pending upload to get a SAS URI. + pending, err := c.StartPendingUpload(ctx, name, newVersion, apiVersion) + if err != nil { + return nil, fmt.Errorf("starting pending upload: %w", err) + } + + uploadURI := pending.ResolvedUploadURI() + if uploadURI == "" { + return nil, fmt.Errorf("no upload SAS URI returned from startPendingUpload") + } + + // Step 2: Upload the JSONL file to blob storage. + blobName := name + ".jsonl" + if err := c.UploadBlob(ctx, uploadURI, blobName, []byte(content)); err != nil { + return nil, fmt.Errorf("uploading blob: %w", err) + } + + // Step 3: Finalize the dataset version. 
+ dataURI := pending.ResolvedBlobURI() + return c.FinalizeDatasetVersion(ctx, name, newVersion, dataURI, apiVersion) +} + +// StartPendingUpload initiates a pending upload for a dataset version. +// Returns the SAS URI and blob reference for uploading data. +func (c *DatasetClient) StartPendingUpload( + ctx context.Context, + name string, + version string, + apiVersion string, +) (*PendingUploadResponse, error) { + path := fmt.Sprintf( + "%s/%s/versions/%s/startPendingUpload", + pathDatasets, url.PathEscape(name), url.PathEscape(version), + ) + return doRequestTyped[PendingUploadResponse](c, ctx, http.MethodPost, path, nil, json.RawMessage(`{}`), apiVersion) +} + +// UploadBlob uploads data to a container SAS URI as a block blob. +func (c *DatasetClient) UploadBlob(ctx context.Context, containerSASUri, blobName string, data []byte) error { + u, err := url.Parse(containerSASUri) + if err != nil { + return fmt.Errorf("invalid container SAS URI: %w", err) + } + + // Append blob name to the container path. + u.Path = strings.TrimSuffix(u.Path, "/") + "/" + blobName + + req, err := http.NewRequestWithContext(ctx, http.MethodPut, u.String(), bytes.NewReader(data)) + if err != nil { + return fmt.Errorf("failed to create upload request: %w", err) + } + req.Header.Set("x-ms-blob-type", "BlockBlob") + req.Header.Set("Content-Type", "application/octet-stream") + + httpClient := &http.Client{} + resp, err := httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to upload blob: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusCreated && resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("blob upload failed with status %d: %s", resp.StatusCode, string(body)) + } + + return nil +} + +// FinalizeDatasetVersion completes the dataset version after blob upload +// by sending the metadata (name, version, dataUri) to the API. 
+func (c *DatasetClient) FinalizeDatasetVersion( + ctx context.Context, + name string, + version string, + dataURI string, + apiVersion string, +) (*Dataset, error) { + path := fmt.Sprintf("%s/%s/versions/%s", pathDatasets, url.PathEscape(name), url.PathEscape(version)) + request := &FinalizeDatasetRequest{ + Name: name, + Version: version, + Type: "uri_file", + DataURI: dataURI, + } + return doRequestTyped[Dataset](c, ctx, http.MethodPut, path, nil, request, apiVersion) +} + // GetDataset retrieves metadata for a dataset by name and version. func (c *DatasetClient) GetDataset( ctx context.Context, @@ -128,6 +237,109 @@ func (c *DatasetClient) DownloadDataset(ctx context.Context, downloadURL string) return data, nil } +// ListContainerBlobs lists blobs in a container using a container-level SAS URI. +// The containerSASUri should include the SAS token (e.g., from credential.sasUri with sr=c). +// Returns a list of blob names found in the container. +func (c *DatasetClient) ListContainerBlobs(ctx context.Context, containerSASUri string) ([]string, error) { + // Parse the container URI and append list query parameters. 
+ u, err := url.Parse(containerSASUri) + if err != nil { + return nil, fmt.Errorf("invalid container SAS URI: %w", err) + } + + q := u.Query() + q.Set("restype", "container") + q.Set("comp", "list") + u.RawQuery = q.Encode() + + log.Printf("[dataset_api] listing blobs: %s", u.String()) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) + if err != nil { + return nil, fmt.Errorf("failed to create list request: %w", err) + } + + httpClient := &http.Client{} + resp, err := httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to list container blobs: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("container list failed with status %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read list response: %w", err) + } + + // Parse XML blob listing to extract blob names. + names := parseBlobNames(string(body)) + log.Printf("[dataset_api] found %d blobs in container", len(names)) + return names, nil +} + +// DownloadBlob downloads a single blob from a container using the container SAS URI +// and the blob name. Returns the blob content as bytes. +func (c *DatasetClient) DownloadBlob(ctx context.Context, containerSASUri, blobName string) ([]byte, error) { + u, err := url.Parse(containerSASUri) + if err != nil { + return nil, fmt.Errorf("invalid container SAS URI: %w", err) + } + + // Append blob name to the container path. 
+ u.Path = strings.TrimSuffix(u.Path, "/") + "/" + blobName + + log.Printf("[dataset_api] downloading blob: %s", u.String()) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) + if err != nil { + return nil, fmt.Errorf("failed to create blob download request: %w", err) + } + + httpClient := &http.Client{} + resp, err := httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to download blob: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("blob download failed with status %d for %s", resp.StatusCode, blobName) + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read blob content: %w", err) + } + + log.Printf("[dataset_api] downloaded blob %s (%d bytes)", blobName, len(data)) + return data, nil +} + +// parseBlobNames extracts blob names from the Azure Blob Storage XML list response. +func parseBlobNames(xmlBody string) []string { + var names []string + // Simple extraction — look for ... within elements. + remaining := xmlBody + for { + start := strings.Index(remaining, "") + if start == -1 { + break + } + remaining = remaining[start+len(""):] + end := strings.Index(remaining, "") + if end == -1 { + break + } + names = append(names, remaining[:end]) + remaining = remaining[end:] + } + return names +} + // doRequest performs an HTTP request against the dataset API and returns the raw response body. 
func (c *DatasetClient) doRequest( ctx context.Context, diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go index ef5bf4a5c42..00036744a78 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go @@ -7,17 +7,21 @@ import ( "context" "encoding/json" "fmt" - "log" "os" "path/filepath" + "strings" "time" "azureaiagent/internal/pkg/agents/dataset_api" "azureaiagent/internal/pkg/agents/opteval" ) -// foundryDir is the directory under .azure where eval artifacts are stored. -const foundryDir = ".azure/.foundry" +// Artifact directory names relative to the agent project root. +const ( + EvaluatorsDir = "evaluators" + DatasetsDir = "datasets" + EvaluatorContractFile = "rubric_dimensions.json" +) // ResolveEvalOutputPath resolves the eval output config path. If output is // already absolute it is returned as-is; otherwise it is joined with the @@ -35,107 +39,195 @@ func ResolveEvalConfigPath(config, agentProject string) string { return ResolveEvalOutputPath(config, agentProject) } -// EnsureFoundryDirs creates the .azure/.foundry directory tree under the -// project root if it doesn't already exist. -func EnsureFoundryDirs(projectRoot string) error { - dir := filepath.Join(projectRoot, foundryDir) - return os.MkdirAll(dir, 0750) -} - -// DownloadDatasetArtifact downloads the dataset referenced by dsRef and saves -// it under .azure/.foundry/datasets/.jsonl. +// DownloadDatasetArtifact downloads the dataset and writes it locally. +// If the download fails (e.g., non-TLS test server), it returns nil gracefully. +// On success it returns the relative local URI (datasets///) for the +// downloaded directory. The SAS URI may point to a container (downloads all blobs) +// or a single blob. 
func DownloadDatasetArtifact( ctx context.Context, client *dataset_api.DatasetClient, - projectRoot string, - dsRef *opteval.DatasetRef, + agentProject string, + ref *opteval.DatasetRef, apiVersion string, -) error { - if dsRef == nil || dsRef.Name == "" { - return fmt.Errorf("dataset reference is empty") - } - - ds, err := client.GetDataset(ctx, dsRef.Name, dsRef.Version, apiVersion) - if err != nil { - return fmt.Errorf("failed to get dataset %q: %w", dsRef.Name, err) +) (string, error) { + if ref == nil || ref.Name == "" { + return "", nil } - cred, err := client.GetDatasetCredential(ctx, dsRef.Name, dsRef.Version, apiVersion) - if err != nil { - return fmt.Errorf("failed to get dataset credential: %w", err) + // Attempt full download via the dataset API. + cred, credErr := client.GetDatasetCredential(ctx, ref.Name, ref.Version, apiVersion) + if credErr != nil { + return "", nil } downloadURL := cred.ResolvedDownloadURI() if downloadURL == "" { - downloadURL = ds.ResolvedBlobURI() - } - if downloadURL == "" { - return fmt.Errorf("no download URL available for dataset %q", dsRef.Name) + return "", nil } - data, err := client.DownloadDataset(ctx, downloadURL) - if err != nil { - return fmt.Errorf("failed to download dataset: %w", err) + destDir := DatasetArtifactPath(agentProject, ref) + if err := os.MkdirAll(destDir, 0750); err != nil { + return "", fmt.Errorf("creating dataset artifact dir: %w", err) } - dir := filepath.Join(projectRoot, foundryDir, "datasets") - if err := os.MkdirAll(dir, 0750); err != nil { - return fmt.Errorf("failed to create dataset dir: %w", err) + // Determine if this is a container-level SAS (sr=c) or blob-level. 
+ if isContainerSAS(downloadURL) { + blobs, err := client.ListContainerBlobs(ctx, downloadURL) + if err != nil { + return "", nil + } + if len(blobs) == 0 { + return "", nil + } + for _, blobName := range blobs { + data, dlErr := client.DownloadBlob(ctx, downloadURL, blobName) + if dlErr != nil { + continue + } + dest := filepath.Join(destDir, filepath.FromSlash(blobName)) + if err := os.MkdirAll(filepath.Dir(dest), 0750); err != nil { + continue + } + if err := os.WriteFile(dest, data, 0600); err != nil { + continue + } + } + } else { + // Single blob download. + data, dlErr := client.DownloadDataset(ctx, downloadURL) + if dlErr != nil { + return "", nil + } + // Infer filename from URL. + filename := filenameFromURL(downloadURL) + dest := filepath.Join(destDir, filename) + if err := os.WriteFile(dest, data, 0600); err != nil { + return "", fmt.Errorf("writing dataset artifact: %w", err) + } } - path := filepath.Join(dir, dsRef.Name+".jsonl") - if err := os.WriteFile(path, data, 0600); err != nil { - return fmt.Errorf("failed to write dataset artifact: %w", err) + return DatasetLocalURI(ref.Name), nil +} + +// isContainerSAS checks if a SAS URI is container-scoped (sr=c in query). +func isContainerSAS(rawURL string) bool { + idx := strings.IndexByte(rawURL, '?') + if idx == -1 { + return false + } + query := rawURL[idx+1:] + // Look for sr=c parameter. + for _, param := range strings.Split(query, "&") { + if param == "sr=c" { + return true + } } + return false +} - return nil +// filenameFromURL extracts the filename from a blob URL path. +// Falls back to "data.jsonl" if unable to determine. 
+func filenameFromURL(rawURL string) string { + path := rawURL + if idx := strings.IndexByte(path, '?'); idx != -1 { + path = path[:idx] + } + parts := strings.Split(path, "/") + if len(parts) > 0 { + name := parts[len(parts)-1] + if name != "" && strings.Contains(name, ".") { + return name + } + } + return "data.jsonl" } -// DatasetArtifactPath returns the local path where a downloaded dataset -// artifact is stored. -func DatasetArtifactPath(projectRoot string, dsRef *opteval.DatasetRef) string { - if dsRef == nil || dsRef.Name == "" { +// DatasetArtifactPath returns the local filesystem path for a downloaded dataset directory. +func DatasetArtifactPath(agentProject string, ref *opteval.DatasetRef) string { + if ref == nil || ref.Name == "" { return "" } - return filepath.Join(projectRoot, foundryDir, "datasets", dsRef.Name+".jsonl") + return filepath.Join(agentProject, DatasetsDir, ref.Name) +} + +// DatasetLocalURI returns the relative path (from the agent project root) +// to a dataset artifact directory. This is the value stored in DatasetRef.LocalURI. +func DatasetLocalURI(name string) string { + return filepath.Join(DatasetsDir, name) +} + +// evaluatorDir returns the full path to an evaluator's local directory. +func evaluatorDir(agentProject, name string) string { + return filepath.Join(agentProject, EvaluatorsDir, name) +} + +// EvaluatorLocalURI returns the relative path (from the agent project root) +// to an evaluator artifact file. This is the value stored in EvaluatorRef.LocalURI. +func EvaluatorLocalURI(name string) string { + return filepath.Join(EvaluatorsDir, name, EvaluatorContractFile) } -// SaveEvaluatorResult saves the raw JSON result of an evaluator generation job -// under .azure/.foundry/evaluators/.json. -func SaveEvaluatorResult(projectRoot, evaluatorName string, result json.RawMessage) { +// SaveEvaluatorResult extracts the rubric dimensions from the evaluator result +// and saves them as the local artifact. 
Only dimensions are persisted so that +// users can edit weights/descriptions and upload a new evaluator version. +func SaveEvaluatorResult(agentProject, evaluatorName string, result json.RawMessage) { if evaluatorName == "" || len(result) == 0 { return } - dir := filepath.Join(projectRoot, foundryDir, "evaluators") + dir := evaluatorDir(agentProject, evaluatorName) if err := os.MkdirAll(dir, 0750); err != nil { - log.Printf("[debug] failed to create evaluator dir: %v", err) return } - path := filepath.Join(dir, evaluatorName+".json") - if err := os.WriteFile(path, result, 0600); err != nil { - log.Printf("[debug] failed to save evaluator result: %v", err) - } -} -// WriteEvalReviewArtifacts writes human-readable review artifacts for the eval -// config under .azure/.foundry/review/. -func WriteEvalReviewArtifacts(projectRoot string, cfg *EvalConfig) { - if cfg == nil { + // Parse the evaluator result to extract the rubric dimensions. + parsed := ParseEvaluatorResult(result) + if parsed == nil || len(parsed.Definition.Dimensions) == 0 { return } - dir := filepath.Join(projectRoot, foundryDir, "review") - if err := os.MkdirAll(dir, 0750); err != nil { - log.Printf("[debug] failed to create review dir: %v", err) + + formatted, err := json.MarshalIndent(parsed.Definition.Dimensions, "", " ") + if err != nil { return } - data, err := json.MarshalIndent(cfg, "", " ") - if err != nil { - log.Printf("[debug] failed to marshal eval config for review: %v", err) + + path := filepath.Join(dir, EvaluatorContractFile) + _ = os.WriteFile(path, formatted, 0600) +} + +// PrintEvaluatorDimensions prints a compact table of rubric dimensions. 
+func PrintEvaluatorDimensions(parsed *EvaluatorResult) { + dims := parsed.Definition.Dimensions + fmt.Printf("\n Evaluator dimensions (%d):\n", len(dims)) + fmt.Println(" Weight Dimension") + fmt.Println(" ────── ─────────") + for _, d := range dims { + fmt.Printf(" %6d %s\n", d.Weight, d.ID) + } +} + +// WriteEvalReviewArtifacts writes human-readable review artifacts for evaluators. +// It writes a stub YAML file for each evaluator unless a result JSON already exists. +func WriteEvalReviewArtifacts(agentProject string, cfg *EvalConfig) { + if cfg == nil { return } - path := filepath.Join(dir, "eval-config.json") - if err := os.WriteFile(path, data, 0600); err != nil { - log.Printf("[debug] failed to write review artifact: %v", err) + for _, evaluator := range cfg.Evaluators { + if evaluator.Name == "" || IsBuiltinEvaluator(evaluator.Name) { + continue + } + dir := evaluatorDir(agentProject, evaluator.Name) + if err := os.MkdirAll(dir, 0750); err != nil { + continue + } + // Skip if a result JSON already exists. 
+ jsonPath := filepath.Join(dir, EvaluatorContractFile) + if _, err := os.Stat(jsonPath); err == nil { + continue + } + yamlPath := filepath.Join(dir, evaluator.Name+".yaml") + stub := fmt.Sprintf("# Evaluator stub: %s\nname: %s\n", evaluator.Name, evaluator.Name) + _ = os.WriteFile(yamlPath, []byte(stub), 0600) } } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go index 3f7ade918f8..e56395ad824 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go @@ -108,11 +108,11 @@ func (c *EvalConfig) ToAgentTargetAdaptableEvalGroupRequest() *CreateOpenAIEvalR evalModel = c.Options.EvalModel } for _, evaluator := range c.Evaluators { - apiName := strings.TrimPrefix(evaluator, "builtin.") + apiName := strings.TrimPrefix(evaluator.Name, "builtin.") criterion := TestingCriterion{ Type: "azure_ai_evaluator", Name: apiName, - EvaluatorName: evaluator, + EvaluatorName: evaluator.Name, DataMapping: map[string]string{ //"messages": "{{item.messages}}", "query": "{{item.query}}", diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go index c9ed411b1a4..eff24a84597 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go @@ -98,14 +98,14 @@ func TestEvalConfig_RoundTrip_FullFields(t *testing.T) { Config: opteval.Config{ Name: "full-test", Agent: opteval.AgentRef{ - Name: "booking-agent", - Kind: "hosted", - Version: "v3", - Model: "gpt-4.1", - SystemPrompt: "This agent handles restaurant reservations", + Name: "booking-agent", + Kind: "hosted", + Version: "v3", + Model: "gpt-4.1", + 
Instruction: opteval.InstructionRef{Value: "This agent handles restaurant reservations"}, }, DatasetReference: &opteval.DatasetRef{Name: "golden-data", Version: "v2"}, - Evaluators: []string{"builtin.task_adherence", "custom-quality"}, + Evaluators: opteval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "custom-quality"}}, }, Options: &opteval.Options{ EvalModel: "gpt-4o", @@ -126,10 +126,10 @@ func TestEvalConfig_RoundTrip_FullFields(t *testing.T) { assert.Equal(t, "golden-data", loaded.DatasetReference.Name) assert.Equal(t, "v2", loaded.DatasetReference.Version) require.Len(t, loaded.Evaluators, 2) - assert.Equal(t, "builtin.task_adherence", loaded.Evaluators[0]) - assert.Equal(t, "custom-quality", loaded.Evaluators[1]) + assert.Equal(t, "builtin.task_adherence", loaded.Evaluators[0].Name) + assert.Equal(t, "custom-quality", loaded.Evaluators[1].Name) assert.Equal(t, "gpt-4o", loaded.Options.EvalModel) - assert.Equal(t, "This agent handles restaurant reservations", loaded.Agent.SystemPrompt) + assert.Equal(t, "This agent handles restaurant reservations", loaded.Agent.Instruction.Value) assert.Equal(t, 75, loaded.MaxSamples) } @@ -153,7 +153,7 @@ func TestEvalConfig_RoundTrip_MinimalFields(t *testing.T) { assert.Equal(t, "data.jsonl", loaded.DatasetFile) assert.Nil(t, loaded.DatasetReference) assert.Empty(t, loaded.Evaluators) - assert.Empty(t, loaded.Agent.SystemPrompt) + assert.True(t, loaded.Agent.Instruction.IsEmpty()) assert.Zero(t, loaded.MaxSamples) } @@ -199,7 +199,7 @@ func TestToAgentTargetAdaptableEvalGroupRequest_WithEvaluators(t *testing.T) { Config: opteval.Config{ Name: "test-eval", Agent: opteval.AgentRef{Name: "agent-1", Version: "v1"}, - Evaluators: []string{"builtin.quality", "custom-1"}, + Evaluators: opteval.EvaluatorList{{Name: "builtin.quality"}, {Name: "custom-1"}}, DatasetFile: "tasks.jsonl", }, Options: &opteval.Options{EvalModel: "gpt-4o"}, diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go 
b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go index d678cbd614d..699ce2680f1 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go @@ -7,6 +7,8 @@ import ( "path/filepath" "strings" "time" + + "azureaiagent/internal/pkg/agents/opteval" ) // --------------------------------------------------------------------------- @@ -98,7 +100,6 @@ func NewEvaluatorGenerationJobRequest( return &EvaluatorGenerationJobRequest{ Name: name, EvaluatorName: name, - Category: "quality", Model: evalModel, Sources: sources, } @@ -116,9 +117,9 @@ func IsBuiltinEvaluator(name string) bool { // SplitEvaluators partitions evaluators into generated (non-builtin) and // built-in lists. -func SplitEvaluators(evaluators []string) (generated, builtin []string) { +func SplitEvaluators(evaluators opteval.EvaluatorList) (generated, builtin opteval.EvaluatorList) { for _, e := range evaluators { - if IsBuiltinEvaluator(e) { + if IsBuiltinEvaluator(e.Name) { builtin = append(builtin, e) } else { generated = append(generated, e) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go index 1fe67341922..5710e733eb5 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go @@ -6,6 +6,8 @@ package eval_api import ( "testing" + "azureaiagent/internal/pkg/agents/opteval" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -101,16 +103,20 @@ func TestSplitEvaluators(t *testing.T) { t.Run("mixed", func(t *testing.T) { t.Parallel() - gen, bi := SplitEvaluators([]string{"builtin.task_adherence", "my-quality", "builtin.safety"}) - assert.Equal(t, []string{"my-quality"}, gen) - 
assert.Equal(t, []string{"builtin.task_adherence", "builtin.safety"}, bi) + gen, bi := SplitEvaluators(opteval.EvaluatorList{ + {Name: "builtin.task_adherence"}, {Name: "my-quality"}, {Name: "builtin.safety"}, + }) + assert.Equal(t, opteval.EvaluatorList{{Name: "my-quality"}}, gen) + assert.Equal(t, opteval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "builtin.safety"}}, bi) }) t.Run("all builtin", func(t *testing.T) { t.Parallel() - gen, bi := SplitEvaluators([]string{"builtin.quality", "builtin.safety"}) + gen, bi := SplitEvaluators(opteval.EvaluatorList{ + {Name: "builtin.quality"}, {Name: "builtin.safety"}, + }) assert.Nil(t, gen) - assert.Equal(t, []string{"builtin.quality", "builtin.safety"}, bi) + assert.Equal(t, opteval.EvaluatorList{{Name: "builtin.quality"}, {Name: "builtin.safety"}}, bi) }) t.Run("nil", func(t *testing.T) { diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go index b71a4470a9e..f8eb38cb060 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go @@ -63,24 +63,19 @@ func (j *GenerationJob) NormalizedStatus() string { return j.Status } -// ResolvedDatasetName returns the dataset name from the result JSON (top-level -// or nested outputs[0]). -func (j *GenerationJob) ResolvedDatasetName() string { - return j.resultStringField("name") -} - -// ResolvedDatasetVersion returns the dataset version from the result JSON -// (top-level or nested outputs[0]), defaulting to "v1". -func (j *GenerationJob) ResolvedDatasetVersion() string { - if v := j.resultStringField("version"); v != "" { - return v +// ResolvedNameVersion extracts the name and version from the generation job result. +// If name is empty, both return values are empty (caller should treat as no result). +// If version is empty, it defaults to "latest". 
+func (j *GenerationJob) ResolvedNameVersion() (string, string) { + name := j.resultStringField("name") + if name == "" { + return "", "" } - return "v1" -} - -// ResolvedEvaluatorName returns the evaluator name from the result JSON. -func (j *GenerationJob) ResolvedEvaluatorName() string { - return j.resultStringField("name") + version := j.resultStringField("version") + if version == "" { + version = "latest" + } + return name, version } // resultStringField extracts a string field from the raw Result JSON. @@ -132,6 +127,58 @@ type EvaluatorGenerationJobRequest struct { Sources []GenerationSource `json:"sources"` } +// --------------------------------------------------------------------------- +// Evaluator Versions +// --------------------------------------------------------------------------- + +// EvaluatorVersion is the response for evaluator version operations. +type EvaluatorVersion struct { + Name string `json:"name"` + Version string `json:"version"` +} + +// --------------------------------------------------------------------------- +// Evaluator Definition (Rubric) +// --------------------------------------------------------------------------- + +// EvaluatorResult is the top-level response from evaluator generation, +// containing the evaluator's definition. +type EvaluatorResult struct { + Name string `json:"name"` + Version string `json:"version,omitempty"` + Definition EvaluatorDefinition `json:"definition"` +} + +// EvaluatorDefinition describes an evaluator's scoring rubric. +type EvaluatorDefinition struct { + Type string `json:"type"` + Dimensions []EvaluatorDimension `json:"dimensions"` +} + +// EvaluatorDimension is a single scoring dimension within a rubric evaluator. 
+type EvaluatorDimension struct { + ID string `json:"id"` + Description string `json:"description,omitempty"` + Weight int `json:"weight"` + AlwaysApplicable bool `json:"always_applicable,omitempty"` +} + +// ParseEvaluatorResult parses a GenerationJob result into a structured EvaluatorResult. +// Returns nil if the result cannot be parsed. +func ParseEvaluatorResult(result json.RawMessage) *EvaluatorResult { + if len(result) == 0 { + return nil + } + var r EvaluatorResult + if err := json.Unmarshal(result, &r); err != nil { + return nil + } + if len(r.Definition.Dimensions) == 0 { + return nil + } + return &r +} + // --------------------------------------------------------------------------- // Datasets // --------------------------------------------------------------------------- diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations.go index e51ab769177..5e67ac9b54b 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations.go @@ -27,6 +27,7 @@ import ( const ( pathDataGenerationJobs = "/data_generation_jobs" pathEvaluatorGenerationJobs = "/evaluator_generation_jobs" + pathEvaluators = "/evaluators" pathDatasets = "/datasets" pathOpenAIEvals = "/openai/evals" ) @@ -104,6 +105,33 @@ func (c *EvalClient) GetEvaluatorGenerationJob( return doRequestTyped[GenerationJob](c, ctx, http.MethodGet, path, nil, nil, apiVersion) } +// CreateEvaluatorVersion creates a new version of a named evaluator. +// The body should be the full evaluator JSON with the definition field updated. 
+func (c *EvalClient) CreateEvaluatorVersion( + ctx context.Context, + name string, + body json.RawMessage, + apiVersion string, +) (*EvaluatorVersion, error) { + path := pathEvaluators + "/" + url.PathEscape(name) + "/versions" + return doRequestTyped[EvaluatorVersion](c, ctx, http.MethodPost, path, nil, body, apiVersion) +} + +// GetEvaluatorRaw gets an evaluator by name and version as raw JSON. +// If version is empty, the latest version is fetched. +func (c *EvalClient) GetEvaluatorRaw( + ctx context.Context, + name string, + version string, + apiVersion string, +) (json.RawMessage, error) { + path := pathEvaluators + "/" + url.PathEscape(name) + if version != "" { + path += "/versions/" + url.PathEscape(version) + } + return c.doRequest(ctx, http.MethodGet, path, nil, nil, apiVersion) +} + // CreateOpenAIEval creates an OpenAI eval definition. func (c *EvalClient) CreateOpenAIEval( ctx context.Context, diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go index 4e9c78510b2..14215ad407a 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go @@ -123,7 +123,8 @@ func TestGetDataGenerationJob_Success(t *testing.T) { require.NoError(t, err) assert.Equal(t, "/data_generation_jobs/op-123", capturedPath) assert.Equal(t, "completed", result.Status) - assert.Equal(t, "test-ds", result.ResolvedDatasetName()) + name, _ := result.ResolvedNameVersion() + assert.Equal(t, "test-ds", name) } // --------------------------------------------------------------------------- @@ -182,7 +183,8 @@ func TestGetEvaluatorGenerationJob_Success(t *testing.T) { require.NoError(t, err) assert.Equal(t, "/evaluator_generation_jobs/eval-op-456", capturedPath) assert.Equal(t, "completed", result.Status) - assert.Equal(t, "quality", 
result.ResolvedEvaluatorName()) + name, _ := result.ResolvedNameVersion() + assert.Equal(t, "quality", name) } // --------------------------------------------------------------------------- diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls.go new file mode 100644 index 00000000000..85b52321e82 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls.go @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package eval_api + +import ( + "encoding/base64" + "fmt" + "strings" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + "github.com/google/uuid" +) + +// PortalPrefix holds the parsed project context needed to construct Foundry portal URLs. +type PortalPrefix struct { + prefix string // e.g. "https://ai.azure.com/nextgen/r/,,,," +} + +// NewPortalPrefix parses an ARM project resource ID and returns a PortalPrefix +// that can be reused to build multiple portal URLs. +// Returns an error if the resource ID is invalid or not a Foundry project. 
+func NewPortalPrefix(projectResourceID string) (*PortalPrefix, error) { + resourceID, err := arm.ParseResourceID(projectResourceID) + if err != nil { + return nil, fmt.Errorf("failed to parse project resource ID: %w", err) + } + + encodedSub, err := encodeSubscriptionForURL(resourceID.SubscriptionID) + if err != nil { + return nil, fmt.Errorf("failed to encode subscription ID: %w", err) + } + + if resourceID.Parent == nil || + !strings.Contains(string(resourceID.ResourceType.Type), "/") { + return nil, fmt.Errorf( + "resource ID does not represent a Foundry project (missing parent account): %s", + projectResourceID, + ) + } + + prefix := fmt.Sprintf( + "https://ai.azure.com/nextgen/r/%s,%s,,%s,%s", + encodedSub, resourceID.ResourceGroupName, + resourceID.Parent.Name, resourceID.Name, + ) + return &PortalPrefix{prefix: prefix}, nil +} + +// EvalRunURL returns the portal URL for an eval run report. +func (p *PortalPrefix) EvalRunURL(evalID, runID string) string { + return fmt.Sprintf("%s/build/evaluations/%s/run/%s", p.prefix, evalID, runID) +} + +// EvaluatorURL returns the portal URL for a generated evaluator. +func (p *PortalPrefix) EvaluatorURL(evaluatorName, version string) string { + return fmt.Sprintf("%s/build/evaluations/catalog/%s/%s", p.prefix, evaluatorName, version) +} + +// DatasetURL returns the portal URL for a dataset. +func (p *PortalPrefix) DatasetURL(datasetName, version string) string { + return fmt.Sprintf("%s/build/data/datasets/%s/%s", p.prefix, datasetName, version) +} + +// encodeSubscriptionForURL encodes a subscription ID GUID as base64 without padding. 
+func encodeSubscriptionForURL(subscriptionID string) (string, error) { + guid, err := uuid.Parse(subscriptionID) + if err != nil { + return "", fmt.Errorf("invalid subscription ID format: %w", err) + } + guidBytes, _ := guid.MarshalBinary() + return strings.TrimRight(base64.URLEncoding.EncodeToString(guidBytes), "="), nil +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go index 41d9d5cb234..62aec1729bd 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go @@ -23,41 +23,201 @@ import ( // Runtime state (operation IDs, eval IDs, status) is stored in // the azd environment rather than in this config file. type Config struct { - Name string `yaml:"name,omitempty"` - Agent AgentRef `yaml:"agent"` - DatasetFile string `yaml:"dataset_file,omitempty"` - DatasetReference *DatasetRef `yaml:"dataset_reference,omitempty"` - Evaluators []string `yaml:"evaluators,omitempty"` + Name string `yaml:"name,omitempty"` + Agent AgentRef `yaml:"agent"` + DatasetFile string `yaml:"dataset_file,omitempty"` + DatasetReference *DatasetRef `yaml:"dataset_reference,omitempty"` + Evaluators EvaluatorList `yaml:"evaluators,omitempty"` +} + +// EvaluatorRef describes an evaluator. It can be a simple string name or a +// structured entry with name, version, and local_uri. 
+type EvaluatorRef struct { + Name string `yaml:"name" json:"name"` + Version string `yaml:"version,omitempty" json:"version,omitempty"` + LocalURI string `yaml:"local_uri,omitempty" json:"local_uri,omitempty"` +} + +// EvaluatorList is a list of evaluators that supports mixed YAML: +// +// evaluators: +// - builtin.task_adherence +// - name: custom-quality +// version: "2" +// local_uri: evaluators/custom-quality_2.json +type EvaluatorList []EvaluatorRef + +// UnmarshalYAML handles both plain string and mapping entries. +func (el *EvaluatorList) UnmarshalYAML(value *yaml.Node) error { + if value.Kind != yaml.SequenceNode { + return fmt.Errorf("evaluators must be a sequence, got %v", value.Kind) + } + + result := make([]EvaluatorRef, 0, len(value.Content)) + for _, item := range value.Content { + switch item.Kind { + case yaml.ScalarNode: + // Plain string entry: "builtin.task_adherence" + result = append(result, EvaluatorRef{Name: item.Value}) + case yaml.MappingNode: + // Structured entry: {name: ..., version: ..., local_uri: ...} + var ref EvaluatorRef + if err := item.Decode(&ref); err != nil { + return fmt.Errorf("parsing evaluator entry: %w", err) + } + result = append(result, ref) + default: + return fmt.Errorf("unexpected evaluator entry type: %v", item.Kind) + } + } + *el = result + return nil +} + +// MarshalYAML emits plain strings for simple evaluators and mappings for +// structured ones (those with version or local_uri). +func (el EvaluatorList) MarshalYAML() (any, error) { + nodes := make([]*yaml.Node, 0, len(el)) + for _, ref := range el { + if ref.Version == "" && ref.LocalURI == "" { + // Emit as a plain string. + nodes = append(nodes, &yaml.Node{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: ref.Name, + }) + } else { + // Emit as a mapping. 
+ var n yaml.Node + if err := n.Encode(ref); err != nil { + return nil, err + } + nodes = append(nodes, &n) + } + } + return &yaml.Node{Kind: yaml.SequenceNode, Content: nodes}, nil +} + +// Names returns the evaluator names as a plain string slice. +func (el EvaluatorList) Names() []string { + names := make([]string, len(el)) + for i, ref := range el { + names[i] = ref.Name + } + return names +} + +// FindByLocalURI returns all evaluators that have a local_uri set. +func (el EvaluatorList) FindByLocalURI() []EvaluatorRef { + var refs []EvaluatorRef + for _, ref := range el { + if ref.LocalURI != "" { + refs = append(refs, ref) + } + } + return refs +} + +// SetVersion updates the version of a named evaluator in the list. +func (el EvaluatorList) SetVersion(name, version string) { + for i := range el { + if el[i].Name == name { + el[i].Version = version + return + } + } +} + +// SetLocalURI updates the local_uri of a named evaluator in the list. +func (el EvaluatorList) SetLocalURI(name, uri string) { + for i := range el { + if el[i].Name == name { + el[i].LocalURI = uri + return + } + } } // AgentRef references the agent under evaluation/optimization. type AgentRef struct { - Name string `yaml:"name"` - Kind agent_yaml.AgentKind `yaml:"kind,omitempty"` - Version string `yaml:"version,omitempty"` - Model string `yaml:"model,omitempty"` - SystemPrompt string `yaml:"system_prompt,omitempty"` - SystemPromptFile string `yaml:"system_prompt_file,omitempty"` - SkillDir string `yaml:"skill_dir,omitempty"` + Name string `yaml:"name"` + Kind agent_yaml.AgentKind `yaml:"kind,omitempty"` + Version string `yaml:"version,omitempty"` + Model string `yaml:"model,omitempty"` + Instruction InstructionRef `yaml:"instruction,omitempty"` + SkillDir string `yaml:"skill_dir,omitempty"` } -// ResolvedSystemPrompt returns the system prompt text. If SystemPromptFile is -// set, its contents are read and returned; otherwise SystemPrompt is returned. 
+// ResolvedSystemPrompt returns the resolved instruction text. +// If the instruction references a file, its contents are read; otherwise the +// inline value is returned. func (a *AgentRef) ResolvedSystemPrompt() string { - if a.SystemPromptFile != "" { - data, err := os.ReadFile(a.SystemPromptFile) + return a.Instruction.Resolve() +} + +// InstructionRef holds an instruction that can be either an inline string or a +// file reference. In YAML it supports two forms: +// +// instruction: "inline text" +// instruction: +// file: ./path/to/file.md +type InstructionRef struct { + Value string `yaml:"-"` // inline text + File string `yaml:"-"` // file reference +} + +// Resolve returns the instruction text. If File is set, the file is read; +// otherwise Value is returned directly. +func (r *InstructionRef) Resolve() string { + if r.File != "" { + data, err := os.ReadFile(r.File) if err != nil { - return a.SystemPrompt + return r.Value } return string(data) } - return a.SystemPrompt + return r.Value +} + +// IsEmpty returns true if neither inline value nor file is set. +func (r *InstructionRef) IsEmpty() bool { + return r.Value == "" && r.File == "" +} + +// UnmarshalYAML allows InstructionRef to be either a plain string or a mapping +// with a "file" key. +func (r *InstructionRef) UnmarshalYAML(value *yaml.Node) error { + if value.Kind == yaml.ScalarNode { + r.Value = value.Value + return nil + } + if value.Kind == yaml.MappingNode { + var m struct { + File string `yaml:"file"` + } + if err := value.Decode(&m); err != nil { + return err + } + r.File = m.File + return nil + } + return fmt.Errorf("instruction must be a string or a mapping with 'file' key") +} + +// MarshalYAML writes InstructionRef as a plain string when inline, or as a +// mapping with "file" when referencing a file. 
+func (r InstructionRef) MarshalYAML() (any, error) { + if r.File != "" { + return map[string]string{"file": r.File}, nil + } + return r.Value, nil } // DatasetRef references a named/versioned dataset. type DatasetRef struct { - Name string `yaml:"name"` - Version string `yaml:"version,omitempty"` + Name string `yaml:"name"` + Version string `yaml:"version,omitempty"` + LocalURI string `yaml:"local_uri,omitempty"` } // TargetConfig specifies model candidates and other target-specific configuration. @@ -83,7 +243,7 @@ type Options struct { } // DefaultTargetAttributes are the default optimization target attributes. -var DefaultTargetAttributes = []string{"instruction", "skill", "agents-optimization-job"} +var DefaultTargetAttributes = []string{"agents-optimization-job"} // Deprecated: DefaultStrategies is an alias for backward compatibility. var DefaultStrategies = DefaultTargetAttributes @@ -113,8 +273,8 @@ func (o *Options) UnmarshalYAML(value *yaml.Node) error { o.TargetAttributes = slices.Clone(DefaultTargetAttributes) } - // o.MaxIterations = 3 - // o.Budget = 30 + o.MaxIterations = 4 + o.Budget = 100 return nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go index c1d5b56c307..d794a5fd190 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go @@ -32,7 +32,7 @@ func TestConfig_RoundTrip(t *testing.T) { Model: "gpt-4o", }, DatasetFile: "tasks.jsonl", - Evaluators: []string{"builtin.quality", "custom-1"}, + Evaluators: EvaluatorList{{Name: "builtin.quality"}, {Name: "custom-1"}}, } require.NoError(t, Write(path, original)) @@ -46,8 +46,62 @@ func TestConfig_RoundTrip(t *testing.T) { assert.Equal(t, "gpt-4o", loaded.Agent.Model) assert.Equal(t, "tasks.jsonl", loaded.DatasetFile) require.Len(t, loaded.Evaluators, 2) - 
assert.Equal(t, "builtin.quality", loaded.Evaluators[0]) - assert.Equal(t, "custom-1", loaded.Evaluators[1]) + assert.Equal(t, "builtin.quality", loaded.Evaluators[0].Name) + assert.Equal(t, "custom-1", loaded.Evaluators[1].Name) +} + +func TestConfig_RoundTrip_MixedEvaluators(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + + original := &Config{ + Agent: AgentRef{Name: "agent-x"}, + Evaluators: EvaluatorList{ + {Name: "builtin.task_adherence"}, + {Name: "custom-quality", Version: "2", LocalURI: "evaluators/custom-quality_2.json"}, + }, + } + + require.NoError(t, Write(path, original)) + loaded, err := Read(path) + require.NoError(t, err) + + require.Len(t, loaded.Evaluators, 2) + assert.Equal(t, "builtin.task_adherence", loaded.Evaluators[0].Name) + assert.Empty(t, loaded.Evaluators[0].Version) + assert.Empty(t, loaded.Evaluators[0].LocalURI) + assert.Equal(t, "custom-quality", loaded.Evaluators[1].Name) + assert.Equal(t, "2", loaded.Evaluators[1].Version) + assert.Equal(t, "evaluators/custom-quality_2.json", loaded.Evaluators[1].LocalURI) +} + +func TestEvaluatorList_Names(t *testing.T) { + t.Parallel() + list := EvaluatorList{{Name: "a"}, {Name: "b"}, {Name: "c"}} + assert.Equal(t, []string{"a", "b", "c"}, list.Names()) +} + +func TestEvaluatorList_FindByLocalURI(t *testing.T) { + t.Parallel() + list := EvaluatorList{ + {Name: "builtin.x"}, + {Name: "custom", LocalURI: "/path/to/file.json"}, + {Name: "other"}, + } + found := list.FindByLocalURI() + require.Len(t, found, 1) + assert.Equal(t, "custom", found[0].Name) +} + +func TestEvaluatorList_SetVersion(t *testing.T) { + t.Parallel() + list := EvaluatorList{{Name: "a", Version: "1"}, {Name: "b"}} + list.SetVersion("b", "3") + assert.Equal(t, "3", list[1].Version) + // Non-matching name is a no-op. 
+ list.SetVersion("nonexistent", "99") + assert.Equal(t, "1", list[0].Version) } func TestConfig_RoundTrip_DatasetReference(t *testing.T) { diff --git a/cli/azd/extensions/azure.ai.agents/version.txt b/cli/azd/extensions/azure.ai.agents/version.txt index db5a7919b50..df74c89d931 100644 --- a/cli/azd/extensions/azure.ai.agents/version.txt +++ b/cli/azd/extensions/azure.ai.agents/version.txt @@ -1 +1 @@ -0.1.31-preview +0.1.31-optbugbash-preview From 67a887b7bfd9a7b7598a095e0445da3dcb18aa95 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Tue, 19 May 2026 17:31:30 -0700 Subject: [PATCH 19/33] minor version --- .../azd_observability_bugbash.md | 1 - .../extensions/azure.ai.agents/extension.yaml | 2 +- .../azure.ai.agents/internal/cmd/eval.go | 19 +-- .../internal/cmd/eval_helpers.go | 31 ----- .../azure.ai.agents/internal/cmd/eval_init.go | 4 +- .../internal/cmd/eval_init_jobs.go | 111 +++++++++++++----- .../internal/cmd/eval_init_test.go | 2 +- .../azure.ai.agents/internal/cmd/eval_list.go | 4 +- .../azure.ai.agents/internal/cmd/eval_run.go | 25 +++- .../azure.ai.agents/internal/cmd/eval_show.go | 6 +- .../azure.ai.agents/internal/cmd/eval_test.go | 39 +++--- .../internal/cmd/eval_update.go | 4 +- .../azure.ai.agents/internal/cmd/optimize.go | 11 ++ .../internal/cmd/optimize_config.go | 2 +- .../internal/pkg/agents/eval_api/models.go | 7 ++ .../internal/pkg/agents/eval_api/poller.go | 3 + .../internal/pkg/agents/opteval/yaml.go | 17 +-- .../extensions/azure.ai.agents/version.txt | 2 +- cli/azd/extensions/registry.json | 76 ++++++++++++ 19 files changed, 238 insertions(+), 128 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md index e3612348554..9cbe09c0673 100644 --- a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md +++ b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md @@ -200,7 +200,6 @@ options: strategies: # ([]string) Optimization 
strategies to try. - instruction # Default: ["instruction", "skill", "agents-optimization-job"] - skill - - agents-optimization-job budget: 5 # (int) Max optimization budget (number of candidates). Default: 5 max_iterations: 2 # (int) Max iterations per strategy. Default: 2 (when strategies are default) min_improvement: 0.0 # (float) Minimum score improvement to accept a candidate. Default: 0 (not set) diff --git a/cli/azd/extensions/azure.ai.agents/extension.yaml b/cli/azd/extensions/azure.ai.agents/extension.yaml index 8e24764b11b..4a2e04450c7 100644 --- a/cli/azd/extensions/azure.ai.agents/extension.yaml +++ b/cli/azd/extensions/azure.ai.agents/extension.yaml @@ -5,7 +5,7 @@ displayName: Foundry agents (Preview) description: Ship agents with Microsoft Foundry from your terminal. (Preview) usage: azd ai agent [options] # NOTE: Make sure version.txt is in sync with this version. -version: 0.1.31-optbugbash-preview +version: 0.1.32-optbugbash-preview requiredAzdVersion: ">1.23.13" language: go capabilities: diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go index d9d1f161aac..335c4f353a4 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go @@ -454,7 +454,11 @@ func pollEvalOperationWithSpinner( log.Printf("[debug] %s: failed response:\n%s", label, body) } progress.setFailed(label) - return nil, fmt.Errorf("%s failed with status %q", strings.ToLower(label), jfe.Status) + errMsg := fmt.Sprintf("%s failed with status %q", strings.ToLower(label), jfe.Status) + if jfe.Job != nil && jfe.Job.Error != nil && jfe.Job.Error.Message != "" { + errMsg += ": " + jfe.Job.Error.Message + } + return nil, fmt.Errorf("%s", errMsg) } progress.setFailed(label) return nil, err @@ -465,19 +469,6 @@ func pollEvalOperationWithSpinner( return job, nil } -func readEvalConfig(path string) (*evalConfig, error) { - return 
eval_api.LoadEvalConfig(path) -} - -func writeEvalConfig(path string, cfg *evalConfig) error { - return eval_api.WriteEvalConfig(path, cfg) -} - -// formatTimestamp formats a timestamp value for display in eval output. -func formatTimestamp(ts any) string { - return eval_api.FormatTimestamp(ts) -} - // loadEvalState reads eval runtime state from the azd environment. // Returns an empty state if no values are set. func loadEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string) *evalState { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go index 3b0e499118e..bb59378fdd5 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go @@ -5,7 +5,6 @@ package cmd import ( "context" - "fmt" "log" "azureaiagent/internal/pkg/agents/eval_api" @@ -13,16 +12,6 @@ import ( "github.com/azure/azure-dev/cli/azd/pkg/azdext" ) -// resolveEvalOutputPath resolves the eval config output path. -func resolveEvalOutputPath(output, agentProject string) string { - return eval_api.ResolveEvalOutputPath(output, agentProject) -} - -// resolveEvalConfigPath resolves the eval config path for reading. -func resolveEvalConfigPath(config, agentProject string) string { - return eval_api.ResolveEvalConfigPath(config, agentProject) -} - // resolvePortalPrefix reads AZURE_AI_PROJECT_ID from the azd environment and // returns a PortalPrefix for building Foundry portal URLs. // Returns nil on any failure. @@ -58,23 +47,3 @@ func buildEvalReportURL(ctx context.Context, azdClient *azdext.AzdClient, envNam } return prefix.EvalRunURL(evalID, runID) } - -// formatAny converts any value to a string for display. 
-func formatAny(v any) string { - if v == nil { - return "" - } - switch val := v.(type) { - case string: - return val - case float64: - if val == float64(int64(val)) { - return fmt.Sprintf("%d", int64(val)) - } - return fmt.Sprintf("%g", val) - case bool: - return fmt.Sprintf("%t", val) - default: - return fmt.Sprintf("%v", val) - } -} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go index b125788fd38..5cc2e966d25 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -120,7 +120,7 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error } } - configPath := resolveEvalOutputPath(flags.output, resolved.agentProject) + configPath := eval_api.ResolveEvalOutputPath(flags.output, resolved.agentProject) printEvalDetectedContext(resolved, configPath) // When eval.yaml exists, decide whether to regenerate or create fresh. 
@@ -268,7 +268,7 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error state.InitStatus = "completed" clearEvalState(ctx, resolved.azdClient, resolved.envName) - if err := writeEvalConfig(configPath, evalCfg); err != nil { + if err := eval_api.WriteEvalConfig(configPath, evalCfg); err != nil { return err } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go index c9d93a29d82..7204793dcb9 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -67,8 +67,7 @@ func newEvalConfig(flags *evalInitFlags, resolved *evalResolvedContext) *evalCon Agent: agent, }, Options: &opteval.Options{ - EvalModel: flags.evalModel, - TargetAttributes: opteval.DefaultTargetAttributes, + EvalModel: flags.evalModel, }, MaxSamples: flags.maxSamples, TraceDays: flags.traceDays, @@ -178,7 +177,7 @@ func resumeEvalInit( if resolved.hasProject { eval_api.WriteEvalReviewArtifacts(resolved.agentProject, evalCfg) } - return writeEvalConfig(configPath, evalCfg) + return eval_api.WriteEvalConfig(configPath, evalCfg) } // pollResults carries parsed outputs from completed generation jobs so that @@ -209,36 +208,58 @@ func pollAndFinalizeJobs( wg sync.WaitGroup ) - pollDataset := state.DatasetGenOpID != "" && - !eval_api.ParseJobStatus(state.DatasetGenStatus).IsTerminal() - pollEval := state.EvalGenOpID != "" && - !eval_api.ParseJobStatus(state.EvalGenStatus).IsTerminal() + hasDataset := state.DatasetGenOpID != "" + hasEval := state.EvalGenOpID != "" + needPollDataset := hasDataset && !eval_api.ParseJobStatus(state.DatasetGenStatus).IsTerminal() + needPollEval := hasEval && !eval_api.ParseJobStatus(state.EvalGenStatus).IsTerminal() - // Build progress display labels. + // Build progress display labels (only for jobs that need polling). 
var labels []string - if pollDataset { + if needPollDataset { labels = append(labels, "Dataset generation") } - if pollEval { + if needPollEval { labels = append(labels, "Evaluator generation") } progress := newEvalProgress(labels...) progress.Start() - if pollDataset { + if hasDataset { wg.Add(1) go func() { defer wg.Done() - completed, err := pollEvalOperationWithSpinner( - ctx, "Dataset generation", state.DatasetGenOpID, - resolved.evalClient.GetDataGenerationJob, DataGenerationAPIVersion, - progress, - ) - if err != nil { - datasetPollErr = err - return + var completed *eval_api.GenerationJob + if needPollDataset { + var err error + completed, err = pollEvalOperationWithSpinner( + ctx, "Dataset generation", state.DatasetGenOpID, + resolved.evalClient.GetDataGenerationJob, DataGenerationAPIVersion, + progress, + ) + if err != nil { + datasetPollErr = fmt.Errorf("dataset generation job %s: %w", state.DatasetGenOpID, err) + return + } + } else { + // Job was already terminal at submission — fetch it directly. + var err error + completed, err = resolved.evalClient.GetDataGenerationJob( + ctx, state.DatasetGenOpID, DataGenerationAPIVersion, + ) + if err != nil { + datasetPollErr = err + return + } + if eval_api.ParseJobStatus(completed.NormalizedStatus()).IsFailed() { + errMsg := fmt.Sprintf("dataset generation job %s failed", state.DatasetGenOpID) + if completed.Error != nil && completed.Error.Message != "" { + errMsg += ": " + completed.Error.Message + } + datasetPollErr = fmt.Errorf("%s", errMsg) + return + } } - // Dataset goroutine owns: state.DatasetGenStatus, evalCfg.DatasetReference, evalCfg.DatasetFile. 
+ state.DatasetGenStatus = completed.NormalizedStatus() dsRef := datasetFromJob(completed) if dsRef == nil { @@ -261,19 +282,42 @@ func pollAndFinalizeJobs( }() } - if pollEval { + if hasEval { wg.Add(1) go func() { defer wg.Done() - completed, err := pollEvalOperationWithSpinner( - ctx, "Evaluator generation", state.EvalGenOpID, - resolved.evalClient.GetEvaluatorGenerationJob, DefaultAgentAPIVersion, - progress, - ) - if err != nil { - evalPollErr = err - return + var completed *eval_api.GenerationJob + if needPollEval { + var err error + completed, err = pollEvalOperationWithSpinner( + ctx, "Evaluator generation", state.EvalGenOpID, + resolved.evalClient.GetEvaluatorGenerationJob, DefaultAgentAPIVersion, + progress, + ) + if err != nil { + evalPollErr = fmt.Errorf("evaluator generation job %s: %w", state.EvalGenOpID, err) + return + } + } else { + // Job was already terminal at submission — fetch it directly. + var err error + completed, err = resolved.evalClient.GetEvaluatorGenerationJob( + ctx, state.EvalGenOpID, DefaultAgentAPIVersion, + ) + if err != nil { + evalPollErr = err + return + } + if eval_api.ParseJobStatus(completed.NormalizedStatus()).IsFailed() { + errMsg := fmt.Sprintf("evaluator generation job %s failed", state.EvalGenOpID) + if completed.Error != nil && completed.Error.Message != "" { + errMsg += ": " + completed.Error.Message + } + evalPollErr = fmt.Errorf("%s", errMsg) + return + } } + // Evaluator goroutine owns: state.EvalGenStatus, evalCfg.Evaluators. 
evalName, evalVersion := evaluatorFromJob(completed) state.EvalGenStatus = completed.NormalizedStatus() @@ -308,6 +352,9 @@ func pollAndFinalizeJobs( } } + if datasetPollErr != nil && evalPollErr != nil { + return results, fmt.Errorf("%w\n%w", datasetPollErr, evalPollErr) + } if datasetPollErr != nil { return results, datasetPollErr } @@ -344,7 +391,7 @@ func writePendingEvalInit( if err := saveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { return err } - if err := writeEvalConfig(configPath, evalCfg); err != nil { + if err := eval_api.WriteEvalConfig(configPath, evalCfg); err != nil { return err } fmt.Println(color.YellowString("Eval init submitted (async)")) @@ -374,7 +421,7 @@ func writeTimedOutEvalInit( if err := saveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { return err } - if err := writeEvalConfig(configPath, evalCfg); err != nil { + if err := eval_api.WriteEvalConfig(configPath, evalCfg); err != nil { return err } fmt.Println(color.YellowString("\nGeneration jobs timed out but are still running on the server.")) @@ -396,7 +443,7 @@ func writeTimedOutEvalInit( // tryLoadExistingEvalConfig attempts to load an eval config from the given path. // Returns (config, true) if the file exists and parses successfully, or (nil, false) otherwise. 
func tryLoadExistingEvalConfig(configPath string) (*evalConfig, bool) { - cfg, err := readEvalConfig(configPath) + cfg, err := eval_api.LoadEvalConfig(configPath) if err != nil { return nil, false } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go index 804423cce87..a0b2e3c1ed3 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go @@ -522,7 +522,7 @@ func TestTryLoadExistingEvalConfig_Found(t *testing.T) { Evaluators: opteval.EvaluatorList{{Name: "quality"}}, }, } - require.NoError(t, writeEvalConfig(cfgPath, cfg)) + require.NoError(t, eval_api.WriteEvalConfig(cfgPath, cfg)) loaded, ok := tryLoadExistingEvalConfig(cfgPath) require.True(t, ok) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go index 47687de3d30..01123f4b6a2 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go @@ -10,6 +10,8 @@ import ( "sync" "text/tabwriter" + "azureaiagent/internal/pkg/agents/eval_api" + "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/fatih/color" "github.com/spf13/cobra" @@ -96,7 +98,7 @@ func runEvalList(ctx context.Context, flags *evalListFlags) error { } status := padColorizedStatus(summaries[i].lastRunStatus) createdBy := item.CreatedBy - createdOn := formatTimestamp(item.CreatedAt) + createdOn := eval_api.FormatTimestamp(item.CreatedAt) fmt.Fprintf(w, "%s \t%s\t%s\t%s\t%d\t%s\t%s\n", marker, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go index 8cfd67b9f9d..bfe49efc44c 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go @@ -53,16 
+53,37 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { } defer resolved.azdClient.Close() - configPath := resolveEvalConfigPath(flags.config, resolved.agentProject) - evalCfg, err := readEvalConfig(configPath) + configPath := eval_api.ResolveEvalConfigPath(flags.config, resolved.agentProject) + evalCfg, err := eval_api.LoadEvalConfig(configPath) if err != nil { return err } + + // Reconcile agent name/version between environment and eval.yaml. + // Environment values take precedence; warn and update the config if they differ. + configChanged := false if resolved.agentName == "" { resolved.agentName = evalCfg.Agent.Name + } else if evalCfg.Agent.Name != "" && evalCfg.Agent.Name != resolved.agentName { + fmt.Printf(" %s agent name in %s (%q) differs from environment (%q) — using environment value\n", + color.YellowString("warning:"), flags.config, evalCfg.Agent.Name, resolved.agentName) + evalCfg.Agent.Name = resolved.agentName + configChanged = true } if resolved.version == "" { resolved.version = evalCfg.Agent.Version + } else if evalCfg.Agent.Version != "" && evalCfg.Agent.Version != resolved.version { + fmt.Printf(" %s agent version in %s (%q) differs from environment (%q) — using environment value\n", + color.YellowString("warning:"), flags.config, evalCfg.Agent.Version, resolved.version) + evalCfg.Agent.Version = resolved.version + configChanged = true + } + if configChanged { + if err := eval_api.WriteEvalConfig(configPath, evalCfg); err != nil { + fmt.Printf(" %s failed to update %s: %s\n", color.YellowString("warning:"), flags.config, err) + } else { + fmt.Printf(" Updated %s with current environment values\n", flags.config) + } } state := loadEvalState(ctx, resolved.azdClient, resolved.envName) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go index a3b34cc25b3..6564cc41bd6 100644 --- 
a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go @@ -101,7 +101,7 @@ func printEvalSummary(evalObj *eval_api.OpenAIEval, runs []eval_api.OpenAIEvalRu if agent := evalObj.Metadata["azd_agent"]; agent != "" { fmt.Printf("Agent: %s\n", agent) } - fmt.Printf("Created: %s\n", formatTimestamp(evalObj.CreatedAt)) + fmt.Printf("Created: %s\n", eval_api.FormatTimestamp(evalObj.CreatedAt)) if evalObj.CreatedBy != "" { fmt.Printf("Created by: %s\n", evalObj.CreatedBy) } @@ -121,7 +121,7 @@ func printEvalSummary(evalObj *eval_api.OpenAIEval, runs []eval_api.OpenAIEvalRu run.Status, passed, failed, - formatTimestamp(run.CreatedAt), + eval_api.FormatTimestamp(run.CreatedAt), ) } if err := w.Flush(); err != nil { @@ -138,7 +138,7 @@ func printEvalRunSummary(evalID string, run *eval_api.OpenAIEvalRun) error { fmt.Printf("Name: %s\n", run.Name) } fmt.Printf("Status: %s\n", run.Status) - fmt.Printf("Created: %s\n", formatTimestamp(run.CreatedAt)) + fmt.Printf("Created: %s\n", eval_api.FormatTimestamp(run.CreatedAt)) if run.CreatedBy != "" { fmt.Printf("Created by: %s\n", run.CreatedBy) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go index cbf7d5d427c..397501c6bdc 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go @@ -117,30 +117,21 @@ func TestOpenAIEval_ResolvedID(t *testing.T) { } // --------------------------------------------------------------------------- -// formatAny / formatTimestamp +// eval_api.FormatTimestamp // --------------------------------------------------------------------------- -func TestFormatAny(t *testing.T) { - t.Parallel() - - assert.Equal(t, "", formatAny(nil)) - assert.Equal(t, "hello", formatAny("hello")) - assert.Equal(t, "42", formatAny(float64(42))) - assert.Equal(t, "true", formatAny(true)) -} 
- func TestFormatTimestamp(t *testing.T) { t.Parallel() - assert.Equal(t, "2024-01-15 10:30 UTC", formatTimestamp("2024-01-15 10:30 UTC")) - assert.Contains(t, formatTimestamp(float64(1705312200)), "2024-01-15") - assert.Contains(t, formatTimestamp(int64(1705312200)), "2024-01-15") - assert.Equal(t, "", formatTimestamp(nil)) - assert.Equal(t, "", formatTimestamp(true)) + assert.Equal(t, "2024-01-15 10:30 UTC", eval_api.FormatTimestamp("2024-01-15 10:30 UTC")) + assert.Contains(t, eval_api.FormatTimestamp(float64(1705312200)), "2024-01-15") + assert.Contains(t, eval_api.FormatTimestamp(int64(1705312200)), "2024-01-15") + assert.Equal(t, "", eval_api.FormatTimestamp(nil)) + assert.Equal(t, "", eval_api.FormatTimestamp(true)) } // --------------------------------------------------------------------------- -// resolveEvalOutputPath / resolveEvalConfigPath +// eval_api.ResolveEvalOutputPath / eval_api.ResolveEvalConfigPath // --------------------------------------------------------------------------- func TestResolveEvalOutputPath(t *testing.T) { @@ -149,12 +140,12 @@ func TestResolveEvalOutputPath(t *testing.T) { t.Run("absolute path returned as-is", func(t *testing.T) { t.Parallel() abs := filepath.Join(os.TempDir(), "eval.yaml") - assert.Equal(t, abs, resolveEvalOutputPath(abs, "/project")) + assert.Equal(t, abs, eval_api.ResolveEvalOutputPath(abs, "/project")) }) t.Run("relative path joined with agent project", func(t *testing.T) { t.Parallel() - result := resolveEvalOutputPath("eval.yaml", "/project/agent") + result := eval_api.ResolveEvalOutputPath("eval.yaml", "/project/agent") assert.Equal(t, filepath.Join("/project/agent", "eval.yaml"), result) }) } @@ -165,12 +156,12 @@ func TestResolveEvalConfigPath(t *testing.T) { t.Run("absolute path returned as-is", func(t *testing.T) { t.Parallel() abs := filepath.Join(os.TempDir(), "eval.yaml") - assert.Equal(t, abs, resolveEvalConfigPath(abs, "/project")) + assert.Equal(t, abs, eval_api.ResolveEvalConfigPath(abs, 
"/project")) }) t.Run("relative path joined with agent project when file does not exist", func(t *testing.T) { t.Parallel() - result := resolveEvalConfigPath("nonexistent.yaml", "/project/agent") + result := eval_api.ResolveEvalConfigPath("nonexistent.yaml", "/project/agent") assert.Equal(t, filepath.Join("/project/agent", "nonexistent.yaml"), result) }) } @@ -432,7 +423,7 @@ func TestRelPathForYaml(t *testing.T) { } // --------------------------------------------------------------------------- -// writeEvalConfig / readEvalConfig round-trip +// eval_api.WriteEvalConfig / eval_api.LoadEvalConfig round-trip // --------------------------------------------------------------------------- func TestEvalConfigRoundTrip(t *testing.T) { @@ -458,10 +449,10 @@ func TestEvalConfigRoundTrip(t *testing.T) { MaxSamples: 50, } - err := writeEvalConfig(path, original) + err := eval_api.WriteEvalConfig(path, original) require.NoError(t, err) - loaded, err := readEvalConfig(path) + loaded, err := eval_api.LoadEvalConfig(path) require.NoError(t, err) assert.Equal(t, original.Name, loaded.Name) @@ -479,6 +470,6 @@ func TestEvalConfigRoundTrip(t *testing.T) { func TestReadEvalConfig_MissingFile(t *testing.T) { t.Parallel() - _, err := readEvalConfig("/nonexistent/path/eval.yaml") + _, err := eval_api.LoadEvalConfig("/nonexistent/path/eval.yaml") assert.Error(t, err) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go index 5f8ebc2dd67..573863be0cf 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go @@ -58,7 +58,7 @@ func runEvalUpdate(ctx context.Context, flags *evalUpdateFlags, noPrompt bool) e defer resolved.azdClient.Close() configPath := eval_api.ResolveEvalConfigPath(flags.config, resolved.agentProject) - evalCfg, err := readEvalConfig(configPath) + evalCfg, err := eval_api.LoadEvalConfig(configPath) 
if err != nil { return fmt.Errorf("failed to load eval config: %w", err) } @@ -105,7 +105,7 @@ func runEvalUpdate(ctx context.Context, flags *evalUpdateFlags, noPrompt bool) e } if totalUpdated > 0 { - if err := writeEvalConfig(configPath, evalCfg); err != nil { + if err := eval_api.WriteEvalConfig(configPath, evalCfg); err != nil { return fmt.Errorf("failed to save updated config: %w", err) } fmt.Printf("\n%s Updated config saved to %s\n", color.GreenString("Done."), flags.config) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go index f4e7e0f9e90..598646e708b 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -20,6 +20,7 @@ import ( "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/fatih/color" "github.com/spf13/cobra" + "go.yaml.in/yaml/v3" ) // optimizeAgentContext holds the resolved agent name and project directory. @@ -199,6 +200,16 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { if cfg == nil { cfg = defaultOptimizeConfig(resolved.agentName) + } else if resolved.agentName != "" && cfg.Agent.Name != "" && cfg.Agent.Name != resolved.agentName { + // Config loaded from eval.yaml but agent name differs from environment. 
+ fmt.Printf(" %s agent name in %s (%q) differs from environment (%q) — using environment value\n", + color.YellowString("warning:"), configSource, cfg.Agent.Name, resolved.agentName) + cfg.Agent.Name = resolved.agentName + if data, mErr := yaml.Marshal(cfg); mErr == nil { + if wErr := os.WriteFile(configSource, data, 0600); wErr == nil { + fmt.Printf(" Updated %s with current environment values\n", configSource) + } + } } } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go index c823f84aa29..940f0ab1031 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -86,7 +86,7 @@ func defaultOptimizeConfig(agentName string) *OptimizeConfig { Options: &opteval.Options{ EvalModel: "gpt-4o", Mode: "optimize", - TargetAttributes: []string{"instruction", "skill", "agents-optimization-job"}, + TargetAttributes: []string{"instruction", "skill"}, Budget: 5, }, } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go index f8eb38cb060..388a4257b59 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go @@ -48,6 +48,13 @@ type GenerationJob struct { ID string `json:"id"` Status string `json:"status"` Result json.RawMessage `json:"result,omitempty"` + Error *JobError `json:"error,omitempty"` +} + +// JobError captures error details from a failed generation job. +type JobError struct { + Code string `json:"code,omitempty"` + Message string `json:"message,omitempty"` } // OperationID returns the job's operation identifier. 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go index 1c33582ecae..a7124208253 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go @@ -70,6 +70,9 @@ type JobFailedError struct { } func (e *JobFailedError) Error() string { + if e.Job != nil && e.Job.Error != nil && e.Job.Error.Message != "" { + return fmt.Sprintf("job failed with status %q: %s", e.Status, e.Job.Error.Message) + } return fmt.Sprintf("job failed with status %q", e.Status) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go index 62aec1729bd..15083472993 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go @@ -7,7 +7,6 @@ import ( "fmt" "os" "path/filepath" - "slices" "azureaiagent/internal/pkg/agents/agent_yaml" @@ -242,12 +241,6 @@ type Options struct { ReflectionModel string `yaml:"reflection_model,omitempty"` } -// DefaultTargetAttributes are the default optimization target attributes. -var DefaultTargetAttributes = []string{"agents-optimization-job"} - -// Deprecated: DefaultStrategies is an alias for backward compatibility. -var DefaultStrategies = DefaultTargetAttributes - // UnmarshalYAML populates default target attributes when the field is absent in YAML. // For backward compatibility, the legacy "strategies" key is also accepted. 
func (o *Options) UnmarshalYAML(value *yaml.Node) error { @@ -269,12 +262,12 @@ func (o *Options) UnmarshalYAML(value *yaml.Node) error { } } - if len(o.TargetAttributes) == 0 { - o.TargetAttributes = slices.Clone(DefaultTargetAttributes) + if o.MaxIterations <= 0 { + o.MaxIterations = 4 + } + if o.Budget <= 0 { + o.Budget = 100 } - - o.MaxIterations = 4 - o.Budget = 100 return nil } diff --git a/cli/azd/extensions/azure.ai.agents/version.txt b/cli/azd/extensions/azure.ai.agents/version.txt index df74c89d931..0a97711d3d4 100644 --- a/cli/azd/extensions/azure.ai.agents/version.txt +++ b/cli/azd/extensions/azure.ai.agents/version.txt @@ -1 +1 @@ -0.1.31-optbugbash-preview +0.1.32-optbugbash-preview diff --git a/cli/azd/extensions/registry.json b/cli/azd/extensions/registry.json index 38a59c52ac1..d73bcc49147 100644 --- a/cli/azd/extensions/registry.json +++ b/cli/azd/extensions/registry.json @@ -4013,6 +4013,82 @@ "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-windows-arm64.zip" } } + }, + { + "version": "0.1.32-optbugbash-preview", + "requiredAzdVersion": "\u003e1.23.13", + "capabilities": [ + "custom-commands", + "lifecycle-events", + "mcp-server", + "service-target-provider", + "metadata" + ], + "providers": [ + { + "name": "azure.ai.agent", + "type": "service-target", + "description": "Deploys agents to the Foundry Agent Service" + } + ], + "usage": "azd ai agent \u003ccommand\u003e [options]", + "examples": [ + { + "name": "init", + "description": "Initialize a new AI agent project.", + "usage": "azd ai agent init" + } + ], + "artifacts": { + "darwin/amd64": { + "checksum": { + "algorithm": "sha256", + "value": "9b7c45241bf0bf1ac6b822e2190bec7136d1ffc25a43718816a17cf77a022d70" + }, + "entryPoint": "azure-ai-agents-darwin-amd64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-darwin-amd64.zip" + 
}, + "darwin/arm64": { + "checksum": { + "algorithm": "sha256", + "value": "034491e234bff6b4a4ad37850e9961268ac82ff7d364898cdf4b8a572184a675" + }, + "entryPoint": "azure-ai-agents-darwin-arm64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-darwin-arm64.zip" + }, + "linux/amd64": { + "checksum": { + "algorithm": "sha256", + "value": "92408289da7dae4e45969cc80b92856e577e014853415176f81b53b581c4bee5" + }, + "entryPoint": "azure-ai-agents-linux-amd64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-linux-amd64.tar.gz" + }, + "linux/arm64": { + "checksum": { + "algorithm": "sha256", + "value": "164328f849a5375c5f11201f1a23915a606f0fa02bda51aead9850a685ea5cc6" + }, + "entryPoint": "azure-ai-agents-linux-arm64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-linux-arm64.tar.gz" + }, + "windows/amd64": { + "checksum": { + "algorithm": "sha256", + "value": "0b804726424c7cbebd2c501b38d222cc42beb92172c522871cad0b8df82c6efd" + }, + "entryPoint": "azure-ai-agents-windows-amd64.exe", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-windows-amd64.zip" + }, + "windows/arm64": { + "checksum": { + "algorithm": "sha256", + "value": "c195a3a99d3ff94ed5d841e74a79d64376ebe5e70c81b0ca3cbedb69cd48864a" + }, + "entryPoint": "azure-ai-agents-windows-arm64.exe", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-windows-arm64.zip" + } + } } ] }, From 92ba4d972155916c5c881096c9d92e5589ada9ca Mon Sep 17 00:00:00 2001 From: zyysurely Date: Tue, 19 May 2026 21:33:47 -0700 Subject: [PATCH 20/33] new version --- .../extensions/azure.ai.agents/extension.yaml | 2 +- 
.../azure.ai.agents/internal/cmd/eval_init.go | 65 +++- .../internal/cmd/eval_init_jobs.go | 6 +- .../internal/cmd/eval_init_prompts.go | 56 ++- .../azure.ai.agents/internal/cmd/optimize.go | 193 +++++++--- .../internal/cmd/optimize_apply.go | 364 +++++++++++++++--- .../internal/cmd/optimize_config.go | 25 ++ .../internal/cmd/optimize_helpers.go | 25 ++ .../pkg/agents/eval_api/eval_config.go | 4 +- .../pkg/agents/eval_api/portal_urls.go | 7 + .../internal/pkg/agents/opteval/yaml.go | 69 +++- .../pkg/agents/optimize_api/models.go | 17 + .../extensions/azure.ai.agents/version.txt | 2 +- cli/azd/extensions/registry.json | 76 ++++ 14 files changed, 794 insertions(+), 117 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/extension.yaml b/cli/azd/extensions/azure.ai.agents/extension.yaml index 4a2e04450c7..5a7cd13dbb8 100644 --- a/cli/azd/extensions/azure.ai.agents/extension.yaml +++ b/cli/azd/extensions/azure.ai.agents/extension.yaml @@ -5,7 +5,7 @@ displayName: Foundry agents (Preview) description: Ship agents with Microsoft Foundry from your terminal. (Preview) usage: azd ai agent [options] # NOTE: Make sure version.txt is in sync with this version. 
-version: 0.1.32-optbugbash-preview +version: 0.1.33-optbugbash-preview requiredAzdVersion: ">1.23.13" language: go capabilities: diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go index 5cc2e966d25..f7f97a68e43 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -29,6 +29,9 @@ type evalInitFlags struct { projectEndpoint string instruction string instructionFile string + configFile string + skillDir string + toolsFile string evalModel string dataset string output string @@ -123,6 +126,26 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error configPath := eval_api.ResolveEvalOutputPath(flags.output, resolved.agentProject) printEvalDetectedContext(resolved, configPath) + // Auto-detect agent config metadata if no instruction was provided. + // This looks for .agent_configs/baseline/metadata.yaml and resolves + // instruction and skill_dir from it. + if flags.instruction == "" && flags.instructionFile == "" && resolved.hasProject { + defaultConfigFile := filepath.Join(agentConfigsDir, "baseline", "metadata.yaml") + absConfigFile := filepath.Join(resolved.agentProject, defaultConfigFile) + if _, err := os.Stat(absConfigFile); err == nil { + // Found a default config — resolve all fields from it. + var agent opteval.AgentRef + agent.ConfigFile = defaultConfigFile + agent.ResolveFromConfig(resolved.agentProject) + flags.configFile = defaultConfigFile + flags.instructionFile = agent.Instruction.File + flags.instruction = agent.Instruction.Value + flags.skillDir = agent.SkillDir + flags.toolsFile = agent.ToolsFile + fmt.Printf(" Config: %s\n", absConfigFile) + } + } + // When eval.yaml exists, decide whether to regenerate or create fresh. 
existingCfg, hasExisting := tryLoadExistingEvalConfig(configPath) isRegenerate := false @@ -155,9 +178,22 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error if existingCfg.Options != nil && !flags.evalModelSet { flags.evalModel = existingCfg.Options.EvalModel } - if flags.instruction == "" && flags.instructionFile == "" { - flags.instruction = existingCfg.Agent.Instruction.Value - flags.instructionFile = existingCfg.Agent.Instruction.File + if flags.configFile == "" && existingCfg.Agent.ConfigFile != "" { + flags.configFile = existingCfg.Agent.ConfigFile + // Resolve all fields from the config for generation API calls. + var agentRef opteval.AgentRef + agentRef.ConfigFile = flags.configFile + agentRef.ResolveFromConfig(resolved.agentProject) + if flags.instruction == "" && flags.instructionFile == "" { + flags.instructionFile = agentRef.Instruction.File + flags.instruction = agentRef.Instruction.Value + } + if flags.skillDir == "" { + flags.skillDir = agentRef.SkillDir + } + if flags.toolsFile == "" { + flags.toolsFile = agentRef.ToolsFile + } } if !flags.maxSamplesSet && existingCfg.MaxSamples > 0 { flags.maxSamples = existingCfg.MaxSamples @@ -185,6 +221,26 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error return err } + // If no baseline config exists yet and we have an instruction, write it + // so that optimize can use it later. + if flags.configFile == "" && resolved.hasProject && + (flags.instruction != "" || flags.instructionFile != "") { + defaultConfigFile := filepath.Join(agentConfigsDir, "baseline", "metadata.yaml") + absConfigFile := filepath.Join(resolved.agentProject, defaultConfigFile) + if _, err := os.Stat(absConfigFile); err != nil { + // Baseline doesn't exist — create it. 
+ instruction := resolvedInstruction(flags) + if writeErr := writeBaselineFromEvalInit( + resolved.agentProject, resolved.agentName, instruction, + ); writeErr != nil { + fmt.Printf(" warning: failed to write baseline config: %s\n", writeErr) + } else { + flags.configFile = defaultConfigFile + fmt.Printf(" Baseline: %s\n", absConfigFile) + } + } + } + // Finalize the eval suite name. On fresh init, add a random suffix to // avoid collisions. On regeneration, keep the existing name. if !isRegenerate { @@ -193,7 +249,8 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error // Prompt agents use the agent source directly; hosted agents require an instruction. if resolved.agentKind != agent_yaml.AgentKindPrompt && - flags.instruction == "" && flags.instructionFile == "" && (flags.dataset == "" || len(flags.evaluators) == 0) { + flags.instruction == "" && flags.instructionFile == "" && flags.configFile == "" && + (flags.dataset == "" || len(flags.evaluators) == 0) { return fmt.Errorf("--gen-instruction is required when generating eval assets for a hosted agent") } if flags.maxSamples < 15 || flags.maxSamples > 1000 { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go index 7204793dcb9..d86ae9893f9 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -56,10 +56,8 @@ func newEvalConfig(flags *evalInitFlags, resolved *evalResolvedContext) *evalCon Kind: resolved.agentKind, Version: resolved.version, } - if flags.instructionFile != "" { - agent.Instruction = opteval.InstructionRef{File: flags.instructionFile} - } else if flags.instruction != "" { - agent.Instruction = opteval.InstructionRef{Value: flags.instruction} + if flags.configFile != "" { + agent.ConfigFile = flags.configFile } return &evalConfig{ Config: opteval.Config{ diff --git 
a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go index 22c9ea6cac4..111e3cb09a4 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go @@ -46,7 +46,12 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f needsGeneration := true // adaptive evaluator is always generated needsEvalGen := true - if flags.instruction == "" && flags.instructionFile == "" && needsGeneration && resolved.agentKind != agent_yaml.AgentKindPrompt { + if flags.configFile != "" && needsGeneration && resolved.agentKind != agent_yaml.AgentKindPrompt { + // Config detected — show resolved values and let the user confirm or override. + if err := promptConfigConfirmation(ctx, azdClient, resolved, flags); err != nil { + return err + } + } else if flags.instruction == "" && flags.instructionFile == "" && needsGeneration && resolved.agentKind != agent_yaml.AgentKindPrompt { // Let the user choose between inline text or loading from a file. inputChoices := []*azdext.SelectChoice{ {Label: "Type inline", Value: "inline"}, @@ -338,3 +343,52 @@ func promptRegenerateChoices( return nil } + +// promptConfigConfirmation shows the resolved instruction file from +// metadata.yaml and lets the user confirm or override it. +func promptConfigConfirmation( + ctx context.Context, + azdClient *azdext.AzdClient, + resolved *evalResolvedContext, + flags *evalInitFlags, +) error { + prompt := azdClient.Prompt() + projectDir := resolved.agentProject + + // Instruction file. 
+ instrDefault := relativeDisplay(flags.instructionFile, projectDir) + resp, err := prompt.Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Instruction file", + DefaultValue: instrDefault, + IgnoreHintKeys: true, + }, + }) + if err != nil { + return fmt.Errorf("prompting for instruction file: %w", err) + } + if value := strings.TrimSpace(resp.Value); value != "" { + if !filepath.IsAbs(value) && projectDir != "" { + value = filepath.Join(projectDir, value) + } + if _, err := os.Stat(value); err != nil { + return fmt.Errorf("instruction file %q is not accessible: %w", value, err) + } + flags.instructionFile = value + flags.instruction = "" // file takes precedence + } + + return nil +} + +// relativeDisplay returns a project-relative path for display purposes. +// Returns empty string for empty input. +func relativeDisplay(absPath, projectDir string) string { + if absPath == "" || projectDir == "" { + return absPath + } + if rel, err := filepath.Rel(projectDir, absPath); err == nil { + return rel + } + return absPath +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go index 598646e708b..57e413426a8 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -225,14 +225,44 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { cfg.Options.TargetAttributes = a.flags.targetAttributes } + // Resolve agent config directory pointer — fills in instruction, skill_dir, + // tools_file, and model from metadata.yaml if the config pointer is set. + if hasProject { + cfg.Agent.ResolveFromConfig(agentProject) + } + + // Auto-detect baseline config if no config pointer is set yet. 
+ if cfg.Agent.ConfigFile == "" && hasProject { + defaultConfigFile := filepath.Join(agentConfigsDir, "baseline", "metadata.yaml") + absConfigFile := filepath.Join(agentProject, defaultConfigFile) + if _, statErr := os.Stat(absConfigFile); statErr == nil { + cfg.Agent.ConfigFile = defaultConfigFile + cfg.Agent.ResolveFromConfig(agentProject) + fmt.Printf(" Baseline: %s\n", absConfigFile) + } + } + + // When baseline config is detected, show resolved values and let the user confirm. + if cfg.Agent.ConfigFile != "" && hasProject && !a.noPrompt { + if err := promptOptimizeConfigConfirmation(ctx, cfg, agentProject); err != nil { + return err + } + } + // Resolve relative skill_dir against agent project directory. if cfg.Agent.SkillDir != "" && hasProject && !filepath.IsAbs(cfg.Agent.SkillDir) { cfg.Agent.SkillDir = filepath.Join(agentProject, cfg.Agent.SkillDir) } + // Resolve relative tools_file against agent project directory. + // TODO: re-enable when tools optimization is supported in the service. + // if cfg.Agent.ToolsFile != "" && hasProject && !filepath.IsAbs(cfg.Agent.ToolsFile) { + // cfg.Agent.ToolsFile = filepath.Join(agentProject, cfg.Agent.ToolsFile) + // } + // Resolve agent instruction using a well-defined lifecycle: - // 1. Config file (eval.yaml / --config) — instruction in the agent section (inline or file reference) - // 2. Baseline config — .agent_optimization/baseline/config.json from a prior optimize run + // 1. Config dir pointer (agent.config in eval.yaml) — resolves from metadata.yaml + // 2. Config file (eval.yaml / --config) — instruction in the agent section (inline or file reference) // 3. Interactive prompt — ask the user to provide inline text or a file path if err := resolveOptimizeSystemPrompt(ctx, cfg, agentProject, hasProject, a.noPrompt); err != nil { return err @@ -280,11 +310,15 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { // Save baseline config before starting optimization. 
if hasProject { - if err := saveBaselineConfig(agentProject, cfg.Agent.SkillDir, optimizeReq); err != nil { + if err := saveBaselineConfig(agentProject, cfg.Agent.SkillDir, cfg.Agent.ToolsFile, optimizeReq); err != nil { fmt.Fprintf(out, " warning: failed to save baseline config: %s\n", err) } else { - fmt.Fprintf(out, " Baseline saved to %s\n", - filepath.Join(optimizationDir, "baseline", "config.json")) + baselineMetaPath := filepath.Join(agentConfigsDir, "baseline", "metadata.yaml") + fmt.Fprintf(out, " Baseline saved to %s\n", baselineMetaPath) + // Set config pointer so eval.yaml references the baseline. + if cfg.Agent.ConfigFile == "" { + cfg.Agent.ConfigFile = baselineMetaPath + } } } @@ -294,7 +328,12 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { } fmt.Fprintf(out, " Job ID: %s\n", color.CyanString(resp.OperationID)) - fmt.Fprintf(out, " Status: %s\n\n", resp.Status) + fmt.Fprintf(out, " Status: %s\n", resp.Status) + + // Print portal link for the optimization job. + printOptimizePortalLink(ctx, out, cfg.Agent.Name, resp.OperationID) + + fmt.Fprintln(out) // Store last operation ID in azd environment for use by status/deploy saveLastOptimizeJobID(ctx, resp.OperationID) @@ -310,10 +349,10 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { return nil } -// resolveOptimizeSystemPrompt resolves the agent's system prompt using a well-defined lifecycle: +// resolveOptimizeSystemPrompt resolves the agent's system prompt: // -// 1. Config (eval.yaml / --config): instruction in the agent section (inline or file). -// 2. Baseline: .agent_optimization/baseline/config.json from a prior optimization run. +// 1. Config dir pointer (agent.config): instruction from metadata.yaml (already resolved). +// 2. Config (eval.yaml / --config): inline instruction or file reference. // 3. Interactive prompt: ask the user to provide inline text or a file path. // // Relative file paths are resolved against agentProject. 
@@ -343,38 +382,12 @@ func resolveOptimizeSystemPrompt( return nil } - // Step 2: Check baseline config from a prior optimization run. - if hasProject { - if baseline, loadErr := loadBaselineConfig(agentProject); loadErr == nil && baseline.Instructions != "" { - if noPrompt { - cfg.Agent.Instruction.Value = baseline.Instructions - return nil - } - - azdClient, clientErr := azdext.NewAzdClient() - if clientErr == nil { - defer azdClient.Close() - resp, promptErr := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ - Options: &azdext.ConfirmOptions{ - Message: "No instruction in config. " + - "Found one in baseline (.agent_optimization/baseline/config.json). Use it?", - DefaultValue: new(true), - }, - }) - if promptErr == nil && resp.Value != nil && *resp.Value { - cfg.Agent.Instruction.Value = baseline.Instructions - return nil - } - } - } - } - - // Step 3: Interactive prompt — ask user to provide inline text or a file path. + // Step 2: Interactive prompt — ask user to provide inline text or a file path. if noPrompt { return fmt.Errorf("instruction is required for optimization.\n\n" + "Provide it via one of:\n" + - " 1. instruction in eval.yaml (agent section): inline string or file reference\n" + - " 2. Run a prior optimization to create a baseline (.agent_optimization/baseline/config.json)\n" + + " 1. Set agent.config in eval.yaml to point to a config dir with metadata.yaml\n" + + " 2. Set instruction in eval.yaml (agent section): inline string or file reference\n" + " 3. Run without --no-prompt to enter it interactively") } @@ -437,8 +450,8 @@ func resolveOptimizeSystemPrompt( } // resolveOptimizeSkillDir resolves the agent's skill directory: -// 1. Auto-detect: look for a "skills/" folder in the agent project — confirm with user. -// 2. Baseline: check .agent_optimization/baseline/config.json for a saved skill_dir. +// 1. Config dir pointer (agent.config): skill_dir from metadata.yaml (already resolved). +// 2. 
Auto-detect: look for a "skills/" folder in the agent project — confirm with user. // 3. Interactive prompt: ask the user to provide a path or skip. func resolveOptimizeSkillDir( ctx context.Context, @@ -456,15 +469,6 @@ func resolveOptimizeSkillDir( } } - // Step 2: Check baseline config. - if detectedDir == "" { - if baseline, loadErr := loadBaselineConfig(agentProject); loadErr == nil && baseline.SkillDir != "" { - if _, err := os.Stat(baseline.SkillDir); err == nil { - detectedDir = baseline.SkillDir - } - } - } - if noPrompt { // In no-prompt mode, use whatever was detected (may be empty). cfg.Agent.SkillDir = detectedDir @@ -546,6 +550,97 @@ func resolveOptimizeSkillDir( return nil } +// promptOptimizeConfigConfirmation shows the resolved values from the baseline +// config and lets the user confirm or override instruction file, skills +// directory, and tools file. +func promptOptimizeConfigConfirmation(ctx context.Context, cfg *OptimizeConfig, agentProject string) error { + azdClient, clientErr := azdext.NewAzdClient() + if clientErr != nil { + return nil // non-fatal — skip confirmation prompts + } + defer azdClient.Close() + prompt := azdClient.Prompt() + + // Instruction file. + instrDefault := relativeOptDisplay(cfg.Agent.Instruction.File, agentProject) + resp, err := prompt.Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Instruction file", + DefaultValue: instrDefault, + IgnoreHintKeys: true, + }, + }) + if err != nil { + return fmt.Errorf("prompting for instruction file: %w", err) + } + if value := strings.TrimSpace(resp.Value); value != "" { + if !filepath.IsAbs(value) && agentProject != "" { + value = filepath.Join(agentProject, value) + } + if _, err := os.Stat(value); err != nil { + return fmt.Errorf("instruction file %q is not accessible: %w", value, err) + } + cfg.Agent.Instruction.File = value + cfg.Agent.Instruction.Value = "" + } + + // Skills directory. 
+ skillDefault := relativeOptDisplay(cfg.Agent.SkillDir, agentProject) + resp, err = prompt.Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Skills directory (enter to skip)", + DefaultValue: skillDefault, + IgnoreHintKeys: true, + }, + }) + if err != nil { + return fmt.Errorf("prompting for skills directory: %w", err) + } + if value := strings.TrimSpace(resp.Value); value != "" { + if !filepath.IsAbs(value) && agentProject != "" { + value = filepath.Join(agentProject, value) + } + cfg.Agent.SkillDir = value + } else { + cfg.Agent.SkillDir = "" + } + + // TODO: re-enable tools file prompt when tools optimization is supported. + // // Tools file. + // toolsDefault := relativeOptDisplay(cfg.Agent.ToolsFile, agentProject) + // resp, err = prompt.Prompt(ctx, &azdext.PromptRequest{ + // Options: &azdext.PromptOptions{ + // Message: "Tools file (enter to skip)", + // DefaultValue: toolsDefault, + // IgnoreHintKeys: true, + // }, + // }) + // if err != nil { + // return fmt.Errorf("prompting for tools file: %w", err) + // } + // if value := strings.TrimSpace(resp.Value); value != "" { + // if !filepath.IsAbs(value) && agentProject != "" { + // value = filepath.Join(agentProject, value) + // } + // cfg.Agent.ToolsFile = value + // } else { + // cfg.Agent.ToolsFile = "" + // } + + return nil +} + +// relativeOptDisplay returns a project-relative path for display. +func relativeOptDisplay(absPath, projectDir string) string { + if absPath == "" || projectDir == "" { + return absPath + } + if rel, err := filepath.Rel(projectDir, absPath); err == nil { + return rel + } + return absPath +} + // knownOptimizationModels is the list of models commonly used for optimization. 
var knownOptimizationModels = []string{ "gpt-4.1", diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go index fa9a1db5e95..6886e2e3f00 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go @@ -17,10 +17,12 @@ import ( "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/fatih/color" "github.com/spf13/cobra" + "go.yaml.in/yaml/v3" ) -// optimizationDir is the default folder that holds optimized candidate versions. -const optimizationDir = ".agent_optimization" +// agentConfigsDir is the default folder that holds agent configuration versions +// (baseline and optimized candidates). +const agentConfigsDir = ".agent_configs" type optimizeApplyFlags struct { candidate string @@ -36,7 +38,7 @@ func newOptimizeApplyCommand(extCtx *azdext.ExtensionContext) *cobra.Command { Use: "apply", Short: "Apply optimized candidate configuration locally to your azd project.", Long: `Download the optimized configuration and skill files from an optimization -candidate and write them into your local azd project under .agent_optimization/. +candidate and write them into your local azd project under .agent_configs/. After applying, run 'azd deploy' to deploy the optimized agent version.`, Example: ` # Apply candidate config locally, then deploy @@ -98,7 +100,7 @@ func (a *OptimizeApplyAction) apply( } serviceDir := filepath.Join(project.Path, svc.RelativePath) - candidateDir := filepath.Join(serviceDir, optimizationDir, a.flags.candidate) + candidateDir := filepath.Join(serviceDir, agentConfigsDir, a.flags.candidate) bold.Fprintf(out, "Applying optimization candidate %s...\n\n", a.flags.candidate) @@ -108,7 +110,7 @@ func (a *OptimizeApplyAction) apply( } optClient := optimize_api.NewOptimizeClient(projectEndpoint, credential) - // Step 1: Fetch candidate config and write to config.json. 
+ // Step 1: Fetch candidate config from the optimization service. fmt.Fprintf(out, " Fetching candidate config...\n") candidateConfig, err := optClient.GetCandidateConfig(ctx, a.flags.candidate) if err != nil { @@ -120,31 +122,27 @@ func (a *OptimizeApplyAction) apply( } // Clean up other candidate directories, keeping only baseline and the current candidate. - cleanOtherCandidates(filepath.Join(serviceDir, optimizationDir), a.flags.candidate, out) + cleanOtherCandidates(filepath.Join(serviceDir, agentConfigsDir), a.flags.candidate, out) - configJSON, err := json.MarshalIndent(candidateConfig, "", " ") - if err != nil { - return fmt.Errorf("failed to serialize candidate config: %w", err) - } - - configPath := filepath.Join(candidateDir, "config.json") - if err := os.WriteFile(configPath, configJSON, 0600); err != nil { - return fmt.Errorf("failed to write config.json: %w", err) - } - fmt.Fprintf(out, " → %s\n", configPath) - - // Step 2: Download skill files into the candidate directory. + // Step 2: Download skill files into the candidate directory (before metadata.yaml + // so the skills/ dir exists when writeAgentConfigFromCandidate checks for it). if n, dlErr := downloadSkillFilesToDir(ctx, optClient, a.flags.candidate, candidateDir, out); dlErr != nil { fmt.Fprintf(out, " warning: failed to download skill files: %s\n", dlErr) } else if n > 0 { fmt.Fprintf(out, " Downloaded %d skill file(s)\n", n) } + // Write metadata.yaml, instructions.md, skills, and tool definitions for the candidate. + if err := writeAgentConfigFromCandidate(candidateDir, candidateConfig); err != nil { + return fmt.Errorf("failed to write candidate config: %w", err) + } + fmt.Fprintf(out, " → %s\n", filepath.Join(candidateDir, "metadata.yaml")) + // Step 3: Write OPTIMIZATION_LOCAL_DIR and OPTIMIZATION_CANDIDATE_ID into agent.yaml // so the deploy pipeline knows which local optimization config to use. 
agentYamlPath := filepath.Join(serviceDir, "agent.yaml") fmt.Fprintf(out, " Updating %s...\n", agentYamlPath) - if err := upsertAgentYamlEnvVar(agentYamlPath, "OPTIMIZATION_LOCAL_DIR", optimizationDir); err != nil { + if err := upsertAgentYamlEnvVar(agentYamlPath, "OPTIMIZATION_LOCAL_DIR", agentConfigsDir); err != nil { return fmt.Errorf("failed to update agent.yaml: %w", err) } if err := upsertAgentYamlEnvVar(agentYamlPath, "OPTIMIZATION_CANDIDATE_ID", a.flags.candidate); err != nil { @@ -171,7 +169,7 @@ func (a *OptimizeApplyAction) apply( fmt.Fprintln(out) color.New(color.FgGreen, color.Bold).Fprintf(out, " ✓ Candidate %s applied to %s\n\n", - a.flags.candidate, filepath.Join(optimizationDir, a.flags.candidate)) + a.flags.candidate, filepath.Join(agentConfigsDir, a.flags.candidate)) fmt.Fprintf(out, " Run %s to deploy the optimized agent.\n", color.CyanString("azd deploy --service %s", svc.Name)) @@ -181,56 +179,313 @@ func (a *OptimizeApplyAction) apply( return nil } -// baselineConfig is the JSON structure saved as the agent's pre-optimization baseline. -type baselineConfig struct { - Instructions string `json:"instructions,omitempty"` - Model string `json:"model,omitempty"` - Name string `json:"name"` - SkillDir string `json:"skill_dir,omitempty"` +// agentConfigMetadata is the YAML structure written as metadata.yaml in each +// agent config version directory (baseline or candidate). 
+// +// It uses file pointers instead of embedding large content inline: +// - instruction_file → points to instructions.md in the same directory +// - skill_dir → points to the skills/ subdirectory +// - tools_file → points to a tools definition file (optional) +type agentConfigMetadata struct { + Name string `yaml:"name"` + Model string `yaml:"model,omitempty"` + InstructionFile string `yaml:"instruction_file,omitempty"` + SkillDir string `yaml:"skill_dir,omitempty"` + ToolsFile string `yaml:"tools_file,omitempty"` } // saveBaselineConfig writes the agent's current configuration to -// /.agent_optimization/baseline/config.json before optimization begins. -func saveBaselineConfig(agentProject, skillDir string, req *optimize_api.OptimizeRequest) error { - baseDir := filepath.Join(agentProject, optimizationDir, "baseline") +// /.agent_configs/baseline/ before optimization begins. +// It creates metadata.yaml with file pointers and writes instructions.md. +// The skill_dir in metadata.yaml points to the original skills directory +// via a relative path rather than copying the files. +func saveBaselineConfig(agentProject, skillDir, toolsFile string, req *optimize_api.OptimizeRequest) error { + baseDir := filepath.Join(agentProject, agentConfigsDir, "baseline") if err := os.MkdirAll(baseDir, 0750); err != nil { return fmt.Errorf("creating baseline directory: %w", err) } - cfg := baselineConfig{ - Instructions: req.Agent.SystemPrompt, - Model: req.Agent.Model, - Name: req.Agent.AgentName, - SkillDir: skillDir, + meta := agentConfigMetadata{ + Name: req.Agent.AgentName, + Model: req.Agent.Model, + } + + // Write instructions.md if the agent has a system prompt. 
+ if req.Agent.SystemPrompt != "" { + instructionPath := filepath.Join(baseDir, "instructions.md") + if err := os.WriteFile(instructionPath, []byte(req.Agent.SystemPrompt), 0600); err != nil { + return fmt.Errorf("writing baseline instructions: %w", err) + } + meta.InstructionFile = "instructions.md" + } + + // Point to the original skill directory via a relative path. + if skillDir != "" { + if rel, err := filepath.Rel(baseDir, skillDir); err == nil { + meta.SkillDir = filepath.ToSlash(rel) + } else { + meta.SkillDir = skillDir + } + } + + // Point to the tools definition file via a relative path. + if toolsFile != "" { + if rel, err := filepath.Rel(baseDir, toolsFile); err == nil { + meta.ToolsFile = filepath.ToSlash(rel) + } else { + meta.ToolsFile = toolsFile + } } - data, err := json.MarshalIndent(cfg, "", " ") + // Write metadata.yaml. + data, err := yaml.Marshal(meta) if err != nil { - return fmt.Errorf("serializing baseline config: %w", err) + return fmt.Errorf("serializing baseline metadata: %w", err) } - configPath := filepath.Join(baseDir, "config.json") - if err := os.WriteFile(configPath, data, 0600); err != nil { - return fmt.Errorf("writing baseline config: %w", err) + metaPath := filepath.Join(baseDir, "metadata.yaml") + if err := os.WriteFile(metaPath, data, 0600); err != nil { + return fmt.Errorf("writing baseline metadata: %w", err) } return nil } -// loadBaselineConfig reads the baseline config from -// /.agent_optimization/baseline/config.json. -func loadBaselineConfig(agentProject string) (*baselineConfig, error) { - configPath := filepath.Join(agentProject, optimizationDir, "baseline", "config.json") - data, err := os.ReadFile(configPath) //nolint:gosec // path derived from project directory +// loadBaselineConfig reads the baseline metadata.yaml from +// /.agent_configs/baseline/metadata.yaml and resolves +// file pointers to absolute paths. 
+func loadBaselineConfig(agentProject string) (*agentConfigMetadata, error) { + baseDir := filepath.Join(agentProject, agentConfigsDir, "baseline") + metaPath := filepath.Join(baseDir, "metadata.yaml") + data, err := os.ReadFile(metaPath) //nolint:gosec // path derived from project directory if err != nil { return nil, err } - var cfg baselineConfig - if err := json.Unmarshal(data, &cfg); err != nil { - return nil, fmt.Errorf("parsing baseline config: %w", err) + var meta agentConfigMetadata + if err := yaml.Unmarshal(data, &meta); err != nil { + return nil, fmt.Errorf("parsing baseline metadata: %w", err) + } + return &meta, nil +} + +// writeBaselineFromEvalInit creates a baseline config from eval init context. +// It writes metadata.yaml and instructions.md into .agent_configs/baseline/. +// The skill_dir points to the original skills directory via a relative path. +func writeBaselineFromEvalInit(agentProject, agentName, instruction string) error { + baseDir := filepath.Join(agentProject, agentConfigsDir, "baseline") + if err := os.MkdirAll(baseDir, 0750); err != nil { + return fmt.Errorf("creating baseline directory: %w", err) + } + + meta := agentConfigMetadata{ + Name: agentName, + } + + if instruction != "" { + instructionPath := filepath.Join(baseDir, "instructions.md") + if err := os.WriteFile(instructionPath, []byte(instruction), 0600); err != nil { + return fmt.Errorf("writing baseline instructions: %w", err) + } + meta.InstructionFile = "instructions.md" + } + + // Auto-detect skills directory and point to it via a relative path. 
+ for _, candidate := range []string{"skills", "skill"} { + dir := filepath.Join(agentProject, candidate) + if info, err := os.Stat(dir); err == nil && info.IsDir() { + if rel, relErr := filepath.Rel(baseDir, dir); relErr == nil { + meta.SkillDir = filepath.ToSlash(rel) + } else { + meta.SkillDir = dir + } + break + } + } + + data, err := yaml.Marshal(meta) + if err != nil { + return fmt.Errorf("serializing baseline metadata: %w", err) + } + + metaPath := filepath.Join(baseDir, "metadata.yaml") + if err := os.WriteFile(metaPath, data, 0600); err != nil { + return fmt.Errorf("writing baseline metadata: %w", err) + } + + return nil +} + +// resolveInstructions reads the instruction content from the metadata's +// instruction_file, resolved relative to configDir. +func (m *agentConfigMetadata) resolveInstructions(configDir string) string { + if m.InstructionFile == "" { + return "" + } + path := m.InstructionFile + if !filepath.IsAbs(path) { + path = filepath.Join(configDir, path) + } + data, err := os.ReadFile(path) //nolint:gosec // path derived from project directory + if err != nil { + return "" + } + return string(data) +} + +// resolveSkillDir returns the absolute path to the skill directory, +// resolved relative to configDir. Returns empty if not set. +func (m *agentConfigMetadata) resolveSkillDir(configDir string) string { + if m.SkillDir == "" { + return "" + } + if filepath.IsAbs(m.SkillDir) { + return m.SkillDir + } + return filepath.Join(configDir, m.SkillDir) +} + +// writeAgentConfigFromCandidate writes metadata.yaml, instructions.md, skill +// files, and tool definitions for an optimization candidate into the given +// directory. No config.json is written — all content is decomposed into +// individual files with pointers in metadata.yaml. +func writeAgentConfigFromCandidate(candidateDir string, candidateConfig any) error { + meta := agentConfigMetadata{} + + // Extract fields from the candidate config map. 
+ m, _ := candidateConfig.(map[string]any) + if m != nil { + if v, exists := m["name"]; exists { + if s, ok := v.(string); ok { + meta.Name = s + } + } + if v, exists := m["agentName"]; exists { + if s, ok := v.(string); ok { + meta.Name = s + } + } + if v, exists := m["model"]; exists { + if s, ok := v.(string); ok { + meta.Model = s + } + } + } + + // Write instructions.md from the candidate's system prompt. + instructions := extractInstructions(candidateConfig) + if instructions != "" { + instructionPath := filepath.Join(candidateDir, "instructions.md") + if err := os.WriteFile(instructionPath, []byte(instructions), 0600); err != nil { + return fmt.Errorf("writing candidate instructions: %w", err) + } + meta.InstructionFile = "instructions.md" + } + + // Write inline skills from the candidate config as individual files. + if m != nil { + if err := writeInlineSkills(candidateDir, m); err != nil { + return fmt.Errorf("writing candidate skills: %w", err) + } + } + + // Set skill_dir pointer if the skills/ dir exists (from inline or downloaded skills). + skillDir := filepath.Join(candidateDir, "skills") + if info, err := os.Stat(skillDir); err == nil && info.IsDir() { + meta.SkillDir = "skills" + } + + // Write tool_definitions as a JSON file. + if m != nil { + if err := writeToolDefinitions(candidateDir, m); err != nil { + return fmt.Errorf("writing candidate tool definitions: %w", err) + } + if _, err := os.Stat(filepath.Join(candidateDir, "tools.json")); err == nil { + meta.ToolsFile = "tools.json" + } + } + + // Write metadata.yaml. 
+ data, err := yaml.Marshal(meta) + if err != nil { + return fmt.Errorf("serializing candidate metadata: %w", err) + } + metaPath := filepath.Join(candidateDir, "metadata.yaml") + if err := os.WriteFile(metaPath, data, 0600); err != nil { + return fmt.Errorf("writing candidate metadata: %w", err) } - return &cfg, nil + + return nil +} + +// writeInlineSkills extracts the "skills" array from a candidate config and +// writes each skill as skills//SKILL.md. Each file contains a YAML +// front-matter header with the skill name and description, followed by the +// skill body. +func writeInlineSkills(candidateDir string, config map[string]any) error { + skillsRaw, exists := config["skills"] + if !exists { + return nil + } + skills, ok := skillsRaw.([]any) + if !ok || len(skills) == 0 { + return nil + } + + for _, s := range skills { + sm, ok := s.(map[string]any) + if !ok { + continue + } + name, _ := sm["name"].(string) + if name == "" { + continue + } + body, _ := sm["body"].(string) + description, _ := sm["description"].(string) + + skillSubDir := filepath.Join(candidateDir, "skills", name) + if err := os.MkdirAll(skillSubDir, 0750); err != nil { + return fmt.Errorf("creating skill directory %s: %w", name, err) + } + + // Build the skill file content with YAML front-matter. + var content strings.Builder + content.WriteString("---\n") + content.WriteString(fmt.Sprintf("name: %s\n", name)) + if description != "" { + content.WriteString(fmt.Sprintf("description: %s\n", description)) + } + content.WriteString("---\n") + if body != "" { + content.WriteString(body) + if !strings.HasSuffix(body, "\n") { + content.WriteString("\n") + } + } + + filePath := filepath.Join(skillSubDir, "SKILL.md") + if err := os.WriteFile(filePath, []byte(content.String()), 0600); err != nil { + return fmt.Errorf("writing skill %s: %w", name, err) + } + } + return nil +} + +// writeToolDefinitions extracts the "tool_definitions" field from a candidate +// config and writes it as tools.json. 
+func writeToolDefinitions(candidateDir string, config map[string]any) error { + toolsRaw, exists := config["tool_definitions"] + if !exists { + return nil + } + + data, err := json.MarshalIndent(toolsRaw, "", " ") + if err != nil { + return fmt.Errorf("serializing tool definitions: %w", err) + } + + return os.WriteFile(filepath.Join(candidateDir, "tools.json"), data, 0600) } // downloadSkillFilesToDir fetches the candidate manifest, downloads all skill @@ -321,12 +576,15 @@ func printPromptDiff(out io.Writer, serviceDir, candidateID string, candidateCon return } + baseDir := filepath.Join(serviceDir, agentConfigsDir, "baseline") baseline, err := loadBaselineConfig(serviceDir) - if err != nil || baseline.Instructions == "" { + if err != nil { + return + } + baselineText := baseline.resolveInstructions(baseDir) + if baselineText == "" { return } - - baselineText := baseline.Instructions baselineLines := strings.Split(baselineText, "\n") optimizedLines := strings.Split(optimized, "\n") @@ -347,8 +605,8 @@ func printPromptDiff(out io.Writer, serviceDir, candidateID string, candidateCon printPreviewLines(out, optimizedLines, "+ ", added) // Suggest command to see the full diff. 
- baselinePath := filepath.Join(optimizationDir, "baseline", "config.json") - candidatePath := filepath.Join(optimizationDir, candidateID, "config.json") + baselinePath := filepath.Join(agentConfigsDir, "baseline", "instructions.md") + candidatePath := filepath.Join(agentConfigsDir, candidateID, "instructions.md") fmt.Fprintf(out, "\n To see the full diff:\n") fmt.Fprintf(out, " %s\n", color.CyanString("diff %s %s", baselinePath, candidatePath)) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go index 940f0ab1031..5b187ce6863 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -201,6 +201,16 @@ func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.Optimi req.Agent.Skills = skills } + // Load tool definitions if a tools file is specified. + // TODO: re-enable when tools optimization is supported in the service. + // if c.Agent.ToolsFile != "" { + // tools, err := loadToolDefinitions(c.Agent.ToolsFile) + // if err != nil { + // return nil, fmt.Errorf("loading tool definitions from %s: %w", c.Agent.ToolsFile, err) + // } + // req.Agent.ToolDefinitions = tools + // } + return req, nil } @@ -349,3 +359,18 @@ func splitFrontmatter(content string) (string, string) { // No closing delimiter found — treat entire content as body. return "", content } + +// loadToolDefinitions reads a JSON file containing an array of OpenAI-format +// function tool definitions and returns them as ToolDefinition structs. 
+func loadToolDefinitions(path string) ([]optimize_api.ToolDefinition, error) { + data, err := os.ReadFile(path) //nolint:gosec // user-provided path validated earlier + if err != nil { + return nil, fmt.Errorf("reading tool definitions file: %w", err) + } + + var tools []optimize_api.ToolDefinition + if err := json.Unmarshal(data, &tools); err != nil { + return nil, fmt.Errorf("parsing tool definitions: %w", err) + } + return tools, nil +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go index a6d0006c80d..6a11442a2fc 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go @@ -6,11 +6,13 @@ package cmd import ( "context" "fmt" + "io" "os" "strings" "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" azdext "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/fatih/color" "github.com/spf13/cobra" ) @@ -113,3 +115,26 @@ func loadLastOptimizeJobID(ctx context.Context) string { } return resp.Value } + +// printOptimizePortalLink prints the Foundry portal URL for an optimization job. +// Best-effort — silently skips if the portal prefix cannot be resolved. 
+func printOptimizePortalLink(ctx context.Context, out io.Writer, agentName, operationID string) { + azdClient, err := azdext.NewAzdClient() + if err != nil { + return + } + defer azdClient.Close() + + envResp, err := azdClient.Environment().GetCurrent(ctx, &azdext.EmptyRequest{}) + if err != nil || envResp == nil { + return + } + + prefix := resolvePortalPrefix(ctx, azdClient, envResp.Environment.Name) + if prefix == nil { + return + } + + url := prefix.OptimizationURL(agentName, operationID) + fmt.Fprintf(out, " Portal: %s\n", color.CyanString(url)) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go index e56395ad824..d6e4e62b7d4 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go @@ -97,6 +97,7 @@ func (c *EvalConfig) ToAgentTargetAdaptableEvalGroupRequest() *CreateOpenAIEvalR "type": "object", "properties": map[string]any{ "query": map[string]any{"type": "string"}, + //"ground_truth": map[string]any{"type": "string"}, }, }, }, @@ -115,7 +116,8 @@ func (c *EvalConfig) ToAgentTargetAdaptableEvalGroupRequest() *CreateOpenAIEvalR EvaluatorName: evaluator.Name, DataMapping: map[string]string{ //"messages": "{{item.messages}}", - "query": "{{item.query}}", + "query": "{{item.query}}", + //"ground_truth": "{{item.ground_truth}}", "response": "{{sample.output_items}}", "tool_calls": "{{sample.tool_calls}}", "tool_definitions": "{{sample.tool_definitions}}", diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls.go index 85b52321e82..765c3f551c7 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls.go +++ 
b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls.go @@ -62,6 +62,13 @@ func (p *PortalPrefix) DatasetURL(datasetName, version string) string { return fmt.Sprintf("%s/build/data/datasets/%s/%s", p.prefix, datasetName, version) } +// OptimizationURL returns the portal URL for an optimization job. +func (p *PortalPrefix) OptimizationURL(agentName, operationID string) string { + optimizePrefix := strings.Replace(p.prefix, "https://ai.azure.com", "https://eastus2euap.ai.azure.com", 1) + return fmt.Sprintf("%s/build/agents/%s/optimization/%s?flight=enable_faos_read_ui", + optimizePrefix, agentName, operationID) +} + // encodeSubscriptionForURL encodes a subscription ID GUID as base64 without padding. func encodeSubscriptionForURL(subscriptionID string) (string, error) { guid, err := uuid.Parse(subscriptionID) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go index 15083472993..930936b9e43 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go @@ -142,9 +142,72 @@ type AgentRef struct { Name string `yaml:"name"` Kind agent_yaml.AgentKind `yaml:"kind,omitempty"` Version string `yaml:"version,omitempty"` + ConfigFile string `yaml:"config,omitempty"` Model string `yaml:"model,omitempty"` Instruction InstructionRef `yaml:"instruction,omitempty"` SkillDir string `yaml:"skill_dir,omitempty"` + ToolsFile string `yaml:"tools_file,omitempty"` +} + +// ResolveFromConfig loads the metadata.yaml pointed to by ConfigFile and fills +// in empty fields (Model, Instruction, SkillDir). Relative paths are resolved +// against projectDir. File pointers inside metadata.yaml (instruction_file, +// skill_dir) are resolved relative to the directory containing the config file. 
+// Returns the absolute directory containing the config file, or empty string +// if ConfigFile is not set. +func (a *AgentRef) ResolveFromConfig(projectDir string) string { + if a.ConfigFile == "" { + return "" + } + + configPath := a.ConfigFile + if !filepath.IsAbs(configPath) { + configPath = filepath.Join(projectDir, configPath) + } + configDir := filepath.Dir(configPath) + + data, err := os.ReadFile(configPath) //nolint:gosec // path from project config + if err != nil { + return configDir + } + + var meta struct { + Name string `yaml:"name"` + Model string `yaml:"model"` + InstructionFile string `yaml:"instruction_file"` + SkillDir string `yaml:"skill_dir"` + ToolsFile string `yaml:"tools_file"` + } + if err := yaml.Unmarshal(data, &meta); err != nil { + return configDir + } + + if a.Model == "" && meta.Model != "" { + a.Model = meta.Model + } + if a.Instruction.IsEmpty() && meta.InstructionFile != "" { + instrPath := meta.InstructionFile + if !filepath.IsAbs(instrPath) { + instrPath = filepath.Join(configDir, instrPath) + } + a.Instruction.File = instrPath + } + if a.SkillDir == "" && meta.SkillDir != "" { + skillDir := meta.SkillDir + if !filepath.IsAbs(skillDir) { + skillDir = filepath.Join(configDir, skillDir) + } + a.SkillDir = skillDir + } + if a.ToolsFile == "" && meta.ToolsFile != "" { + toolsFile := meta.ToolsFile + if !filepath.IsAbs(toolsFile) { + toolsFile = filepath.Join(configDir, toolsFile) + } + a.ToolsFile = toolsFile + } + + return configDir } // ResolvedSystemPrompt returns the resolved instruction text. 
@@ -265,9 +328,9 @@ func (o *Options) UnmarshalYAML(value *yaml.Node) error { if o.MaxIterations <= 0 { o.MaxIterations = 4 } - if o.Budget <= 0 { - o.Budget = 100 - } + // if o.Budget <= 0 { + // o.Budget = 100 + // } return nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go index 7cbbf4d01cb..830b34a8659 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go @@ -49,6 +49,7 @@ type AgentDefinition struct { Model string `json:"model,omitempty"` SystemPrompt string `json:"systemPrompt,omitempty"` Skills []SkillDefinition `json:"skills,omitempty"` + ToolDefinitions []ToolDefinition `json:"tools,omitempty"` } // SkillDefinition describes a skill attached to an agent. @@ -58,6 +59,22 @@ type SkillDefinition struct { Body string `json:"body,omitempty"` } +// ToolDefinition is an OpenAI-format function tool definition. +// The optimizer may mutate the function's description and per-parameter +// descriptions; schema fields (name, types, required) are immutable. +type ToolDefinition struct { + Type string `json:"type"` + Function ToolFunction `json:"function"` +} + +// ToolFunction is the inner function definition of a ToolDefinition. +type ToolFunction struct { + Name string `json:"name"` + Description string `json:"description,omitempty"` + Parameters map[string]any `json:"parameters,omitempty"` + Strict *bool `json:"strict,omitempty"` +} + // DatasetTask is a single task in an inline dataset. 
type DatasetTask struct { Name string `json:"name,omitempty"` diff --git a/cli/azd/extensions/azure.ai.agents/version.txt b/cli/azd/extensions/azure.ai.agents/version.txt index 0a97711d3d4..5cc273dc597 100644 --- a/cli/azd/extensions/azure.ai.agents/version.txt +++ b/cli/azd/extensions/azure.ai.agents/version.txt @@ -1 +1 @@ -0.1.32-optbugbash-preview +0.1.33-optbugbash-preview diff --git a/cli/azd/extensions/registry.json b/cli/azd/extensions/registry.json index d73bcc49147..5a068cb6ad8 100644 --- a/cli/azd/extensions/registry.json +++ b/cli/azd/extensions/registry.json @@ -4089,6 +4089,82 @@ "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-windows-arm64.zip" } } + }, + { + "version": "0.1.33-optbugbash-preview", + "requiredAzdVersion": "\u003e1.23.13", + "capabilities": [ + "custom-commands", + "lifecycle-events", + "mcp-server", + "service-target-provider", + "metadata" + ], + "providers": [ + { + "name": "azure.ai.agent", + "type": "service-target", + "description": "Deploys agents to the Foundry Agent Service" + } + ], + "usage": "azd ai agent \u003ccommand\u003e [options]", + "examples": [ + { + "name": "init", + "description": "Initialize a new AI agent project.", + "usage": "azd ai agent init" + } + ], + "artifacts": { + "darwin/amd64": { + "checksum": { + "algorithm": "sha256", + "value": "1dd0fac923612ef746538c0f08d7fc4fb4dbe35bb63816eb9a1fa055ab2b9b63" + }, + "entryPoint": "azure-ai-agents-darwin-amd64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-darwin-amd64.zip" + }, + "darwin/arm64": { + "checksum": { + "algorithm": "sha256", + "value": "7fa4effc01a82fcbe713ccaf62438df75e994f4886ceea262392668fd708114b" + }, + "entryPoint": "azure-ai-agents-darwin-arm64", + "url": 
"https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-darwin-arm64.zip" + }, + "linux/amd64": { + "checksum": { + "algorithm": "sha256", + "value": "5abb54644fbf59e5dc865cdb8f8e54d3ad482c17fc21920b186a9a6b479482f7" + }, + "entryPoint": "azure-ai-agents-linux-amd64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-linux-amd64.tar.gz" + }, + "linux/arm64": { + "checksum": { + "algorithm": "sha256", + "value": "8c24590181ee39b07f65b27e6e02155f6dad79a11f31e84e72056d4fb3bd572e" + }, + "entryPoint": "azure-ai-agents-linux-arm64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-linux-arm64.tar.gz" + }, + "windows/amd64": { + "checksum": { + "algorithm": "sha256", + "value": "36ac3ab724e60bd0d30671a756acd69d97b5fae7b14d6a7b14e42f408e4626c7" + }, + "entryPoint": "azure-ai-agents-windows-amd64.exe", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-windows-amd64.zip" + }, + "windows/arm64": { + "checksum": { + "algorithm": "sha256", + "value": "16fa8ce21c28642b8086aaa7c39fc71d1f2dfbf610c3fee9eab7699c8a26d4f9" + }, + "entryPoint": "azure-ai-agents-windows-arm64.exe", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-windows-arm64.zip" + } + } } ] }, From 6ab351229e893249f6af9e37e4ba97d832400f32 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 21 May 2026 02:04:26 -0700 Subject: [PATCH 21/33] reorganize --- .../azd_observability_bugbash.md | 25 +- .../azure.ai.agents/internal/cmd/eval.go | 129 +--- .../internal/cmd/eval_helpers.go | 258 +++++++ .../internal/cmd/eval_helpers_test.go | 210 ++++++ .../azure.ai.agents/internal/cmd/eval_init.go | 310 ++++---- 
.../internal/cmd/eval_init_jobs.go | 53 +- .../internal/cmd/eval_init_prompts.go | 49 +- .../internal/cmd/eval_init_test.go | 108 ++- .../azure.ai.agents/internal/cmd/eval_list.go | 41 +- .../internal/cmd/eval_progress.go | 5 + .../internal/cmd/eval_progress_test.go | 38 + .../azure.ai.agents/internal/cmd/eval_run.go | 63 +- .../internal/cmd/eval_run_test.go | 33 +- .../azure.ai.agents/internal/cmd/eval_show.go | 17 +- .../internal/cmd/eval_show_test.go | 124 ++++ .../azure.ai.agents/internal/cmd/eval_test.go | 54 +- .../internal/cmd/eval_update.go | 13 +- .../azure.ai.agents/internal/cmd/optimize.go | 679 +++++------------- .../internal/cmd/optimize_apply.go | 219 +----- .../internal/cmd/optimize_apply_test.go | 255 +++++++ .../internal/cmd/optimize_cancel.go | 4 + .../internal/cmd/optimize_config.go | 74 +- .../internal/cmd/optimize_deploy.go | 5 + .../internal/cmd/optimize_deploy_test.go | 45 ++ .../internal/cmd/optimize_helpers.go | 29 +- .../internal/cmd/optimize_list.go | 8 +- .../internal/cmd/optimize_prompts.go | 446 ++++++++++++ .../internal/cmd/optimize_status.go | 8 +- .../internal/cmd/optimize_test.go | 23 +- .../internal/pkg/agents/eval_api/artifacts.go | 19 +- .../pkg/agents/eval_api/generation.go | 12 +- .../internal/pkg/agents/opteval/state.go | 98 +++ .../internal/pkg/agents/opteval/yaml.go | 115 ++- .../pkg/agents/optimize_api/client.go | 5 +- .../pkg/agents/optimize_api/models.go | 3 + .../pkg/agents/optimize_api/poller.go | 2 + 36 files changed, 2307 insertions(+), 1272 deletions(-) create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go create mode 100644 
cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/state.go diff --git a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md index 9cbe09c0673..c9dac783ae0 100644 --- a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md +++ b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md @@ -7,25 +7,13 @@ Prerequisites: [azd CLI](https://aka.ms/azd), [Go](https://go.dev/dl/), `az login` ```bash +# Installing private registry for bugbash azd ext install microsoft.azd.extensions -git clone https://github.com/Zyysurely/azure-dev.git -cd azure-dev/cli/azd/extensions/azure.ai.agents -git checkout zyying/opt_eval -azd x build -``` - -After building, register the extension and overlay the custom binary: - -```powershell -# Windows (PowerShell) -azd ext install azure.ai.agents -copy bin\azure-ai-agents-windows-amd64.exe $env:USERPROFILE\.azd\extensions\azure.ai.agents\ -Force -``` +azd ext source add --name zyysurely --type url --location https://raw.githubusercontent.com/Zyysurely/azure-dev/zyying/opt_eval/cli/azd/extensions/registry.json +azd ext install azure.ai.agents --source zyysurely --version 0.1.33-optbugbash-preview --force -```bash -# macOS / Linux -azd ext install azure.ai.agents -cp bin/azure-ai-agents-$(uname -s | tr A-Z a-z)-* ~/.azd/extensions/azure.ai.agents/ +# If then you want to switch back to the official version, use +azd ext install azure.ai.agents --force ``` Verify:`azd ai agent eval --help` and `azd ai agent optimize --help` @@ -42,7 +30,8 @@ Navigate to a fresh directory outside the extension repo, init the agent and poi ```bash mkdir bugbash-azd- && cd bugbash-azd- -azd init -t https://github.com/zyysurely/sample_agent . 
+git clone https://github.com/ai-platform-microsoft/foundry-observability-playground.git +cd .\foundry-observability-playground\demos\build2026\agents\travel-approver\ azd ai agent init --project-id /subscriptions/2d385bf4-0756-4a76-aa95-28bf9ed3b625/resourceGroups/rg-azdbugbash/providers/Microsoft.CognitiveServices/accounts/azd-bugbash-0514/projects/bugbash-westus2 # Customize your agent name and model deployment ``` diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go index 335c4f353a4..9c378454b1c 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go @@ -1,6 +1,13 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// eval.go implements the top-level "eval" command group and shared context +// resolution logic used by all eval subcommands (init, run, update, list, show). +// +// The evalResolvedContext struct holds the resolved agent, project, and +// endpoint information. It is built from azd project state, environment +// variables, or interactive prompts, and threaded through all subcommands. + package cmd import ( @@ -26,62 +33,48 @@ import ( "go.yaml.in/yaml/v3" ) +// Default values for eval configuration. const ( defaultEvalConfigName = "eval.yaml" defaultEvalName = "smoke-core" - defaultEvalModel = "gpt-4o" defaultEvalSamples = 15 ) +// Type aliases to avoid repeating full package paths throughout the eval code. type evalConfig = eval_api.EvalConfig type evalAgentRef = opteval.AgentRef type evalDatasetRef = opteval.DatasetRef -// evalState holds transient runtime state stored in the azd environment. -type evalState struct { - InitStatus string - DatasetGenOpID string - DatasetGenStatus string - EvalGenOpID string - EvalGenStatus string - EvalID string -} - -// Azd environment keys for eval state. 
-const ( - evalKeyInitStatus = "LAST_EVAL_INIT_STATUS" - evalKeyDatasetGenOpID = "LAST_EVAL_DATASET_GEN_OP_ID" - evalKeyDatasetGenStatus = "LAST_EVAL_DATASET_GEN_STATUS" - evalKeyEvalGenOpID = "LAST_EVAL_GEN_OP_ID" - evalKeyEvalGenStatus = "LAST_EVAL_GEN_STATUS" - evalKeyEvalID = "LAST_EVAL_ID" -) - +// evalResolvedContext holds the fully-resolved context for an eval operation, +// including the azd client, API clients, project paths, and agent metadata. +// Built by resolveEvalContext from azd project state, environment variables, +// or interactive prompts. type evalResolvedContext struct { azdClient *azdext.AzdClient evalClient *eval_api.EvalClient datasetClient *dataset_api.DatasetClient - projectRoot string - hasProject bool - agentProject string - agentProjectSource string - agentName string - agentNameSource string - version string - versionSource string - agentKind agent_yaml.AgentKind - agentKindSource string - serviceName string - projectEndpoint string - projectEndpointSource string - envName string + projectRoot string // azd project root directory + hasProject bool // true if running within an azd project + agentProject string // agent service directory + agentProjectSource string // how agentProject was resolved + agentName string // deployed agent name + agentNameSource string // how agentName was resolved + version string // agent version + versionSource string // how version was resolved + agentKind agent_yaml.AgentKind // hosted or prompt + agentKindSource string // how agentKind was resolved + serviceName string // azure.yaml service name + projectEndpoint string // Foundry project endpoint URL + projectEndpointSource string // how projectEndpoint was resolved + envName string // azd environment name } +// evalContextOptions configures the behavior of resolveEvalContext. 
type evalContextOptions struct { - agent string - projectEndpoint string - requireAgent bool - noPrompt bool + agent string // explicit agent name (from --agent flag) + projectEndpoint string // explicit project endpoint (from --project-endpoint flag) + requireAgent bool // fail if agent name cannot be resolved + noPrompt bool // skip interactive prompts } func newEvalCommand(extCtx *azdext.ExtensionContext) *cobra.Command { @@ -103,6 +96,7 @@ Use eval init to generate an eval config, then eval run to execute it.`, return cmd } +// resolveEvalContext resolves the context for an eval operation by reading azd project state, environment variables, and optionally prompting the user. It returns an evalResolvedContext with API clients and metadata needed to run eval commands. func resolveEvalContext(ctx context.Context, options evalContextOptions) (*evalResolvedContext, error) { fmt.Println(output.WithGrayFormat("Resolving eval context...")) @@ -428,6 +422,7 @@ func endpointFromProjectID(projectID string) (string, error) { return buildAgentEndpoint(project.AccountName, project.ProjectName), nil } +// pollEvalOperationWithSpinner polls a long-running eval operation with a spinner, updating the provided evalProgress with status. It returns the completed job or an error if the operation failed or timed out. func pollEvalOperationWithSpinner( ctx context.Context, label string, @@ -469,62 +464,6 @@ func pollEvalOperationWithSpinner( return job, nil } -// loadEvalState reads eval runtime state from the azd environment. -// Returns an empty state if no values are set. 
-func loadEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string) *evalState { - get := func(key string) string { - v, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ - EnvName: envName, Key: key, - }) - if err != nil || v.Value == "" { - return "" - } - return v.Value - } - return &evalState{ - InitStatus: get(evalKeyInitStatus), - DatasetGenOpID: get(evalKeyDatasetGenOpID), - DatasetGenStatus: get(evalKeyDatasetGenStatus), - EvalGenOpID: get(evalKeyEvalGenOpID), - EvalGenStatus: get(evalKeyEvalGenStatus), - EvalID: get(evalKeyEvalID), - } -} - -// saveEvalState persists eval runtime state to the azd environment. -func saveEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string, state *evalState) error { - pairs := []struct { - key, val string - }{ - {evalKeyInitStatus, state.InitStatus}, - {evalKeyDatasetGenOpID, state.DatasetGenOpID}, - {evalKeyDatasetGenStatus, state.DatasetGenStatus}, - {evalKeyEvalGenOpID, state.EvalGenOpID}, - {evalKeyEvalGenStatus, state.EvalGenStatus}, - {evalKeyEvalID, state.EvalID}, - } - for _, p := range pairs { - if _, err := azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ - EnvName: envName, Key: p.key, Value: p.val, - }); err != nil { - return fmt.Errorf("setting %s in azd env: %w", p.key, err) - } - } - return nil -} - -// clearEvalState removes eval state keys from the azd environment. 
-func clearEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string) { - for _, key := range []string{ - evalKeyInitStatus, evalKeyDatasetGenOpID, evalKeyDatasetGenStatus, - evalKeyEvalGenOpID, evalKeyEvalGenStatus, evalKeyEvalID, - } { - _, _ = azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ - EnvName: envName, Key: key, Value: "", - }) - } -} - func relPathForYaml(baseDir string, target string) string { if rel, err := filepath.Rel(baseDir, target); err == nil { return filepath.ToSlash(rel) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go index bb59378fdd5..e6747f4caf0 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go @@ -1,15 +1,28 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// eval_helpers.go provides shared utility functions used by both eval and +// optimize commands, including portal URL construction and path display +// helpers. + package cmd import ( + "bufio" "context" + "encoding/json" + "fmt" + "io" "log" + "os" + "path/filepath" "azureaiagent/internal/pkg/agents/eval_api" + "azureaiagent/internal/pkg/agents/opteval" "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/fatih/color" + "go.yaml.in/yaml/v3" ) // resolvePortalPrefix reads AZURE_AI_PROJECT_ID from the azd environment and @@ -47,3 +60,248 @@ func buildEvalReportURL(ctx context.Context, azdClient *azdext.AzdClient, envNam } return prefix.EvalRunURL(evalID, runID) } + +// printPortalLink resolves the portal prefix and prints a portal URL. +// The buildURL callback receives the resolved prefix and returns the full URL. +// Best-effort — silently skips on any failure. 
+func printPortalLink(ctx context.Context, out io.Writer, azdClient *azdext.AzdClient, envName string, buildURL func(*eval_api.PortalPrefix) string) { + prefix := resolvePortalPrefix(ctx, azdClient, envName) + if prefix == nil { + return + } + fmt.Fprintf(out, " Portal: %s\n", color.CyanString(buildURL(prefix))) +} + +// relativeDisplay returns a project-relative path for display purposes. +// Used by both eval and optimize config confirmation prompts. +// Returns empty string for empty input. +func relativeDisplay(absPath, projectDir string) string { + if absPath == "" || projectDir == "" { + return absPath + } + if rel, err := filepath.Rel(projectDir, absPath); err == nil { + return rel + } + return absPath +} + +// reconcileConfigAgentName reconciles the agent name in a config with the +// environment-resolved name. Environment takes precedence. Returns true if +// the config was changed. Used by both eval run and optimize. +func reconcileConfigAgentName(agent *opteval.AgentRef, envName, configSource string) bool { + if envName == "" || agent.Name == "" || agent.Name == envName { + if envName != "" && agent.Name == "" { + agent.Name = envName + } + return false + } + fmt.Printf(" %s agent name in %s (%q) differs from environment (%q) — using environment value\n", + color.YellowString("warning:"), configSource, agent.Name, envName) + agent.Name = envName + return true +} + +// resolveAgentConfig resolves agent configuration from config metadata +// using a priority chain: +// +// 1. existingConfig's agent.config path — if the config references a +// metadata.yaml, resolve all fields from it. +// 2. Default baseline path — try .agent_configs/baseline/metadata.yaml. +// 3. Nothing found — returns nil; the caller should prompt the user +// for an instruction and then call writeBaselineIfNeeded. +// +// The returned AgentConfig contains resolved instruction file path, model, +// skill_dir, and tools_file. 
Eval init uses only instruction fields; +// optimize also uses skill_dir and tools_file. +func resolveAgentConfig( + existingConfig *opteval.Config, + projectDir string, +) *opteval.AgentConfig { + // Step 1: existing config has a config pointer — resolve from it. + if existingConfig != nil && existingConfig.Agent.ConfigFile != "" { + ref := opteval.AgentRef{ConfigFile: existingConfig.Agent.ConfigFile} + return ref.ResolveConfig(projectDir) + } + + // Step 2: try the default baseline path. + if projectDir != "" { + relPath := opteval.BaselineConfigRelPath() + if fileExists(filepath.Join(projectDir, relPath)) { + ref := opteval.AgentRef{ConfigFile: relPath} + return ref.ResolveConfig(projectDir) + } + } + + // Step 3: nothing found — caller should prompt and write baseline. + return nil +} + +// writeBaselineIfNeeded creates a baseline config when no config was resolved +// but an instruction is available. Returns the config file relative path +// (empty if nothing was written). +func writeBaselineIfNeeded( + projectDir, instruction string, +) string { + if projectDir == "" || instruction == "" { + return "" + } + defaultConfigFile := opteval.BaselineConfigRelPath() + absConfigFile := filepath.Join(projectDir, defaultConfigFile) + // Don't overwrite an existing baseline. + if fileExists(absConfigFile) { + return "" + } + if err := writeBaselineConfig(projectDir, baselineParams{ + Instruction: instruction, + }); err != nil { + fmt.Printf(" warning: failed to write baseline config: %s\n", err) + return "" + } + fmt.Printf(" Baseline: %s\n", absConfigFile) + return defaultConfigFile +} + +// baselineParams holds optional inputs for writing a baseline agent config. 
+type baselineParams struct { + Model string // agent model (optional) + Instruction string // system prompt text (optional) + SkillDir string // absolute skill dir path (empty = auto-detect) + ToolsFile string // absolute tools file path (optional) +} + +// writeBaselineConfig writes a baseline agent config to .agent_configs/baseline/. +// It creates metadata.yaml with file pointers and writes instructions.md. +// When skillDir is empty, it auto-detects a "skills" or "skill" directory. +// Used by both eval init and optimize. +func writeBaselineConfig(agentProject string, p baselineParams) error { + baseDir := filepath.Join(agentProject, opteval.AgentConfigsDir, opteval.BaselineDir) + if err := os.MkdirAll(baseDir, 0750); err != nil { + return fmt.Errorf("creating baseline directory: %w", err) + } + + meta := struct { + Model string `yaml:"model,omitempty"` + InstructionFile string `yaml:"instruction_file,omitempty"` + SkillDir string `yaml:"skill_dir,omitempty"` + ToolsFile string `yaml:"tools_file,omitempty"` + }{ + Model: p.Model, + } + + if p.Instruction != "" { + instructionPath := filepath.Join(baseDir, opteval.InstructionFile) + if err := os.WriteFile(instructionPath, []byte(p.Instruction), 0600); err != nil { + return fmt.Errorf("writing baseline instructions: %w", err) + } + meta.InstructionFile = opteval.InstructionFile + } + + // Resolve skill_dir: use explicit path, or auto-detect from project. 
+ skillDir := p.SkillDir + if skillDir == "" { + for _, candidate := range []string{"skills", "skill"} { + dir := filepath.Join(agentProject, candidate) + if info, err := os.Stat(dir); err == nil && info.IsDir() { + skillDir = dir + break + } + } + } + if skillDir != "" { + if rel, err := filepath.Rel(baseDir, skillDir); err == nil { + meta.SkillDir = filepath.ToSlash(rel) + } else { + meta.SkillDir = skillDir + } + } + + if p.ToolsFile != "" { + if rel, err := filepath.Rel(baseDir, p.ToolsFile); err == nil { + meta.ToolsFile = filepath.ToSlash(rel) + } else { + meta.ToolsFile = p.ToolsFile + } + } + + data, err := yaml.Marshal(meta) + if err != nil { + return fmt.Errorf("serializing baseline metadata: %w", err) + } + + metaPath := filepath.Join(baseDir, opteval.MetadataFile) + if err := os.WriteFile(metaPath, data, 0600); err != nil { + return fmt.Errorf("writing baseline metadata: %w", err) + } + + return nil +} + +// loadJSONLFile reads a JSONL file and unmarshals each non-empty line into T. +// Returns an error if the file cannot be read, a line fails to parse, or no items are found. +func loadJSONLFile[T any](path string) ([]T, error) { + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("failed to open dataset file %s: %w", path, err) + } + defer f.Close() + + var items []T + scanner := bufio.NewScanner(f) + lineNum := 0 + for scanner.Scan() { + lineNum++ + line := scanner.Text() + if line == "" { + continue + } + var item T + if err := json.Unmarshal([]byte(line), &item); err != nil { + return nil, fmt.Errorf("failed to parse dataset line %d: %w", lineNum, err) + } + items = append(items, item) + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("error reading dataset file %s: %w", path, err) + } + if len(items) == 0 { + return nil, fmt.Errorf("dataset file %s contains no items", path) + } + return items, nil +} + +// statusLabelAndColor maps a raw status to a display label and color function. 
+func statusLabelAndColor(status string) (string, func(string, ...any) string) { + switch status { + case "completed": + return "Completed", color.GreenString + case "succeeded": + return "Succeeded", color.GreenString + case "failed": + return "Failed", color.RedString + case "cancelled", "canceled": + return "Cancelled", color.YellowString + case "running", "in_progress": + return "Running", color.CyanString + case "partial": + return "Partial", color.YellowString + case "": + return "No runs", color.HiBlackString + default: + return status, fmt.Sprintf + } +} + +// colorizeStatus returns a colorized status string for display. +func colorizeStatus(status string) string { + label, colorFn := statusLabelAndColor(status) + return colorFn(label) +} + +// padColorizedStatus returns a fixed-width colored status string so that +// tabwriter aligns columns correctly despite ANSI escape sequences. +func padColorizedStatus(status string) string { + const statusWidth = 10 // wide enough for "Completed", "Cancelled", etc. + label, colorFn := statusLabelAndColor(status) + padded := fmt.Sprintf("%-*s", statusWidth, label) + return colorFn(padded) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers_test.go new file mode 100644 index 00000000000..37df0f88027 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers_test.go @@ -0,0 +1,210 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "os" + "path/filepath" + "testing" + + "azureaiagent/internal/pkg/agents/opteval" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// ---- relativeDisplay ---- + +func TestRelativeDisplay(t *testing.T) { + t.Parallel() + tests := []struct { + name string + absPath string + projectDir string + want string + }{ + {"relative path", filepath.Join("/project", "sub", "file.yaml"), "/project", filepath.Join("sub", "file.yaml")}, + {"same dir", filepath.Join("/project", "file.yaml"), "/project", "file.yaml"}, + {"empty absPath", "", "/project", ""}, + {"empty projectDir", "/project/file.yaml", "", "/project/file.yaml"}, + {"both empty", "", "", ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := relativeDisplay(tt.absPath, tt.projectDir) + assert.Equal(t, tt.want, got) + }) + } +} + +// ---- reconcileConfigAgentName ---- + +func TestReconcileConfigAgentName(t *testing.T) { + t.Parallel() + t.Run("no change when names match", func(t *testing.T) { + t.Parallel() + agent := &opteval.AgentRef{Name: "my-agent"} + changed := reconcileConfigAgentName(agent, "my-agent", "config.yaml") + assert.False(t, changed) + assert.Equal(t, "my-agent", agent.Name) + }) + + t.Run("sets name when agent name is empty", func(t *testing.T) { + t.Parallel() + agent := &opteval.AgentRef{} + changed := reconcileConfigAgentName(agent, "env-agent", "config.yaml") + assert.False(t, changed) + assert.Equal(t, "env-agent", agent.Name) + }) + + t.Run("overrides when names differ", func(t *testing.T) { + t.Parallel() + agent := &opteval.AgentRef{Name: "config-agent"} + changed := reconcileConfigAgentName(agent, "env-agent", "config.yaml") + assert.True(t, changed) + assert.Equal(t, "env-agent", agent.Name) + }) + + t.Run("no change when envName is empty", func(t *testing.T) { + t.Parallel() + agent := &opteval.AgentRef{Name: "my-agent"} + changed := reconcileConfigAgentName(agent, "", 
"config.yaml") + assert.False(t, changed) + assert.Equal(t, "my-agent", agent.Name) + }) +} + +// ---- statusLabelAndColor ---- + +func TestStatusLabelAndColor(t *testing.T) { + t.Parallel() + tests := []struct { + status string + wantLabel string + }{ + {"completed", "Completed"}, + {"succeeded", "Succeeded"}, + {"failed", "Failed"}, + {"cancelled", "Cancelled"}, + {"canceled", "Cancelled"}, + {"running", "Running"}, + {"in_progress", "Running"}, + {"partial", "Partial"}, + {"", "No runs"}, + {"unknown_status", "unknown_status"}, + } + for _, tt := range tests { + t.Run(tt.status, func(t *testing.T) { + t.Parallel() + label, colorFn := statusLabelAndColor(tt.status) + assert.Equal(t, tt.wantLabel, label) + assert.NotNil(t, colorFn) + }) + } +} + +func TestColorizeStatus(t *testing.T) { + t.Parallel() + // colorizeStatus should return a non-empty string for any input. + assert.NotEmpty(t, colorizeStatus("completed")) + assert.NotEmpty(t, colorizeStatus("failed")) + assert.NotEmpty(t, colorizeStatus("")) + assert.NotEmpty(t, colorizeStatus("unknown")) +} + +func TestPadColorizedStatus(t *testing.T) { + t.Parallel() + // padColorizedStatus should return a non-empty string for any input. + result := padColorizedStatus("completed") + assert.NotEmpty(t, result) + // The padded string should be longer than the label due to padding + ANSI. 
+ assert.Contains(t, result, "Completed") +} + +// ---- writeBaselineConfig ---- + +func TestWriteBaselineConfig(t *testing.T) { + t.Parallel() + t.Run("writes metadata and instruction file", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + err := writeBaselineConfig(dir, baselineParams{ + Model: "gpt-4o", + Instruction: "You are a helpful assistant.", + }) + require.NoError(t, err) + + metaPath := filepath.Join(dir, opteval.AgentConfigsDir, opteval.BaselineDir, opteval.MetadataFile) + assert.FileExists(t, metaPath) + + instrPath := filepath.Join(dir, opteval.AgentConfigsDir, opteval.BaselineDir, opteval.InstructionFile) + assert.FileExists(t, instrPath) + content, err := os.ReadFile(instrPath) + require.NoError(t, err) + assert.Equal(t, "You are a helpful assistant.", string(content)) + }) + + t.Run("writes metadata without instruction", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + err := writeBaselineConfig(dir, baselineParams{ + Model: "gpt-4o", + }) + require.NoError(t, err) + + metaPath := filepath.Join(dir, opteval.AgentConfigsDir, opteval.BaselineDir, opteval.MetadataFile) + assert.FileExists(t, metaPath) + + instrPath := filepath.Join(dir, opteval.AgentConfigsDir, opteval.BaselineDir, opteval.InstructionFile) + assert.NoFileExists(t, instrPath) + }) + + t.Run("auto-detects skill dir", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + require.NoError(t, os.MkdirAll(filepath.Join(dir, "skills"), 0750)) + + err := writeBaselineConfig(dir, baselineParams{ + Instruction: "test", + }) + require.NoError(t, err) + + metaPath := filepath.Join(dir, opteval.AgentConfigsDir, opteval.BaselineDir, opteval.MetadataFile) + data, err := os.ReadFile(metaPath) + require.NoError(t, err) + assert.Contains(t, string(data), "skill_dir") + }) +} + +// ---- writeBaselineIfNeeded ---- + +func TestWriteBaselineIfNeeded(t *testing.T) { + t.Parallel() + t.Run("creates baseline when none exists", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + 
result := writeBaselineIfNeeded(dir, "test instruction") + assert.NotEmpty(t, result) + assert.FileExists(t, filepath.Join(dir, result)) + }) + + t.Run("skips when baseline already exists", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + // Create existing baseline. + absPath := filepath.Join(dir, opteval.BaselineConfigRelPath()) + require.NoError(t, os.MkdirAll(filepath.Dir(absPath), 0750)) + require.NoError(t, os.WriteFile(absPath, []byte("existing"), 0600)) + + result := writeBaselineIfNeeded(dir, "test instruction") + assert.Empty(t, result) + }) + + t.Run("returns empty for empty inputs", func(t *testing.T) { + t.Parallel() + assert.Empty(t, writeBaselineIfNeeded("", "instruction")) + assert.Empty(t, writeBaselineIfNeeded("/some/dir", "")) + }) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go index f7f97a68e43..36db9e10baa 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -1,6 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// eval_init.go implements the "eval init" command, which generates a local +// eval suite (eval.yaml) for a deployed agent. It resolves context, submits +// dataset and evaluator generation jobs, polls for completion (unless +// --no-wait), downloads review artifacts, and writes the eval config. + package cmd import ( @@ -22,33 +27,33 @@ import ( // DataGenerationAPIVersion is the API version used for data generation jobs. const DataGenerationAPIVersion = "v1" -// EvalInitFlags defines the customized flags for the eval init command. +// evalInitFlags holds CLI flags and interactive prompt state for eval init. 
type evalInitFlags struct { - name string - agent string - projectEndpoint string - instruction string - instructionFile string - configFile string - skillDir string - toolsFile string - evalModel string - dataset string - output string - maxSamples int - evaluators []string - noWait bool - resetDefaults bool - evalModelSet bool - maxSamplesSet bool - traceDays int - // Internal flags set during interactive prompts. + // CLI flags. + name string // eval suite name + agent string // target agent name + projectEndpoint string // Foundry project endpoint + instruction string // inline agent instruction + instructionFile string // path to agent instruction file + configFile string // agent config metadata path + evalModel string // model for evaluation and generation + dataset string // existing dataset file or name + output string // eval config output path + maxSamples int // number of samples to generate + evaluators []string // built-in or custom evaluator names + noWait bool // submit and return immediately + resetDefaults bool // overwrite existing eval config + evalModelSet bool // true if --eval-model was explicitly set + maxSamplesSet bool // true if --max-samples was explicitly set + traceDays int // include traces from last N days + + // Internal state set during interactive prompts. regenerateDataset bool regenerateEvaluator bool } func newEvalInitCommand(extCtx *azdext.ExtensionContext) *cobra.Command { - flags := &evalInitFlags{evalModel: defaultEvalModel, maxSamples: defaultEvalSamples, output: defaultEvalConfigName} + flags := &evalInitFlags{maxSamples: defaultEvalSamples, output: defaultEvalConfigName} cmd := &cobra.Command{ Use: "init", Short: "Generate a local eval suite for a deployed agent.", @@ -78,7 +83,7 @@ the agent project root. Use --no-wait to write pending operation IDs and return. 
cmd.Flags().StringVarP(&flags.projectEndpoint, "project-endpoint", "p", "", "Microsoft Foundry project endpoint URL") cmd.Flags().StringVarP(&flags.instruction, "gen-instruction", "g", "", "Agent instruction used for dataset and evaluator generation") cmd.Flags().StringVarP(&flags.instructionFile, "gen-instruction-file", "G", "", "Path to a file containing the agent instruction") - cmd.Flags().StringVar(&flags.evalModel, "eval-model", defaultEvalModel, "Model used for evaluation and generation, and also as the default model for evaluation") + cmd.Flags().StringVar(&flags.evalModel, "eval-model", "", "Model used for evaluation and generation") cmd.Flags().StringVar(&flags.dataset, "dataset", "", "Existing local file or registered dataset name to use for evaluation (instead of generating a new dataset)") cmd.Flags().IntVar(&flags.maxSamples, "max-samples", defaultEvalSamples, "Number of samples to generate (15-1000)") cmd.Flags().StringArrayVar(&flags.evaluators, "evaluator", nil, "Built-in or custom evaluator name") @@ -89,7 +94,9 @@ the agent project root. Use --no-wait to write pending operation IDs and return. return cmd } -// runEvalInit executes the eval init command logic. It resolves context, prompts for missing options, submits generation jobs, polls for completion (unless --no-wait), writes the eval config, and prints next steps. +// runEvalInit executes the eval init command logic. It resolves context, +// prompts for missing options, submits generation jobs, polls for completion +// (unless --no-wait), writes the eval config, and prints next steps. 
func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error { if flags.instruction != "" && flags.instructionFile != "" { return fmt.Errorf("cannot use both --gen-instruction and --gen-instruction-file; provide one or the other") @@ -123,88 +130,43 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error } } - configPath := eval_api.ResolveEvalOutputPath(flags.output, resolved.agentProject) + configPath := eval_api.ResolveRelPath(flags.output, resolved.agentProject) printEvalDetectedContext(resolved, configPath) - // Auto-detect agent config metadata if no instruction was provided. - // This looks for .agent_configs/baseline/metadata.yaml and resolves - // instruction and skill_dir from it. + // Load existing eval.yaml and resolve agent config. + existingCfg, hasExisting := tryLoadExistingEvalConfig(configPath) + isRegenerate := false + + // Resolve agent config: eval.yaml config → default baseline → nothing. if flags.instruction == "" && flags.instructionFile == "" && resolved.hasProject { - defaultConfigFile := filepath.Join(agentConfigsDir, "baseline", "metadata.yaml") - absConfigFile := filepath.Join(resolved.agentProject, defaultConfigFile) - if _, err := os.Stat(absConfigFile); err == nil { - // Found a default config — resolve all fields from it. 
- var agent opteval.AgentRef - agent.ConfigFile = defaultConfigFile - agent.ResolveFromConfig(resolved.agentProject) - flags.configFile = defaultConfigFile - flags.instructionFile = agent.Instruction.File - flags.instruction = agent.Instruction.Value - flags.skillDir = agent.SkillDir - flags.toolsFile = agent.ToolsFile - fmt.Printf(" Config: %s\n", absConfigFile) + var existing *opteval.Config + if hasExisting && !flags.resetDefaults { + existing = &existingCfg.Config + } + if agentCfg := resolveAgentConfig(existing, resolved.agentProject); agentCfg != nil { + flags.configFile = agentCfg.ConfigFile + flags.instructionFile = agentCfg.InstructionFile + fmt.Printf(" Agent Config: %s\n", filepath.Join(resolved.agentProject, agentCfg.ConfigFile)) } } - // When eval.yaml exists, decide whether to regenerate or create fresh. - existingCfg, hasExisting := tryLoadExistingEvalConfig(configPath) - isRegenerate := false - var builtinEvals opteval.EvaluatorList - + // If --reset-defaults is set, clear existing state so the user can start fresh. if flags.resetDefaults && resolved.envName != "" { - clearEvalState(ctx, resolved.azdClient, resolved.envName) + opteval.ClearEvalState(ctx, resolved.azdClient, resolved.envName) } + // Handle existing eval.yaml: prompt for regeneration, carry forward options. if hasExisting && !flags.resetDefaults { - if noPrompt { - // --no-prompt: treat as full regeneration. - flags.regenerateDataset = true - flags.regenerateEvaluator = true - } else { - if err := promptRegenerateChoices(ctx, resolved, existingCfg, flags); err != nil { - return err - } - if !flags.regenerateDataset && !flags.regenerateEvaluator { - fmt.Println("Keeping existing eval config unchanged.") - return nil - } - } - isRegenerate = true - - // Carry forward existing options when not explicitly overridden. 
- if flags.name == "" && existingCfg.Name != "" { - flags.name = existingCfg.Name - } - if existingCfg.Options != nil && !flags.evalModelSet { - flags.evalModel = existingCfg.Options.EvalModel - } - if flags.configFile == "" && existingCfg.Agent.ConfigFile != "" { - flags.configFile = existingCfg.Agent.ConfigFile - // Resolve all fields from the config for generation API calls. - var agentRef opteval.AgentRef - agentRef.ConfigFile = flags.configFile - agentRef.ResolveFromConfig(resolved.agentProject) - if flags.instruction == "" && flags.instructionFile == "" { - flags.instructionFile = agentRef.Instruction.File - flags.instruction = agentRef.Instruction.Value - } - if flags.skillDir == "" { - flags.skillDir = agentRef.SkillDir - } - if flags.toolsFile == "" { - flags.toolsFile = agentRef.ToolsFile - } - } - if !flags.maxSamplesSet && existingCfg.MaxSamples > 0 { - flags.maxSamples = existingCfg.MaxSamples - } - if flags.traceDays == 0 && existingCfg.TraceDays > 0 { - flags.traceDays = existingCfg.TraceDays + var keepExisting bool + keepExisting, err = handleExistingEvalConfig(ctx, resolved, existingCfg, flags, noPrompt) + if err != nil { + return err } - // Track builtin evaluators for preservation during evaluator regeneration. - if flags.regenerateEvaluator { - _, builtinEvals = eval_api.SplitEvaluators(existingCfg.Evaluators) + if keepExisting { + fmt.Println("Keeping existing eval config unchanged.") + return nil } + isRegenerate = true } // When the user hasn't explicitly set --eval-model, use the deployed model. @@ -221,33 +183,18 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error return err } - // If no baseline config exists yet and we have an instruction, write it - // so that optimize can use it later. 
- if flags.configFile == "" && resolved.hasProject && - (flags.instruction != "" || flags.instructionFile != "") { - defaultConfigFile := filepath.Join(agentConfigsDir, "baseline", "metadata.yaml") - absConfigFile := filepath.Join(resolved.agentProject, defaultConfigFile) - if _, err := os.Stat(absConfigFile); err != nil { - // Baseline doesn't exist — create it. - instruction := resolvedInstruction(flags) - if writeErr := writeBaselineFromEvalInit( - resolved.agentProject, resolved.agentName, instruction, - ); writeErr != nil { - fmt.Printf(" warning: failed to write baseline config: %s\n", writeErr) - } else { - flags.configFile = defaultConfigFile - fmt.Printf(" Baseline: %s\n", absConfigFile) - } + // Write baseline config if none was resolved but we have an instruction. + if flags.configFile == "" && resolved.hasProject { + instruction := resolvedInstruction(flags) + if cfgFile := writeBaselineIfNeeded(resolved.agentProject, instruction); cfgFile != "" { + flags.configFile = cfgFile } } - // Finalize the eval suite name. On fresh init, add a random suffix to - // avoid collisions. On regeneration, keep the existing name. if !isRegenerate { - flags.name = resolveEvalName(flags) + "-" + randomSuffix() + flags.name = resolveEvalName(flags) } - // Prompt agents use the agent source directly; hosted agents require an instruction. if resolved.agentKind != agent_yaml.AgentKindPrompt && flags.instruction == "" && flags.instructionFile == "" && flags.configFile == "" && (flags.dataset == "" || len(flags.evaluators) == 0) { @@ -257,15 +204,92 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error return fmt.Errorf("--max-samples must be between 15 and 1000") } + // Build config and submit generation jobs. 
evalCfg := newEvalConfig(flags, resolved) - state := &evalState{} + var extraEvals opteval.EvaluatorList + if !isRegenerate && len(flags.evaluators) > 0 { + extraEvals = evaluatorsFromFlags(flags.evaluators) + } + + state, err := submitEvalJobs(ctx, resolved, flags, evalCfg, existingCfg, isRegenerate) + if err != nil { + return err + } + + if flags.noWait { + if state.DatasetGenOpID != "" || state.EvalGenOpID != "" { + state.InitStatus = opteval.InitStatusPending + } + return writePendingEvalInit(ctx, resolved, configPath, evalCfg, state) + } + + pollRes, err := pollAndFinalizeJobs(ctx, resolved, evalCfg, state, extraEvals) + if err != nil { + if _, ok := errors.AsType[*initTimeoutError](err); ok { + return writeTimedOutEvalInit(ctx, resolved, configPath, evalCfg, state) + } + return err + } + + state.InitStatus = opteval.InitStatusCompleted + opteval.ClearEvalState(ctx, resolved.azdClient, resolved.envName) + return writeAndPrintEvalResult(ctx, resolved, evalCfg, pollRes, configPath, isRegenerate) +} + +// handleExistingEvalConfig processes an existing eval.yaml by prompting for +// regeneration choices and carrying forward options that weren't overridden. +// Returns keepExisting=true if the user chose not to regenerate anything. +func handleExistingEvalConfig( + ctx context.Context, + resolved *evalResolvedContext, + existingCfg *evalConfig, + flags *evalInitFlags, + noPrompt bool, +) (keepExisting bool, err error) { + if noPrompt { + // --no-prompt: keep existing config unchanged by default. + return true, nil + } + + if err := promptRegenerateChoices(ctx, resolved, existingCfg, flags); err != nil { + return false, err + } + if !flags.regenerateDataset && !flags.regenerateEvaluator { + return true, nil + } + + // Carry forward existing options when not explicitly overridden. 
+ if flags.name == "" && existingCfg.Name != "" { + flags.name = existingCfg.Name + } + if existingCfg.Options != nil && !flags.evalModelSet { + flags.evalModel = existingCfg.Options.EvalModel + } + if !flags.maxSamplesSet && existingCfg.MaxSamples > 0 { + flags.maxSamples = existingCfg.MaxSamples + } + if flags.traceDays == 0 && existingCfg.TraceDays > 0 { + flags.traceDays = existingCfg.TraceDays + } + return false, nil +} + +// submitEvalJobs determines which generation jobs are needed and submits them. +// It preserves existing config fields when regenerating only a subset. +func submitEvalJobs( + ctx context.Context, + resolved *evalResolvedContext, + flags *evalInitFlags, + evalCfg *evalConfig, + existingCfg *evalConfig, + isRegenerate bool, +) (*opteval.EvalState, error) { + state := &opteval.EvalState{} - // Determine which generation jobs to submit. var needDatasetGen, needEvalGen bool if isRegenerate { needDatasetGen = flags.regenerateDataset needEvalGen = flags.regenerateEvaluator - // Preserve fields that are not being regenerated. if !needDatasetGen { evalCfg.DatasetFile = existingCfg.DatasetFile evalCfg.Config.DatasetReference = existingCfg.Config.DatasetReference @@ -275,26 +299,20 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error } } else { needDatasetGen = flags.dataset == "" - needEvalGen = true // always generate adaptive evaluator + needEvalGen = true if !needDatasetGen { - // User provided a local dataset file — use it directly. datasetPath, err := resolveLocalDatasetFile(flags.dataset, resolved.agentProject) if err != nil { - return err + return nil, err } evalCfg.DatasetFile = datasetPath } - // --evaluator values are merged with the generated adaptive evaluator. - if len(flags.evaluators) > 0 { - builtinEvals = evaluatorsFromFlags(flags.evaluators) - } } - // Submit generation jobs (fast API calls). 
if needDatasetGen { job, err := submitDatasetGeneration(ctx, resolved, flags) if err != nil { - return err + return nil, err } state.DatasetGenOpID = job.OperationID() state.DatasetGenStatus = job.NormalizedStatus() @@ -302,29 +320,26 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error if needEvalGen { job, err := submitEvaluatorGeneration(ctx, resolved, flags) if err != nil { - return err + return nil, err } state.EvalGenOpID = job.OperationID() state.EvalGenStatus = job.NormalizedStatus() } - if flags.noWait { - if needDatasetGen || needEvalGen { - state.InitStatus = "pending" - } - return writePendingEvalInit(ctx, resolved, configPath, evalCfg, state) - } - - pollRes, err := pollAndFinalizeJobs(ctx, resolved, evalCfg, state, builtinEvals) - if err != nil { - if _, ok := errors.AsType[*initTimeoutError](err); ok { - return writeTimedOutEvalInit(ctx, resolved, configPath, evalCfg, state) - } - return err - } + return state, nil +} - state.InitStatus = "completed" - clearEvalState(ctx, resolved.azdClient, resolved.envName) +// writeAndPrintEvalResult writes the eval config and review artifacts, then +// prints a summary of the generated assets along with portal links and +// next-step instructions. +func writeAndPrintEvalResult( + ctx context.Context, + resolved *evalResolvedContext, + evalCfg *evalConfig, + pollRes *pollResults, + configPath string, + isRegenerate bool, +) error { if err := eval_api.WriteEvalConfig(configPath, evalCfg); err != nil { return err } @@ -364,13 +379,14 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error } } - // Print evaluator rubric dimensions if available. printEvalDimensions(pollRes) - - // Print portal links. 
printEvalPortalLinks(ctx, resolved, evalCfg) - fmt.Printf("\n Review the generated assets, then run:\n %s\n", color.CyanString("azd ai agent eval run")) + fmt.Println("\n Next steps:") + fmt.Printf(" %s\n", color.CyanString("azd ai agent eval run")) + fmt.Printf(" Run the eval suite against your agent.\n") + fmt.Printf(" %s\n", color.CyanString("azd ai agent eval update")) + fmt.Printf(" Edit the generated dataset or evaluator locally, then upload changes.\n") return nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go index d86ae9893f9..5edffcdd457 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -1,12 +1,15 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// eval_init_jobs.go handles generation job submission and polling for the +// eval init command. It submits dataset and evaluator generation requests, +// polls for completion in parallel, downloads artifacts on success, and +// persists state for resume on timeout. + package cmd import ( "context" - "crypto/rand" - "encoding/hex" "encoding/json" "errors" "fmt" @@ -21,6 +24,7 @@ import ( "github.com/fatih/color" ) +// resolveEvalName returns the eval suite name from flags, falling back to defaultEvalName. func resolveEvalName(flags *evalInitFlags) string { if flags.name != "" { return flags.name @@ -28,15 +32,6 @@ func resolveEvalName(flags *evalInitFlags) string { return defaultEvalName } -// randomSuffix returns a short random hex string (4 bytes = 8 chars). -func randomSuffix() string { - b := make([]byte, 4) - if _, err := rand.Read(b); err != nil { - return "0000" - } - return hex.EncodeToString(b) -} - // resolvedInstruction returns the instruction content from flags, reading // from file if instructionFile is set. 
func resolvedInstruction(flags *evalInitFlags) string { @@ -50,6 +45,7 @@ func resolvedInstruction(flags *evalInitFlags) string { return flags.instruction } +// newEvalConfig builds an evalConfig from flags and resolved context, applying defaults as needed. func newEvalConfig(flags *evalInitFlags, resolved *evalResolvedContext) *evalConfig { agent := evalAgentRef{ Name: resolved.agentName, @@ -72,6 +68,7 @@ func newEvalConfig(flags *evalInitFlags, resolved *evalResolvedContext) *evalCon } } +// submitDatasetGeneration submits a dataset generation job and returns the created job or an error. func submitDatasetGeneration( ctx context.Context, resolved *evalResolvedContext, @@ -85,12 +82,10 @@ func submitDatasetGeneration( request := eval_api.NewDataGenerationJobRequest( resolveEvalName(flags), flags.evalModel, flags.maxSamples, sources, ) - if body, err := json.MarshalIndent(request, "", " "); err == nil { - log.Printf("[debug] submitDatasetGeneration request:\n%s", body) - } return resolved.evalClient.CreateDataGenerationJob(ctx, request, DataGenerationAPIVersion) } +// submitEvaluatorGeneration submits an evaluator generation job and returns the created job or an error. func submitEvaluatorGeneration( ctx context.Context, resolved *evalResolvedContext, @@ -157,12 +152,13 @@ func buildOpenAIEvalRequest(evalCfg *evalConfig) *eval_api.CreateOpenAIEvalReque return evalCfg.ToAgentTargetAdaptableEvalGroupRequest() } +// resumeEvalInit handles resuming an eval init when generation jobs are still pending. It polls for job completion, updates state and config on success, and persists state for later resume if polling times out. 
func resumeEvalInit( ctx context.Context, resolved *evalResolvedContext, configPath string, evalCfg *evalConfig, - state *evalState, + state *opteval.EvalState, ) error { if _, err := pollAndFinalizeJobs(ctx, resolved, evalCfg, state, nil); err != nil { if _, ok := errors.AsType[*initTimeoutError](err); ok { @@ -170,8 +166,8 @@ func resumeEvalInit( } return err } - state.InitStatus = "completed" - clearEvalState(ctx, resolved.azdClient, resolved.envName) + state.InitStatus = opteval.InitStatusCompleted + opteval.ClearEvalState(ctx, resolved.azdClient, resolved.envName) if resolved.hasProject { eval_api.WriteEvalReviewArtifacts(resolved.agentProject, evalCfg) } @@ -187,14 +183,14 @@ type pollResults struct { // pollAndFinalizeJobs polls pending dataset and evaluator generation jobs in // parallel, saves artifacts when an azd project exists, and updates state and // evalCfg. Jobs whose status is already terminal are skipped (safe for resume). -// builtinEvals are prepended to the generated evaluator name on completion; -// pass nil for fresh inits. +// extraEvals are prepended to the generated evaluator list on completion; +// pass nil for fresh inits without --evaluator flags. 
func pollAndFinalizeJobs( ctx context.Context, resolved *evalResolvedContext, evalCfg *evalConfig, - state *evalState, - builtinEvals opteval.EvaluatorList, + state *opteval.EvalState, + extraEvals opteval.EvaluatorList, ) (*pollResults, error) { results := &pollResults{} // Each goroutine writes to distinct fields of evalCfg and state, so no @@ -324,7 +320,7 @@ func pollAndFinalizeJobs( Version: evalVersion, LocalURI: eval_api.EvaluatorLocalURI(evalName), } - evalCfg.Evaluators = append(builtinEvals, evalRef) + evalCfg.Evaluators = append(extraEvals, evalRef) results.EvaluatorResult = eval_api.ParseEvaluatorResult(completed.Result) @@ -384,9 +380,9 @@ func writePendingEvalInit( resolved *evalResolvedContext, configPath string, evalCfg *evalConfig, - state *evalState, + state *opteval.EvalState, ) error { - if err := saveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { + if err := opteval.SaveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { return err } if err := eval_api.WriteEvalConfig(configPath, evalCfg); err != nil { @@ -400,7 +396,6 @@ func writePendingEvalInit( fmt.Printf(" evaluator generation: %s (%s)\n", state.EvalGenOpID, state.EvalGenStatus) } fmt.Printf("\n Config written to: %s\n", configPath) - fmt.Printf(" State saved to: azd environment %q\n", resolved.envName) fmt.Println("\n When ready, run:") fmt.Println(" azd ai agent eval run") return nil @@ -413,10 +408,10 @@ func writeTimedOutEvalInit( resolved *evalResolvedContext, configPath string, evalCfg *evalConfig, - state *evalState, + state *opteval.EvalState, ) error { - state.InitStatus = "pending" - if err := saveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { + state.InitStatus = opteval.InitStatusPending + if err := opteval.SaveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { return err } if err := eval_api.WriteEvalConfig(configPath, evalCfg); err != nil { @@ -432,7 +427,7 @@ func 
writeTimedOutEvalInit( fmt.Printf("\n Config written to: %s\n", configPath) fmt.Printf(" State saved to: azd environment %q\n", resolved.envName) fmt.Println("\n To resume polling, run:") - fmt.Println(" azd ai agent eval init") + fmt.Println(" azd ai agent eval run") fmt.Println("\n To start fresh and clear timed-out state, run:") fmt.Println(" azd ai agent eval init --reset-defaults") return nil diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go index 111e3cb09a4..0082e8266e7 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go @@ -1,6 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// eval_init_prompts.go implements interactive prompts for the eval init +// command, including eval suite name, instruction source, trace inclusion, +// eval model selection, and regeneration choices for existing configs. + package cmd import ( @@ -12,11 +16,13 @@ import ( "strings" "azureaiagent/internal/pkg/agents/agent_yaml" - "azureaiagent/internal/pkg/agents/eval_api" "github.com/azure/azure-dev/cli/azd/pkg/azdext" ) +// promptEvalInitOptions runs interactive prompts for eval init options that +// were not provided via flags: name, instruction, trace days, eval model, +// and max samples. func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, flags *evalInitFlags, noPrompt bool) error { azdClient := resolved.azdClient if noPrompt { @@ -303,19 +309,11 @@ func promptRegenerateChoices( } // Ask about evaluator. - generated, builtin := eval_api.SplitEvaluators(existingCfg.Evaluators) - if len(generated) > 0 { - generatedLabel := strings.Join(generated.Names(), ", ") - msg := fmt.Sprintf("Existing evaluator: %s. 
Do you want to regenerate?", generatedLabel) - if len(builtin) > 0 { - msg = fmt.Sprintf( - "Existing evaluator: %s (built-in evaluators %s will be kept). Do you want to regenerate?", - generatedLabel, strings.Join(builtin.Names(), ", "), - ) - } + if len(existingCfg.Evaluators) > 0 { + evalLabel := strings.Join(existingCfg.Evaluators.Names(), ", ") resp, err := prompt.Confirm(ctx, &azdext.ConfirmRequest{ Options: &azdext.ConfirmOptions{ - Message: msg, + Message: fmt.Sprintf("Existing evaluator: %s. Do you want to regenerate?", evalLabel), DefaultValue: new(false), }, }) @@ -326,19 +324,8 @@ func promptRegenerateChoices( flags.regenerateEvaluator = true } } else { - // No generated evaluators exist — ask whether to generate one. - resp, err := prompt.Confirm(ctx, &azdext.ConfirmRequest{ - Options: &azdext.ConfirmOptions{ - Message: "No custom evaluator found. Do you want to generate one?", - DefaultValue: new(true), - }, - }) - if err != nil { - return fmt.Errorf("prompting for evaluator generation: %w", err) - } - if resp.Value != nil && *resp.Value { - flags.regenerateEvaluator = true - } + // No evaluators exist — generate one by default. + flags.regenerateEvaluator = true } return nil @@ -380,15 +367,3 @@ func promptConfigConfirmation( return nil } - -// relativeDisplay returns a project-relative path for display purposes. -// Returns empty string for empty input. 
-func relativeDisplay(absPath, projectDir string) string { - if absPath == "" || projectDir == "" { - return absPath - } - if rel, err := filepath.Rel(projectDir, absPath); err == nil { - return rel - } - return absPath -} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go index a0b2e3c1ed3..5803385542e 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go @@ -5,6 +5,8 @@ package cmd import ( "encoding/json" + "errors" + "fmt" "os" "path/filepath" "testing" @@ -36,7 +38,7 @@ func TestNewEvalInitCommand_Flags(t *testing.T) { {"project-endpoint", ""}, {"gen-instruction", ""}, {"gen-instruction-file", ""}, - {"eval-model", defaultEvalModel}, + {"eval-model", ""}, {"dataset", ""}, {"max-samples", "15"}, {"out-file", defaultEvalConfigName}, @@ -90,7 +92,7 @@ func TestRunEvalInit_InstructionFile(t *testing.T) { flags := &evalInitFlags{ instructionFile: instrFile, - evalModel: defaultEvalModel, + evalModel: "test-model", maxSamples: 10, } // runEvalInit will fail later (no azd client), but file validation should pass. 
@@ -614,3 +616,105 @@ func TestSplitEvaluators(t *testing.T) { }) } } + +// --------------------------------------------------------------------------- +// resolveEvalName — name resolution +// --------------------------------------------------------------------------- + +func TestResolveEvalName(t *testing.T) { + t.Parallel() + t.Run("returns flag name when set", func(t *testing.T) { + t.Parallel() + flags := &evalInitFlags{name: "my-eval"} + assert.Equal(t, "my-eval", resolveEvalName(flags)) + }) + + t.Run("returns default when flag is empty", func(t *testing.T) { + t.Parallel() + flags := &evalInitFlags{} + assert.Equal(t, defaultEvalName, resolveEvalName(flags)) + }) +} + +// --------------------------------------------------------------------------- +// resolvedInstruction — instruction from flags +// --------------------------------------------------------------------------- + +func TestResolvedInstruction(t *testing.T) { + t.Parallel() + t.Run("returns inline instruction", func(t *testing.T) { + t.Parallel() + flags := &evalInitFlags{instruction: "Be helpful."} + assert.Equal(t, "Be helpful.", resolvedInstruction(flags)) + }) + + t.Run("reads from instruction file", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + filePath := filepath.Join(dir, "prompt.md") + require.NoError(t, os.WriteFile(filePath, []byte("File instruction."), 0600)) + + flags := &evalInitFlags{instructionFile: filePath} + assert.Equal(t, "File instruction.", resolvedInstruction(flags)) + }) + + t.Run("falls back to inline when file missing", func(t *testing.T) { + t.Parallel() + flags := &evalInitFlags{ + instructionFile: "/nonexistent/path.md", + instruction: "fallback", + } + assert.Equal(t, "fallback", resolvedInstruction(flags)) + }) + + t.Run("returns empty when nothing set", func(t *testing.T) { + t.Parallel() + flags := &evalInitFlags{} + assert.Empty(t, resolvedInstruction(flags)) + }) +} + +// --------------------------------------------------------------------------- 
+// isPollerTimeout — timeout error detection +// --------------------------------------------------------------------------- + +func TestIsPollerTimeout(t *testing.T) { + t.Parallel() + t.Run("true for PollerTimeoutError", func(t *testing.T) { + t.Parallel() + err := &eval_api.PollerTimeoutError{} + assert.True(t, isPollerTimeout(err)) + }) + + t.Run("true for wrapped PollerTimeoutError", func(t *testing.T) { + t.Parallel() + inner := &eval_api.PollerTimeoutError{} + wrapped := fmt.Errorf("context: %w", inner) + assert.True(t, isPollerTimeout(wrapped)) + }) + + t.Run("false for other errors", func(t *testing.T) { + t.Parallel() + assert.False(t, isPollerTimeout(errors.New("some error"))) + }) + + t.Run("false for nil", func(t *testing.T) { + t.Parallel() + assert.False(t, isPollerTimeout(nil)) + }) +} + +// --------------------------------------------------------------------------- +// initTimeoutError — error message +// --------------------------------------------------------------------------- + +func TestInitTimeoutError(t *testing.T) { + t.Parallel() + err := &initTimeoutError{ + datasetOpID: "ds-123", + evaluatorOpID: "ev-456", + datasetTimedOut: true, + evaluatorTimedOut: false, + } + assert.Contains(t, err.Error(), "polling timeout") +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go index 01123f4b6a2..4bff60cb9e2 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go @@ -1,6 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// eval_list.go implements the "eval list" command, which lists recent +// evaluations for the current Foundry project with run counts and status. 
+ package cmd import ( @@ -11,14 +14,15 @@ import ( "text/tabwriter" "azureaiagent/internal/pkg/agents/eval_api" + "azureaiagent/internal/pkg/agents/opteval" "github.com/azure/azure-dev/cli/azd/pkg/azdext" - "github.com/fatih/color" "github.com/spf13/cobra" ) +// evalListFlags holds CLI flags for the eval list command. type evalListFlags struct { - limit int + limit int // maximum number of evals to return } func newEvalListCommand() *cobra.Command { @@ -54,7 +58,7 @@ func runEvalList(ctx context.Context, flags *evalListFlags) error { // Load the active eval ID from the azd environment. var activeEvalID string if resolved.envName != "" { - state := loadEvalState(ctx, resolved.azdClient, resolved.envName) + state := opteval.LoadEvalState(ctx, resolved.azdClient, resolved.envName) activeEvalID = state.EvalID } @@ -119,34 +123,3 @@ func runEvalList(ctx context.Context, flags *evalListFlags) error { fmt.Printf("(showing %d — use --limit to change)\n", len(items)) return nil } - -// padColorizedStatus returns a fixed-width colored status string so that -// tabwriter aligns columns correctly despite ANSI escape sequences. -func padColorizedStatus(status string) string { - const statusWidth = 10 // wide enough for "Completed", "Cancelled", etc. - label, colorFn := statusLabelAndColor(status) - padded := fmt.Sprintf("%-*s", statusWidth, label) - return colorFn(padded) -} - -// statusLabelAndColor maps a raw status to a display label and color function. 
-func statusLabelAndColor(status string) (string, func(string, ...any) string) { - switch status { - case "completed": - return "Completed", color.GreenString - case "succeeded": - return "Succeeded", color.GreenString - case "failed": - return "Failed", color.RedString - case "cancelled", "canceled": - return "Cancelled", color.YellowString - case "running", "in_progress": - return "Running", color.CyanString - case "partial": - return "Partial", color.YellowString - case "": - return "No runs", color.HiBlackString - default: - return status, fmt.Sprintf - } -} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go index b981f6bd6e9..8d227804b08 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go @@ -1,6 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// eval_progress.go provides a concurrent-safe progress display with an +// animated spinner for long-running eval operations (generation polling, +// eval runs). Status transitions (running → done/failed/timed-out) are +// printed above the spinner line. + package cmd import ( diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress_test.go new file mode 100644 index 00000000000..cd5934768ef --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress_test.go @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +// ---- durationText ---- + +func TestDurationText(t *testing.T) { + t.Parallel() + tests := []struct { + name string + duration time.Duration + want string + }{ + {"less than a second", 500 * time.Millisecond, "less than a second"}, + {"exactly 1 second", 1 * time.Second, "1 second"}, + {"multiple seconds", 30 * time.Second, "30 seconds"}, + {"exactly 59 seconds", 59 * time.Second, "59 seconds"}, + {"exactly 1 minute", 60 * time.Second, "1 minute"}, + {"exactly 2 minutes", 120 * time.Second, "2 minutes"}, + {"1 minute 30 seconds", 90 * time.Second, "1m 30s"}, + {"2 minutes 15 seconds", 135 * time.Second, "2m 15s"}, + {"zero", 0, "less than a second"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, durationText(tt.duration)) + }) + } +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go index bfe49efc44c..d8892430ed1 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go @@ -1,15 +1,16 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// eval_run.go implements the "eval run" command, which executes an evaluation +// run using an eval.yaml config. It creates or reuses an OpenAI eval, submits +// a run with the configured dataset and agent target, and polls for results. + package cmd import ( - "bufio" "context" - "encoding/json" "fmt" "net/url" - "os" "strings" "time" @@ -21,10 +22,11 @@ import ( "github.com/spf13/cobra" ) +// evalRunFlags holds CLI flags for the eval run command. 
type evalRunFlags struct { - config string - name string - noWait bool + config string // eval config path + name string // eval run name + noWait bool // start and return immediately } func newEvalRunCommand(extCtx *azdext.ExtensionContext) *cobra.Command { @@ -53,7 +55,7 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { } defer resolved.azdClient.Close() - configPath := eval_api.ResolveEvalConfigPath(flags.config, resolved.agentProject) + configPath := eval_api.ResolveRelPath(flags.config, resolved.agentProject) evalCfg, err := eval_api.LoadEvalConfig(configPath) if err != nil { return err @@ -61,14 +63,9 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { // Reconcile agent name/version between environment and eval.yaml. // Environment values take precedence; warn and update the config if they differ. - configChanged := false + configChanged := reconcileConfigAgentName(&evalCfg.Agent, resolved.agentName, flags.config) if resolved.agentName == "" { resolved.agentName = evalCfg.Agent.Name - } else if evalCfg.Agent.Name != "" && evalCfg.Agent.Name != resolved.agentName { - fmt.Printf(" %s agent name in %s (%q) differs from environment (%q) — using environment value\n", - color.YellowString("warning:"), flags.config, evalCfg.Agent.Name, resolved.agentName) - evalCfg.Agent.Name = resolved.agentName - configChanged = true } if resolved.version == "" { resolved.version = evalCfg.Agent.Version @@ -86,9 +83,9 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { } } - state := loadEvalState(ctx, resolved.azdClient, resolved.envName) + state := opteval.LoadEvalState(ctx, resolved.azdClient, resolved.envName) - if state.InitStatus == "pending" { + if state.InitStatus == opteval.InitStatusPending { if err := resumeEvalInit(ctx, resolved, configPath, evalCfg, state); err != nil { return err } @@ -120,7 +117,7 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt 
bool) error { evalID = evalCfg.Name } state.EvalID = evalID - if err := saveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { + if err := opteval.SaveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { return err } } @@ -137,7 +134,7 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { // Set source from local dataset file or remote dataset reference. if evalCfg.DatasetFile != "" { - items, err := loadEvalDatasetFile(evalCfg.DatasetFile) + items, err := loadJSONLFile[map[string]any](evalCfg.DatasetFile) if err != nil { return err } @@ -280,38 +277,6 @@ func pollEvalRun( return nil, fmt.Errorf("eval run %s did not complete within %d attempts", runID, maxAttempts) } -// loadEvalDatasetFile reads a JSONL file and returns each line as a map. -func loadEvalDatasetFile(path string) ([]map[string]any, error) { - f, err := os.Open(path) - if err != nil { - return nil, fmt.Errorf("failed to open dataset file %s: %w", path, err) - } - defer f.Close() - - var items []map[string]any - scanner := bufio.NewScanner(f) - lineNum := 0 - for scanner.Scan() { - lineNum++ - line := scanner.Text() - if line == "" { - continue - } - var item map[string]any - if err := json.Unmarshal([]byte(line), &item); err != nil { - return nil, fmt.Errorf("failed to parse dataset line %d: %w", lineNum, err) - } - items = append(items, item) - } - if err := scanner.Err(); err != nil { - return nil, fmt.Errorf("error reading dataset file %s: %w", path, err) - } - if len(items) == 0 { - return nil, fmt.Errorf("dataset file %s contains no items", path) - } - return items, nil -} - // buildDatasetFileID constructs an azureai:// URI for a remote dataset reference. 
// Format: azureai://accounts//projects//data//versions/ // The account and project are extracted from the project endpoint URL diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go index ebcf66f6881..c608e7989c0 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go @@ -41,37 +41,37 @@ func TestNewEvalRunCommand_UseString(t *testing.T) { } // --------------------------------------------------------------------------- -// loadEvalDatasetFile +// loadJSONLFile // --------------------------------------------------------------------------- -func TestLoadEvalDatasetFile(t *testing.T) { +func TestLoadJSONLFile(t *testing.T) { t.Parallel() dir := t.TempDir() f := filepath.Join(dir, "data.jsonl") content := "{\"query\":\"hello\",\"id\":\"1\"}\n{\"query\":\"world\",\"id\":\"2\"}\n" require.NoError(t, os.WriteFile(f, []byte(content), 0600)) - items, err := loadEvalDatasetFile(f) + items, err := loadJSONLFile[map[string]any](f) require.NoError(t, err) require.Len(t, items, 2) assert.Equal(t, "hello", items[0]["query"]) assert.Equal(t, "2", items[1]["id"]) } -func TestLoadEvalDatasetFile_Empty(t *testing.T) { +func TestLoadJSONLFile_Empty(t *testing.T) { t.Parallel() dir := t.TempDir() f := filepath.Join(dir, "empty.jsonl") require.NoError(t, os.WriteFile(f, []byte(""), 0600)) - _, err := loadEvalDatasetFile(f) + _, err := loadJSONLFile[map[string]any](f) assert.Error(t, err) assert.Contains(t, err.Error(), "contains no items") } -func TestLoadEvalDatasetFile_NotFound(t *testing.T) { +func TestLoadJSONLFile_NotFound(t *testing.T) { t.Parallel() - _, err := loadEvalDatasetFile("/nonexistent/data.jsonl") + _, err := loadJSONLFile[map[string]any]("/nonexistent/data.jsonl") assert.Error(t, err) } @@ -147,3 +147,22 @@ func TestBuildDatasetFileID(t *testing.T) { }) } } + +// 
--------------------------------------------------------------------------- +// agentVersionPtr — version string to pointer +// --------------------------------------------------------------------------- + +func TestAgentVersionPtr(t *testing.T) { + t.Parallel() + t.Run("returns nil for empty string", func(t *testing.T) { + t.Parallel() + assert.Nil(t, agentVersionPtr("")) + }) + + t.Run("returns pointer to version", func(t *testing.T) { + t.Parallel() + v := agentVersionPtr("v2") + require.NotNil(t, v) + assert.Equal(t, "v2", *v) + }) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go index 6564cc41bd6..f765bbbb1f7 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go @@ -1,6 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// eval_show.go implements the "eval show" command, which displays eval +// definitions, run history, and per-criteria result breakdowns. + package cmd import ( @@ -10,16 +13,18 @@ import ( "text/tabwriter" "azureaiagent/internal/pkg/agents/eval_api" + "azureaiagent/internal/pkg/agents/opteval" "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/fatih/color" "github.com/spf13/cobra" ) +// evalShowFlags holds CLI flags for the eval show command. type evalShowFlags struct { - evalRunID string - limit int - output string + evalRunID string // specific eval run to show + limit int // maximum number of runs to display + output string // export results to JSON file } func newEvalShowCommand() *cobra.Command { @@ -58,7 +63,7 @@ func runEvalShow(ctx context.Context, evalID string, flags *evalShowFlags) error // Fall back to the eval ID stored in the azd environment. 
if evalID == "" && resolved.envName != "" { - state := loadEvalState(ctx, resolved.azdClient, resolved.envName) + state := opteval.LoadEvalState(ctx, resolved.azdClient, resolved.envName) evalID = state.EvalID } if evalID == "" { @@ -118,7 +123,7 @@ func printEvalSummary(evalObj *eval_api.OpenAIEval, runs []eval_api.OpenAIEvalRu } fmt.Fprintf(w, " %s\t%s\t%s\t%s\t%s\n", run.ID, - run.Status, + colorizeStatus(run.Status), passed, failed, eval_api.FormatTimestamp(run.CreatedAt), @@ -137,7 +142,7 @@ func printEvalRunSummary(evalID string, run *eval_api.OpenAIEvalRun) error { if run.Name != "" { fmt.Printf("Name: %s\n", run.Name) } - fmt.Printf("Status: %s\n", run.Status) + fmt.Printf("Status: %s\n", colorizeStatus(run.Status)) fmt.Printf("Created: %s\n", eval_api.FormatTimestamp(run.CreatedAt)) if run.CreatedBy != "" { fmt.Printf("Created by: %s\n", run.CreatedBy) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show_test.go new file mode 100644 index 00000000000..27fe3187b00 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show_test.go @@ -0,0 +1,124 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "testing" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// ---- newEvalShowCommand ---- + +func TestNewEvalShowCommand_UseString(t *testing.T) { + t.Parallel() + cmd := newEvalShowCommand() + assert.Equal(t, "show [eval-id]", cmd.Use) +} + +func TestNewEvalShowCommand_Flags(t *testing.T) { + t.Parallel() + cmd := newEvalShowCommand() + + tests := []struct { + name string + flag string + wantNil bool + defValue string + }{ + {"eval-run-id flag", "eval-run-id", false, ""}, + {"limit flag", "limit", false, "20"}, + {"out-file flag", "out-file", false, ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + f := cmd.Flags().Lookup(tt.flag) + if tt.wantNil { + assert.Nil(t, f) + } else { + require.NotNil(t, f, "--%s flag should be registered", tt.flag) + assert.Equal(t, tt.defValue, f.DefValue) + } + }) + } +} + +func TestNewEvalShowCommand_AcceptsOptionalPositionalArg(t *testing.T) { + t.Parallel() + cmd := newEvalShowCommand() + // MaximumNArgs(1) — should accept 0 args without error from arg validation. 
+ assert.NotNil(t, cmd.Args) +} + +func TestNewEvalShowCommand_HasOutFileShorthand(t *testing.T) { + t.Parallel() + cmd := newEvalShowCommand() + f := cmd.Flags().Lookup("out-file") + require.NotNil(t, f) + assert.Equal(t, "O", f.Shorthand) +} + +// ---- newEvalUpdateCommand ---- + +func TestNewEvalUpdateCommand_UseString(t *testing.T) { + t.Parallel() + cmd := newEvalUpdateCommand(&azdext.ExtensionContext{}) + assert.Equal(t, "update", cmd.Use) +} + +func TestNewEvalUpdateCommand_Flags(t *testing.T) { + t.Parallel() + cmd := newEvalUpdateCommand(&azdext.ExtensionContext{}) + + tests := []struct { + name string + flag string + defValue string + }{ + {"config flag", "config", defaultEvalConfigName}, + {"dataset-only flag", "dataset-only", "false"}, + {"evaluator-only flag", "evaluator-only", "false"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + f := cmd.Flags().Lookup(tt.flag) + require.NotNil(t, f, "--%s flag should be registered", tt.flag) + assert.Equal(t, tt.defValue, f.DefValue) + }) + } +} + +func TestNewEvalUpdateCommand_NoArgs(t *testing.T) { + t.Parallel() + cmd := newEvalUpdateCommand(&azdext.ExtensionContext{}) + assert.NotNil(t, cmd.Args) +} + +// ---- eval "update" in parent command ---- + +func TestNewEvalCommand_HasUpdateSubcommand(t *testing.T) { + t.Parallel() + cmd := newEvalCommand(&azdext.ExtensionContext{}) + var names []string + for _, sub := range cmd.Commands() { + names = append(names, sub.Name()) + } + assert.Contains(t, names, "update") +} + +// ---- eval "show" in parent command ---- + +func TestNewEvalCommand_HasShowSubcommand(t *testing.T) { + t.Parallel() + cmd := newEvalCommand(&azdext.ExtensionContext{}) + var names []string + for _, sub := range cmd.Commands() { + names = append(names, sub.Name()) + } + assert.Contains(t, names, "show") +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go index 
397501c6bdc..d4bc3786745 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go @@ -131,41 +131,25 @@ func TestFormatTimestamp(t *testing.T) { } // --------------------------------------------------------------------------- -// eval_api.ResolveEvalOutputPath / eval_api.ResolveEvalConfigPath +// eval_api.ResolveRelPath // --------------------------------------------------------------------------- -func TestResolveEvalOutputPath(t *testing.T) { +func TestResolveRelPath(t *testing.T) { t.Parallel() t.Run("absolute path returned as-is", func(t *testing.T) { t.Parallel() abs := filepath.Join(os.TempDir(), "eval.yaml") - assert.Equal(t, abs, eval_api.ResolveEvalOutputPath(abs, "/project")) + assert.Equal(t, abs, eval_api.ResolveRelPath(abs, "/project")) }) t.Run("relative path joined with agent project", func(t *testing.T) { t.Parallel() - result := eval_api.ResolveEvalOutputPath("eval.yaml", "/project/agent") + result := eval_api.ResolveRelPath("eval.yaml", "/project/agent") assert.Equal(t, filepath.Join("/project/agent", "eval.yaml"), result) }) } -func TestResolveEvalConfigPath(t *testing.T) { - t.Parallel() - - t.Run("absolute path returned as-is", func(t *testing.T) { - t.Parallel() - abs := filepath.Join(os.TempDir(), "eval.yaml") - assert.Equal(t, abs, eval_api.ResolveEvalConfigPath(abs, "/project")) - }) - - t.Run("relative path joined with agent project when file does not exist", func(t *testing.T) { - t.Parallel() - result := eval_api.ResolveEvalConfigPath("nonexistent.yaml", "/project/agent") - assert.Equal(t, filepath.Join("/project/agent", "nonexistent.yaml"), result) - }) -} - // --------------------------------------------------------------------------- // detectEvalAgentKind // --------------------------------------------------------------------------- @@ -219,7 +203,7 @@ func TestDetectEvalAgentKind(t *testing.T) { } // 
--------------------------------------------------------------------------- -// evalState — stored in azd environment (integration-tested via eval init/run) +// EvalState — stored in azd environment (integration-tested via eval init/run) // --------------------------------------------------------------------------- // --------------------------------------------------------------------------- @@ -473,3 +457,31 @@ func TestReadEvalConfig_MissingFile(t *testing.T) { _, err := eval_api.LoadEvalConfig("/nonexistent/path/eval.yaml") assert.Error(t, err) } + +// --------------------------------------------------------------------------- +// endpointFromProjectID — project ID to endpoint conversion +// --------------------------------------------------------------------------- + +func TestEndpointFromProjectID(t *testing.T) { + t.Parallel() + t.Run("valid project ID", func(t *testing.T) { + t.Parallel() + projectID := "/subscriptions/sub123/resourceGroups/rg/providers/Microsoft.CognitiveServices/accounts/myaccount/projects/myproject" + endpoint, err := endpointFromProjectID(projectID) + require.NoError(t, err) + assert.Contains(t, endpoint, "myaccount") + assert.Contains(t, endpoint, "myproject") + }) + + t.Run("invalid project ID", func(t *testing.T) { + t.Parallel() + _, err := endpointFromProjectID("not-a-valid-id") + assert.Error(t, err) + }) + + t.Run("empty project ID", func(t *testing.T) { + t.Parallel() + _, err := endpointFromProjectID("") + assert.Error(t, err) + }) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go index 573863be0cf..54c6d5792b2 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go @@ -1,6 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+// eval_update.go implements the "eval update" command, which uploads new +// versions of locally-edited evaluators and datasets. It reads eval.yaml, +// detects assets with local_uri pointers, and uploads them as new versions. + package cmd import ( @@ -18,10 +22,11 @@ import ( "github.com/spf13/cobra" ) +// evalUpdateFlags holds CLI flags for the eval update command. type evalUpdateFlags struct { - config string - datasetOnly bool - evaluatorOnly bool + config string // eval config path + datasetOnly bool // only update the dataset + evaluatorOnly bool // only update evaluators } func newEvalUpdateCommand(extCtx *azdext.ExtensionContext) *cobra.Command { @@ -57,7 +62,7 @@ func runEvalUpdate(ctx context.Context, flags *evalUpdateFlags, noPrompt bool) e } defer resolved.azdClient.Close() - configPath := eval_api.ResolveEvalConfigPath(flags.config, resolved.agentProject) + configPath := eval_api.ResolveRelPath(flags.config, resolved.agentProject) evalCfg, err := eval_api.LoadEvalConfig(configPath) if err != nil { return fmt.Errorf("failed to load eval config: %w", err) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go index 57e413426a8..50c3976d593 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -1,6 +1,13 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// optimize.go implements the top-level "optimize" command, which submits +// agent optimization jobs. It resolves the agent, loads or builds a config, +// prompts for instruction/skills/model, and polls for results. +// +// Subcommands (status, list, cancel, apply, deploy) are registered here +// and implemented in their own files. 
+ package cmd import ( @@ -20,16 +27,17 @@ import ( "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/fatih/color" "github.com/spf13/cobra" - "go.yaml.in/yaml/v3" ) -// optimizeAgentContext holds the resolved agent name and project directory. +// optimizeAgentContext holds the resolved agent name and project directory +// for an optimization operation. type optimizeAgentContext struct { - agentName string - agentProject string // project directory path (empty if not resolved from azd project) + agentName string // deployed agent name + agentProject string // agent project directory (empty if not in an azd project) } -// resolveOptimizeAgent resolves the agent name and project directory using: +// resolveOptimizeAgent resolves the agent name and project directory. +// Resolution order: // 1. Explicit --agent flag // 2. azd project context (resolveAgentService + environment variables) // 3. Error with guidance @@ -69,17 +77,18 @@ func resolveOptimizeAgent(ctx context.Context, flagValue string, noPrompt bool) return nil, fmt.Errorf("agent name is required: use --agent , or run from an azd project after 'azd deploy'") } +// optimizeFlags holds CLI flags for the optimize (submit) command. type optimizeFlags struct { - configFile string - agent string - evalModel string - targetAttributes []string - noWait bool - watch bool - pollInterval int + configFile string // path to YAML config file + agent string // agent name override + evalModel string // model for evaluation + targetAttributes []string // optimization targets (instruction, skill) + noWait bool // return immediately after submission + pollInterval int // polling interval in seconds optimizeConnectionFlags } +// newOptimizeCommand creates the top-level "optimize" command and registers its subcommands. 
func newOptimizeCommand(extCtx *azdext.ExtensionContext) *cobra.Command { flags := &optimizeFlags{} action := &OptimizeAction{flags: flags, noPrompt: extCtx.NoPrompt} @@ -130,7 +139,6 @@ Use --config for a custom YAML spec, or just provide the agent name to use sensi cmd.Flags().StringVarP(&flags.evalModel, "eval-model", "m", "gpt-4.1-mini", "Model for evaluation") cmd.Flags().StringArrayVarP(&flags.targetAttributes, "target", "s", nil, "Target attribute for optimization: instruction, skill (repeatable)") cmd.Flags().BoolVar(&flags.noWait, "no-wait", false, "Submit job and return immediately without waiting for completion") - cmd.Flags().BoolVar(&flags.watch, "watch", true, "Watch for job completion (opposite of --no-wait)") cmd.Flags().IntVar(&flags.pollInterval, "poll-interval", 5, "Polling interval in seconds") flags.optimizeConnectionFlags.register(cmd) @@ -149,75 +157,116 @@ type OptimizeAction struct { noPrompt bool } +// Run executes the optimize command: resolves the agent, loads/builds the config, applies overrides, submits the job, and optionally polls for results. 
func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { endpoint, err := a.flags.resolve(ctx) if err != nil { return err } - var cfg *OptimizeConfig - configSource := "" // tracks where the config came from for user messaging - hasProject := false - agentProject := "" + cfg, configSource, agentProject, err := a.resolveConfig(ctx) + if err != nil { + return err + } + hasProject := agentProject != "" - if a.flags.configFile != "" { - cfg, err = LoadOptimizeConfig(a.flags.configFile) - if err != nil { - return fmt.Errorf("%w\n\nCheck that the file path is correct and contains valid YAML", err) - } - configSource = a.flags.configFile + if err := a.applyOverrides(ctx, cfg, agentProject); err != nil { + return err + } + + out := cmd.OutOrStdout() + bold := color.New(color.Bold) + + bold.Fprintf(out, "Optimizing agent %q...\n", cfg.Agent.Name) + if configSource == "" { + fmt.Fprintf(out, " Dataset: built-in (3 tasks, 12 criteria)\n") } else { - resolved, err := resolveOptimizeAgent(ctx, a.flags.agent, a.noPrompt) + fmt.Fprintf(out, " Config: %s\n", configSource) + } + + resp, client, err := a.submitJob(ctx, out, endpoint, cfg, agentProject) + if err != nil { + return err + } + + if !a.flags.noWait && !optimize_api.IsTerminal(resp.Status) { + finalStatus, err := pollOptimizeJob(cmd, client, a.flags.pollInterval, resp.OperationID) if err != nil { return err } - hasProject = resolved.agentProject != "" - agentProject = resolved.agentProject - - // Check if eval.yaml exists in the agent project and offer to use it - if resolved.agentProject != "" { - evalPath := filepath.Join(resolved.agentProject, defaultEvalConfigName) - if _, statErr := os.Stat(evalPath); statErr == nil && !a.noPrompt { - azdClient, clientErr := azdext.NewAzdClient() - if clientErr == nil { - defer azdClient.Close() - resp, promptErr := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ - Options: &azdext.ConfirmOptions{ - Message: fmt.Sprintf("Found %s in project. 
Use it for optimization?", defaultEvalConfigName), - DefaultValue: new(true), - }, - }) - if promptErr == nil && resp.Value != nil && *resp.Value { - cfg, err = LoadOptimizeConfig(evalPath) - if err != nil { - return fmt.Errorf("failed to load %s: %w", evalPath, err) - } - configSource = evalPath - } - } - } + printOptimizeResults(out, finalStatus, hasProject) + } + + return nil +} + +// resolveConfig loads or builds an OptimizeConfig from flags, eval.yaml +// detection, and agent resolution. Returns the config, its source path +// (empty if using defaults), and the agent project directory. +func (a *OptimizeAction) resolveConfig( + ctx context.Context, +) (cfg *OptimizeConfig, configSource, agentProject string, err error) { + if a.flags.configFile != "" { + cfg, err = LoadOptimizeConfig(a.flags.configFile) + if err != nil { + return nil, "", "", fmt.Errorf("%w\n\nCheck that the file path is correct and contains valid YAML", err) } + return cfg, a.flags.configFile, "", nil + } - if cfg == nil { - cfg = defaultOptimizeConfig(resolved.agentName) - } else if resolved.agentName != "" && cfg.Agent.Name != "" && cfg.Agent.Name != resolved.agentName { - // Config loaded from eval.yaml but agent name differs from environment. - fmt.Printf(" %s agent name in %s (%q) differs from environment (%q) — using environment value\n", - color.YellowString("warning:"), configSource, cfg.Agent.Name, resolved.agentName) - cfg.Agent.Name = resolved.agentName - if data, mErr := yaml.Marshal(cfg); mErr == nil { - if wErr := os.WriteFile(configSource, data, 0600); wErr == nil { - fmt.Printf(" Updated %s with current environment values\n", configSource) + resolved, err := resolveOptimizeAgent(ctx, a.flags.agent, a.noPrompt) + if err != nil { + return nil, "", "", err + } + agentProject = resolved.agentProject + + // Check if eval.yaml exists in the agent project and offer to use it. 
+ if resolved.agentProject != "" { + evalPath := filepath.Join(resolved.agentProject, defaultEvalConfigName) + if _, statErr := os.Stat(evalPath); statErr == nil && !a.noPrompt { + azdClient, clientErr := azdext.NewAzdClient() + if clientErr == nil { + defer azdClient.Close() + resp, promptErr := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: fmt.Sprintf("Found %s in project. Use it for optimization?", defaultEvalConfigName), + DefaultValue: new(true), + }, + }) + if promptErr == nil && resp.Value != nil && *resp.Value { + cfg, err = LoadOptimizeConfig(evalPath) + if err != nil { + return nil, "", "", fmt.Errorf("failed to load %s: %w", evalPath, err) + } + configSource = evalPath } } } } + if cfg == nil { + cfg = defaultOptimizeConfig(resolved.agentName) + } else { + reconcileConfigAgentName(&cfg.Agent, resolved.agentName, configSource) + } + + return cfg, configSource, agentProject, nil +} + +// applyOverrides applies CLI flag overrides, resolves baseline agent config, +// and interactively fills missing instruction/skills/model values. +func (a *OptimizeAction) applyOverrides( + ctx context.Context, + cfg *OptimizeConfig, + agentProject string, +) error { if err := cfg.Validate(); err != nil { return fmt.Errorf("invalid config: %w", err) } - // CLI flags override config values + hasProject := agentProject != "" + + // CLI flags override config values. if a.flags.evalModel != "" { cfg.Options.EvalModel = a.flags.evalModel } @@ -225,21 +274,9 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { cfg.Options.TargetAttributes = a.flags.targetAttributes } - // Resolve agent config directory pointer — fills in instruction, skill_dir, - // tools_file, and model from metadata.yaml if the config pointer is set. + // Resolve agent config: try existing config pointer, then default baseline. 
if hasProject { - cfg.Agent.ResolveFromConfig(agentProject) - } - - // Auto-detect baseline config if no config pointer is set yet. - if cfg.Agent.ConfigFile == "" && hasProject { - defaultConfigFile := filepath.Join(agentConfigsDir, "baseline", "metadata.yaml") - absConfigFile := filepath.Join(agentProject, defaultConfigFile) - if _, statErr := os.Stat(absConfigFile); statErr == nil { - cfg.Agent.ConfigFile = defaultConfigFile - cfg.Agent.ResolveFromConfig(agentProject) - fmt.Printf(" Baseline: %s\n", absConfigFile) - } + mergeAgentBaseline(cfg, agentProject) } // When baseline config is detected, show resolved values and let the user confirm. @@ -250,14 +287,14 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { } // Resolve relative skill_dir against agent project directory. - if cfg.Agent.SkillDir != "" && hasProject && !filepath.IsAbs(cfg.Agent.SkillDir) { - cfg.Agent.SkillDir = filepath.Join(agentProject, cfg.Agent.SkillDir) + if cfg.SkillDir != "" && hasProject && !filepath.IsAbs(cfg.SkillDir) { + cfg.SkillDir = filepath.Join(agentProject, cfg.SkillDir) } // Resolve relative tools_file against agent project directory. // TODO: re-enable when tools optimization is supported in the service. - // if cfg.Agent.ToolsFile != "" && hasProject && !filepath.IsAbs(cfg.Agent.ToolsFile) { - // cfg.Agent.ToolsFile = filepath.Join(agentProject, cfg.Agent.ToolsFile) + // if cfg.ToolsFile != "" && hasProject && !filepath.IsAbs(cfg.ToolsFile) { + // cfg.ToolsFile = filepath.Join(agentProject, cfg.ToolsFile) // } // Resolve agent instruction using a well-defined lifecycle: @@ -269,7 +306,7 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { } // Resolve skill_dir: auto-detect, check baseline, or prompt user. 
- if cfg.Agent.SkillDir == "" && hasProject { + if cfg.SkillDir == "" && hasProject { if err := resolveOptimizeSkillDir(ctx, cfg, agentProject, a.noPrompt); err != nil { return err } @@ -282,26 +319,57 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { } } - out := cmd.OutOrStdout() - bold := color.New(color.Bold) + return nil +} - bold.Fprintf(out, "Optimizing agent %q...\n", cfg.Agent.Name) - if configSource == "" { - fmt.Fprintf(out, " Dataset: built-in (3 tasks, 12 criteria)\n") - } else { - fmt.Fprintf(out, " Config: %s\n", configSource) +// mergeAgentBaseline resolves the baseline agent config and merges missing +// fields (instruction, model, skills, tools) into the OptimizeConfig. +func mergeAgentBaseline(cfg *OptimizeConfig, agentProject string) { + var existing *opteval.Config + if cfg.Agent.ConfigFile != "" { + existing = &opteval.Config{Agent: cfg.Agent} + } + agentCfg := resolveAgentConfig(existing, agentProject) + if agentCfg == nil { + return + } + cfg.Agent.ConfigFile = agentCfg.ConfigFile + if cfg.Agent.Instruction.IsEmpty() && agentCfg.InstructionFile != "" { + cfg.Agent.Instruction.File = agentCfg.InstructionFile + } + if cfg.Agent.Model == "" { + cfg.Agent.Model = agentCfg.Model + } + if cfg.SkillDir == "" { + cfg.SkillDir = agentCfg.SkillDir + } + if cfg.ToolsFile == "" { + cfg.ToolsFile = agentCfg.ToolsFile } + if existing == nil { + fmt.Printf(" Baseline: %s\n", filepath.Join(agentProject, agentCfg.ConfigFile)) + } +} +// submitJob builds the optimization request, saves the baseline config, +// submits the job, and prints initial status. 
+func (a *OptimizeAction) submitJob( + ctx context.Context, + out io.Writer, + endpoint string, + cfg *OptimizeConfig, + agentProject string, +) (*optimize_api.OptimizeResponse, *optimize_api.OptimizeClient, error) { credential, err := newAgentCredential() if err != nil { - return err + return nil, nil, err } client := optimize_api.NewOptimizeClient(endpoint, credential) optimizeReq, err := cfg.ToRequest(endpoint) if err != nil { - return fmt.Errorf("failed to build optimization request: %w", err) + return nil, nil, fmt.Errorf("failed to build optimization request: %w", err) } if body, jsonErr := json.MarshalIndent(optimizeReq, "", " "); jsonErr == nil { @@ -309,13 +377,18 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { } // Save baseline config before starting optimization. + hasProject := agentProject != "" if hasProject { - if err := saveBaselineConfig(agentProject, cfg.Agent.SkillDir, cfg.Agent.ToolsFile, optimizeReq); err != nil { + if err := writeBaselineConfig(agentProject, baselineParams{ + Model: optimizeReq.Agent.Model, + Instruction: optimizeReq.Agent.SystemPrompt, + SkillDir: cfg.SkillDir, + ToolsFile: cfg.ToolsFile, + }); err != nil { fmt.Fprintf(out, " warning: failed to save baseline config: %s\n", err) } else { - baselineMetaPath := filepath.Join(agentConfigsDir, "baseline", "metadata.yaml") + baselineMetaPath := opteval.BaselineConfigRelPath() fmt.Fprintf(out, " Baseline saved to %s\n", baselineMetaPath) - // Set config pointer so eval.yaml references the baseline. 
if cfg.Agent.ConfigFile == "" { cfg.Agent.ConfigFile = baselineMetaPath } @@ -324,435 +397,22 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { resp, err := client.StartOptimize(ctx, optimizeReq) if err != nil { - return fmt.Errorf("failed to submit optimization job: %w\n\nCheck that the endpoint %q is reachable", err, endpoint) + return nil, nil, fmt.Errorf( + "failed to submit optimization job: %w\n\nCheck that the endpoint %q is reachable", err, endpoint) } fmt.Fprintf(out, " Job ID: %s\n", color.CyanString(resp.OperationID)) fmt.Fprintf(out, " Status: %s\n", resp.Status) - // Print portal link for the optimization job. printOptimizePortalLink(ctx, out, cfg.Agent.Name, resp.OperationID) - fmt.Fprintln(out) - // Store last operation ID in azd environment for use by status/deploy saveLastOptimizeJobID(ctx, resp.OperationID) - if !a.flags.noWait && !optimize_api.IsTerminal(resp.Status) { - finalStatus, err := pollOptimizeJob(cmd, client, a.flags.pollInterval, resp.OperationID) - if err != nil { - return err - } - printOptimizeResults(out, finalStatus, hasProject) - } - - return nil -} - -// resolveOptimizeSystemPrompt resolves the agent's system prompt: -// -// 1. Config dir pointer (agent.config): instruction from metadata.yaml (already resolved). -// 2. Config (eval.yaml / --config): inline instruction or file reference. -// 3. Interactive prompt: ask the user to provide inline text or a file path. -// -// Relative file paths are resolved against agentProject. -func resolveOptimizeSystemPrompt( - ctx context.Context, - cfg *OptimizeConfig, - agentProject string, - hasProject bool, - noPrompt bool, -) error { - // Resolve relative instruction file paths against the agent project directory. 
- if cfg.Agent.Instruction.File != "" && hasProject && !filepath.IsAbs(cfg.Agent.Instruction.File) { - cfg.Agent.Instruction.File = filepath.Join(agentProject, cfg.Agent.Instruction.File) - } - - // Step 1: Config explicitly declares a file reference — validate it's readable. - if cfg.Agent.Instruction.File != "" { - if _, err := os.Stat(cfg.Agent.Instruction.File); err != nil { - return fmt.Errorf("instruction file %q from config is not accessible: %w", - cfg.Agent.Instruction.File, err) - } - return nil - } - - // Step 1b: Config already has inline instruction — nothing to do. - if cfg.Agent.Instruction.Value != "" { - return nil - } - - // Step 2: Interactive prompt — ask user to provide inline text or a file path. - if noPrompt { - return fmt.Errorf("instruction is required for optimization.\n\n" + - "Provide it via one of:\n" + - " 1. Set agent.config in eval.yaml to point to a config dir with metadata.yaml\n" + - " 2. Set instruction in eval.yaml (agent section): inline string or file reference\n" + - " 3. Run without --no-prompt to enter it interactively") - } - - azdClient, clientErr := azdext.NewAzdClient() - if clientErr != nil { - return fmt.Errorf("instruction is required but could not open interactive prompt: %w", clientErr) - } - defer azdClient.Close() - - inputChoices := []*azdext.SelectChoice{ - {Label: "Type inline", Value: "inline"}, - {Label: "Load from file", Value: "file"}, - } - defaultIdx := int32(0) - selResp, selErr := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ - Options: &azdext.SelectOptions{ - Message: "No instruction found in config or baseline. 
" + - "How would you like to provide it?", - Choices: inputChoices, - SelectedIndex: &defaultIdx, - }, - }) - if selErr != nil { - return fmt.Errorf("prompting for instruction input method: %w", selErr) - } - - if inputChoices[int(*selResp.Value)].Value == "file" { - pathResp, pathErr := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ - Options: &azdext.PromptOptions{ - Message: "Path to instruction file", - IgnoreHintKeys: true, - }, - }) - if pathErr != nil { - return fmt.Errorf("prompting for instruction file path: %w", pathErr) - } - filePath := strings.TrimSpace(pathResp.Value) - // Resolve relative paths against the agent project directory. - if !filepath.IsAbs(filePath) && hasProject { - filePath = filepath.Join(agentProject, filePath) - } - if _, err := os.Stat(filePath); err != nil { - return fmt.Errorf("instruction file %q is not accessible: %w", filePath, err) - } - cfg.Agent.Instruction.File = filePath - } else { - resp, promptErr := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ - Options: &azdext.PromptOptions{ - Message: "Enter the agent's instruction", - IgnoreHintKeys: true, - }, - }) - if promptErr != nil { - return fmt.Errorf("prompting for instruction: %w", promptErr) - } - cfg.Agent.Instruction.Value = strings.TrimSpace(resp.Value) - } - - return nil -} - -// resolveOptimizeSkillDir resolves the agent's skill directory: -// 1. Config dir pointer (agent.config): skill_dir from metadata.yaml (already resolved). -// 2. Auto-detect: look for a "skills/" folder in the agent project — confirm with user. -// 3. Interactive prompt: ask the user to provide a path or skip. -func resolveOptimizeSkillDir( - ctx context.Context, - cfg *OptimizeConfig, - agentProject string, - noPrompt bool, -) error { - // Step 1: Auto-detect common skill directory names. 
- var detectedDir string - for _, candidate := range []string{"skills", "skill"} { - dir := filepath.Join(agentProject, candidate) - if info, err := os.Stat(dir); err == nil && info.IsDir() { - detectedDir = dir - break - } - } - - if noPrompt { - // In no-prompt mode, use whatever was detected (may be empty). - cfg.Agent.SkillDir = detectedDir - return nil - } - - azdClient, clientErr := azdext.NewAzdClient() - if clientErr != nil { - cfg.Agent.SkillDir = detectedDir - return nil - } - defer azdClient.Close() - - if detectedDir != "" { - // Found a skill directory — ask user to confirm or provide a different one. - choices := []*azdext.SelectChoice{ - {Label: fmt.Sprintf("Use detected: %s", detectedDir), Value: "use"}, - {Label: "Provide a different path", Value: "other"}, - {Label: "Skip (no skills)", Value: "skip"}, - } - defaultIdx := int32(0) - selResp, selErr := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ - Options: &azdext.SelectOptions{ - Message: fmt.Sprintf("Found skills directory: %s", detectedDir), - Choices: choices, - SelectedIndex: &defaultIdx, - }, - }) - if selErr != nil { - cfg.Agent.SkillDir = detectedDir - return nil - } - - switch choices[int(*selResp.Value)].Value { - case "use": - cfg.Agent.SkillDir = detectedDir - return nil - case "skip": - return nil - case "other": - // Fall through to path prompt below. - } - } else { - // No skill directory found — ask if they want to provide one. - resp, promptErr := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ - Options: &azdext.ConfirmOptions{ - Message: "No skills directory found. Would you like to provide one?", - DefaultValue: new(bool), // default false - }, - }) - if promptErr != nil || !resp.GetValue() { - return nil // skip skills - } - } - - // Prompt for a custom path. 
- pathResp, pathErr := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ - Options: &azdext.PromptOptions{ - Message: "Path to skills directory", - IgnoreHintKeys: true, - }, - }) - if pathErr != nil { - return fmt.Errorf("prompting for skills directory: %w", pathErr) - } - - dir := strings.TrimSpace(pathResp.Value) - if dir == "" { - return nil - } - if !filepath.IsAbs(dir) { - dir = filepath.Join(agentProject, dir) - } - if info, err := os.Stat(dir); err != nil || !info.IsDir() { - return fmt.Errorf("skills directory %q is not accessible or not a directory", dir) - } - - cfg.Agent.SkillDir = dir - return nil -} - -// promptOptimizeConfigConfirmation shows the resolved values from the baseline -// config and lets the user confirm or override instruction file, skills -// directory, and tools file. -func promptOptimizeConfigConfirmation(ctx context.Context, cfg *OptimizeConfig, agentProject string) error { - azdClient, clientErr := azdext.NewAzdClient() - if clientErr != nil { - return nil // non-fatal — skip confirmation prompts - } - defer azdClient.Close() - prompt := azdClient.Prompt() - - // Instruction file. - instrDefault := relativeOptDisplay(cfg.Agent.Instruction.File, agentProject) - resp, err := prompt.Prompt(ctx, &azdext.PromptRequest{ - Options: &azdext.PromptOptions{ - Message: "Instruction file", - DefaultValue: instrDefault, - IgnoreHintKeys: true, - }, - }) - if err != nil { - return fmt.Errorf("prompting for instruction file: %w", err) - } - if value := strings.TrimSpace(resp.Value); value != "" { - if !filepath.IsAbs(value) && agentProject != "" { - value = filepath.Join(agentProject, value) - } - if _, err := os.Stat(value); err != nil { - return fmt.Errorf("instruction file %q is not accessible: %w", value, err) - } - cfg.Agent.Instruction.File = value - cfg.Agent.Instruction.Value = "" - } - - // Skills directory. 
- skillDefault := relativeOptDisplay(cfg.Agent.SkillDir, agentProject) - resp, err = prompt.Prompt(ctx, &azdext.PromptRequest{ - Options: &azdext.PromptOptions{ - Message: "Skills directory (enter to skip)", - DefaultValue: skillDefault, - IgnoreHintKeys: true, - }, - }) - if err != nil { - return fmt.Errorf("prompting for skills directory: %w", err) - } - if value := strings.TrimSpace(resp.Value); value != "" { - if !filepath.IsAbs(value) && agentProject != "" { - value = filepath.Join(agentProject, value) - } - cfg.Agent.SkillDir = value - } else { - cfg.Agent.SkillDir = "" - } - - // TODO: re-enable tools file prompt when tools optimization is supported. - // // Tools file. - // toolsDefault := relativeOptDisplay(cfg.Agent.ToolsFile, agentProject) - // resp, err = prompt.Prompt(ctx, &azdext.PromptRequest{ - // Options: &azdext.PromptOptions{ - // Message: "Tools file (enter to skip)", - // DefaultValue: toolsDefault, - // IgnoreHintKeys: true, - // }, - // }) - // if err != nil { - // return fmt.Errorf("prompting for tools file: %w", err) - // } - // if value := strings.TrimSpace(resp.Value); value != "" { - // if !filepath.IsAbs(value) && agentProject != "" { - // value = filepath.Join(agentProject, value) - // } - // cfg.Agent.ToolsFile = value - // } else { - // cfg.Agent.ToolsFile = "" - // } - - return nil -} - -// relativeOptDisplay returns a project-relative path for display. -func relativeOptDisplay(absPath, projectDir string) string { - if absPath == "" || projectDir == "" { - return absPath - } - if rel, err := filepath.Rel(projectDir, absPath); err == nil { - return rel - } - return absPath -} - -// knownOptimizationModels is the list of models commonly used for optimization. -var knownOptimizationModels = []string{ - "gpt-4.1", - "gpt-4.1-mini", - "gpt-4.1-nano", - "gpt-4o", - "gpt-4o-mini", -} - -// resolveOptimizeTargetModels prompts the user to select model candidates -// for optimization (target_config.model). 
Shows the current deployed model -// and allows multi-select from known models. -func resolveOptimizeTargetModels( - ctx context.Context, - cfg *OptimizeConfig, -) error { - azdClient, clientErr := azdext.NewAzdClient() - if clientErr != nil { - return nil - } - defer azdClient.Close() - - currentModel := cfg.Agent.Model - - message := "Select target models for optimization" - if currentModel != "" { - message = fmt.Sprintf("Select target models for optimization (current: %s)", currentModel) - } - - resp, promptErr := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ - Options: &azdext.ConfirmOptions{ - Message: "Would you like to specify target models for optimization?", - DefaultValue: new(bool), // default false - }, - }) - if promptErr != nil || !resp.GetValue() { - return nil - } - - // Build choices — include current model if not already in the known list. - choices := buildOptimizeModelChoices(currentModel) - - multiResp, multiErr := azdClient.Prompt().MultiSelect(ctx, &azdext.MultiSelectRequest{ - Options: &azdext.MultiSelectOptions{ - Message: message, - Choices: choices, - }, - }) - if multiErr != nil { - return fmt.Errorf("prompting for target models: %w", multiErr) - } - - var models []string - for _, v := range multiResp.Values { - models = append(models, v.Value) - } - - if len(models) > 0 { - if cfg.Options.TargetConfig == nil { - cfg.Options.TargetConfig = &opteval.TargetConfig{} - } - cfg.Options.TargetConfig.Model = models - } - - return nil -} - -// buildOptimizeModelChoices returns MultiSelectChoice items for model selection. -// The current deployed model is included and pre-selected; placed first if not in the known list. -func buildOptimizeModelChoices(currentModel string) []*azdext.MultiSelectChoice { - seen := make(map[string]bool) - var choices []*azdext.MultiSelectChoice - - // If the current model is not in the known list, prepend it. 
- if currentModel != "" { - found := false - for _, m := range knownOptimizationModels { - if m == currentModel { - found = true - break - } - } - if !found { - choices = append(choices, &azdext.MultiSelectChoice{ - Label: currentModel + " (current)", - Value: currentModel, - Selected: true, - }) - seen[currentModel] = true - } - } - - for _, m := range knownOptimizationModels { - if seen[m] { - continue - } - label := m - selected := false - if m == currentModel { - label = m + " (current)" - selected = true - } - choices = append(choices, &azdext.MultiSelectChoice{ - Label: label, - Value: m, - Selected: selected, - }) - } - - return choices + return resp, client, nil } +// pollOptimizeJob polls the optimization job until it reaches a terminal state. func pollOptimizeJob( cmd *cobra.Command, client *optimize_api.OptimizeClient, @@ -800,6 +460,7 @@ func pollOptimizeJob( return finalStatus, nil } +// printOptimizeResults prints the optimization results table and next-step commands. func printOptimizeResults(out io.Writer, status *optimize_api.OptimizeJobStatus, hasProject bool) { if status.Error != nil { fmt.Fprintf(out, "\n %s %s\n", color.RedString("Error:"), status.Error.Message) @@ -873,6 +534,7 @@ func printOptimizeResults(out io.Writer, status *optimize_api.OptimizeJobStatus, fmt.Fprintln(out) } +// formatOptimizeStatus returns a colorized string for the given job status. func formatOptimizeStatus(status string) string { switch status { case optimize_api.StatusCompleted: @@ -890,6 +552,7 @@ func formatOptimizeStatus(status string) string { } } +// truncateString truncates s to maxLen characters, appending "..." if trimmed. 
func truncateString(s string, maxLen int) string { if len(s) <= maxLen { return s diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go index 6886e2e3f00..6fdfe4466a9 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go @@ -1,6 +1,14 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// optimize_apply.go implements the "optimize apply" command, which downloads +// an optimization candidate and applies it locally to the azd project. +// +// It writes the candidate's instruction, skills, and tool definitions +// into .agent_configs/<candidate-id>/, updates agent.yaml environment +// variables, and prints the baseline and candidate config folder paths +// for comparison. + package cmd import ( @@ -12,6 +20,7 @@ import ( "path/filepath" "strings" + "azureaiagent/internal/pkg/agents/opteval" "azureaiagent/internal/pkg/agents/optimize_api" "github.com/azure/azure-dev/cli/azd/pkg/azdext" @@ -20,13 +29,13 @@ import ( "go.yaml.in/yaml/v3" ) -// agentConfigsDir is the default folder that holds agent configuration versions -// (baseline and optimized candidates). -const agentConfigsDir = ".agent_configs" +// agentConfigsDir aliases the shared constant for local use. +const agentConfigsDir = opteval.AgentConfigsDir +// optimizeApplyFlags holds CLI flags for the optimize apply command. type optimizeApplyFlags struct { - candidate string - agent string + candidate string // candidate ID from optimization results + agent string // agent service name optimizeConnectionFlags } @@ -86,6 +95,8 @@ func (a *OptimizeApplyAction) Run(ctx context.Context, cmd *cobra.Command) error return a.apply(ctx, azdClient, svc, project, out, bold) } +// apply downloads and writes the candidate config, updates agent.yaml, +// stores state, and prints the config folder paths for comparison. 
func (a *OptimizeApplyAction) apply( ctx context.Context, azdClient *azdext.AzdClient, @@ -136,7 +147,7 @@ func (a *OptimizeApplyAction) apply( if err := writeAgentConfigFromCandidate(candidateDir, candidateConfig); err != nil { return fmt.Errorf("failed to write candidate config: %w", err) } - fmt.Fprintf(out, " → %s\n", filepath.Join(candidateDir, "metadata.yaml")) + fmt.Fprintf(out, " → %s\n", filepath.Join(candidateDir, opteval.MetadataFile)) // Step 3: Write OPTIMIZATION_LOCAL_DIR and OPTIMIZATION_CANDIDATE_ID into agent.yaml // so the deploy pipeline knows which local optimization config to use. @@ -173,8 +184,12 @@ func (a *OptimizeApplyAction) apply( fmt.Fprintf(out, " Run %s to deploy the optimized agent.\n", color.CyanString("azd deploy --service %s", svc.Name)) - // Show prompt diff (baseline → optimized). - printPromptDiff(out, serviceDir, a.flags.candidate, candidateConfig) + // Point the user to the config folders for comparison. + baselinePath := filepath.Join(serviceDir, agentConfigsDir, opteval.BaselineDir) + candidatePath := filepath.Join(serviceDir, agentConfigsDir, a.flags.candidate) + fmt.Fprintf(out, "\n To see the full diff, compare the files in:\n") + fmt.Fprintf(out, " Baseline: %s\n", color.CyanString(baselinePath)) + fmt.Fprintf(out, " Optimized: %s\n", color.CyanString(candidatePath)) return nil } @@ -194,69 +209,12 @@ type agentConfigMetadata struct { ToolsFile string `yaml:"tools_file,omitempty"` } -// saveBaselineConfig writes the agent's current configuration to -// /.agent_configs/baseline/ before optimization begins. -// It creates metadata.yaml with file pointers and writes instructions.md. -// The skill_dir in metadata.yaml points to the original skills directory -// via a relative path rather than copying the files. 
-func saveBaselineConfig(agentProject, skillDir, toolsFile string, req *optimize_api.OptimizeRequest) error { - baseDir := filepath.Join(agentProject, agentConfigsDir, "baseline") - if err := os.MkdirAll(baseDir, 0750); err != nil { - return fmt.Errorf("creating baseline directory: %w", err) - } - - meta := agentConfigMetadata{ - Name: req.Agent.AgentName, - Model: req.Agent.Model, - } - - // Write instructions.md if the agent has a system prompt. - if req.Agent.SystemPrompt != "" { - instructionPath := filepath.Join(baseDir, "instructions.md") - if err := os.WriteFile(instructionPath, []byte(req.Agent.SystemPrompt), 0600); err != nil { - return fmt.Errorf("writing baseline instructions: %w", err) - } - meta.InstructionFile = "instructions.md" - } - - // Point to the original skill directory via a relative path. - if skillDir != "" { - if rel, err := filepath.Rel(baseDir, skillDir); err == nil { - meta.SkillDir = filepath.ToSlash(rel) - } else { - meta.SkillDir = skillDir - } - } - - // Point to the tools definition file via a relative path. - if toolsFile != "" { - if rel, err := filepath.Rel(baseDir, toolsFile); err == nil { - meta.ToolsFile = filepath.ToSlash(rel) - } else { - meta.ToolsFile = toolsFile - } - } - - // Write metadata.yaml. - data, err := yaml.Marshal(meta) - if err != nil { - return fmt.Errorf("serializing baseline metadata: %w", err) - } - - metaPath := filepath.Join(baseDir, "metadata.yaml") - if err := os.WriteFile(metaPath, data, 0600); err != nil { - return fmt.Errorf("writing baseline metadata: %w", err) - } - - return nil -} - // loadBaselineConfig reads the baseline metadata.yaml from // /.agent_configs/baseline/metadata.yaml and resolves // file pointers to absolute paths. 
func loadBaselineConfig(agentProject string) (*agentConfigMetadata, error) { - baseDir := filepath.Join(agentProject, agentConfigsDir, "baseline") - metaPath := filepath.Join(baseDir, "metadata.yaml") + baseDir := filepath.Join(agentProject, agentConfigsDir, opteval.BaselineDir) + metaPath := filepath.Join(baseDir, opteval.MetadataFile) data, err := os.ReadFile(metaPath) //nolint:gosec // path derived from project directory if err != nil { return nil, err @@ -269,53 +227,6 @@ func loadBaselineConfig(agentProject string) (*agentConfigMetadata, error) { return &meta, nil } -// writeBaselineFromEvalInit creates a baseline config from eval init context. -// It writes metadata.yaml and instructions.md into .agent_configs/baseline/. -// The skill_dir points to the original skills directory via a relative path. -func writeBaselineFromEvalInit(agentProject, agentName, instruction string) error { - baseDir := filepath.Join(agentProject, agentConfigsDir, "baseline") - if err := os.MkdirAll(baseDir, 0750); err != nil { - return fmt.Errorf("creating baseline directory: %w", err) - } - - meta := agentConfigMetadata{ - Name: agentName, - } - - if instruction != "" { - instructionPath := filepath.Join(baseDir, "instructions.md") - if err := os.WriteFile(instructionPath, []byte(instruction), 0600); err != nil { - return fmt.Errorf("writing baseline instructions: %w", err) - } - meta.InstructionFile = "instructions.md" - } - - // Auto-detect skills directory and point to it via a relative path. 
- for _, candidate := range []string{"skills", "skill"} { - dir := filepath.Join(agentProject, candidate) - if info, err := os.Stat(dir); err == nil && info.IsDir() { - if rel, relErr := filepath.Rel(baseDir, dir); relErr == nil { - meta.SkillDir = filepath.ToSlash(rel) - } else { - meta.SkillDir = dir - } - break - } - } - - data, err := yaml.Marshal(meta) - if err != nil { - return fmt.Errorf("serializing baseline metadata: %w", err) - } - - metaPath := filepath.Join(baseDir, "metadata.yaml") - if err := os.WriteFile(metaPath, data, 0600); err != nil { - return fmt.Errorf("writing baseline metadata: %w", err) - } - - return nil -} - // resolveInstructions reads the instruction content from the metadata's // instruction_file, resolved relative to configDir. func (m *agentConfigMetadata) resolveInstructions(configDir string) string { @@ -375,11 +286,11 @@ func writeAgentConfigFromCandidate(candidateDir string, candidateConfig any) err // Write instructions.md from the candidate's system prompt. instructions := extractInstructions(candidateConfig) if instructions != "" { - instructionPath := filepath.Join(candidateDir, "instructions.md") + instructionPath := filepath.Join(candidateDir, opteval.InstructionFile) if err := os.WriteFile(instructionPath, []byte(instructions), 0600); err != nil { return fmt.Errorf("writing candidate instructions: %w", err) } - meta.InstructionFile = "instructions.md" + meta.InstructionFile = opteval.InstructionFile } // Write inline skills from the candidate config as individual files. @@ -390,9 +301,9 @@ func writeAgentConfigFromCandidate(candidateDir string, candidateConfig any) err } // Set skill_dir pointer if the skills/ dir exists (from inline or downloaded skills). 
- skillDir := filepath.Join(candidateDir, "skills") + skillDir := filepath.Join(candidateDir, opteval.SkillsDir) if info, err := os.Stat(skillDir); err == nil && info.IsDir() { - meta.SkillDir = "skills" + meta.SkillDir = opteval.SkillsDir } // Write tool_definitions as a JSON file. @@ -400,8 +311,8 @@ func writeAgentConfigFromCandidate(candidateDir string, candidateConfig any) err if err := writeToolDefinitions(candidateDir, m); err != nil { return fmt.Errorf("writing candidate tool definitions: %w", err) } - if _, err := os.Stat(filepath.Join(candidateDir, "tools.json")); err == nil { - meta.ToolsFile = "tools.json" + if _, err := os.Stat(filepath.Join(candidateDir, opteval.ToolsFile)); err == nil { + meta.ToolsFile = opteval.ToolsFile } } @@ -410,7 +321,7 @@ func writeAgentConfigFromCandidate(candidateDir string, candidateConfig any) err if err != nil { return fmt.Errorf("serializing candidate metadata: %w", err) } - metaPath := filepath.Join(candidateDir, "metadata.yaml") + metaPath := filepath.Join(candidateDir, opteval.MetadataFile) if err := os.WriteFile(metaPath, data, 0600); err != nil { return fmt.Errorf("writing candidate metadata: %w", err) } @@ -444,7 +355,7 @@ func writeInlineSkills(candidateDir string, config map[string]any) error { body, _ := sm["body"].(string) description, _ := sm["description"].(string) - skillSubDir := filepath.Join(candidateDir, "skills", name) + skillSubDir := filepath.Join(candidateDir, opteval.SkillsDir, name) if err := os.MkdirAll(skillSubDir, 0750); err != nil { return fmt.Errorf("creating skill directory %s: %w", name, err) } @@ -485,7 +396,7 @@ func writeToolDefinitions(candidateDir string, config map[string]any) error { return fmt.Errorf("serializing tool definitions: %w", err) } - return os.WriteFile(filepath.Join(candidateDir, "tools.json"), data, 0600) + return os.WriteFile(filepath.Join(candidateDir, opteval.ToolsFile), data, 0600) } // downloadSkillFilesToDir fetches the candidate manifest, downloads all skill @@ 
-541,7 +452,7 @@ func downloadSkillFilesToDir( } // cleanOtherCandidates removes all subdirectories in the optimization folder -// except "baseline" and the candidate being applied. +// except the baseline and the candidate being applied. func cleanOtherCandidates(optimizeDir, currentCandidate string, out io.Writer) { entries, err := os.ReadDir(optimizeDir) if err != nil { @@ -553,7 +464,7 @@ func cleanOtherCandidates(optimizeDir, currentCandidate string, out io.Writer) { continue } name := entry.Name() - if name == "baseline" || name == currentCandidate { + if name == opteval.BaselineDir || name == currentCandidate { continue } dir := filepath.Join(optimizeDir, name) @@ -565,64 +476,6 @@ func cleanOtherCandidates(optimizeDir, currentCandidate string, out io.Writer) { } } -// maxDiffPreviewLines is the max lines shown per section in the prompt diff preview. -const maxDiffPreviewLines = 4 - -// printPromptDiff displays an abbreviated prompt diff (baseline → optimized) -// with a short preview and a suggested command for the full diff. -func printPromptDiff(out io.Writer, serviceDir, candidateID string, candidateConfig any) { - optimized := extractInstructions(candidateConfig) - if optimized == "" { - return - } - - baseDir := filepath.Join(serviceDir, agentConfigsDir, "baseline") - baseline, err := loadBaselineConfig(serviceDir) - if err != nil { - return - } - baselineText := baseline.resolveInstructions(baseDir) - if baselineText == "" { - return - } - baselineLines := strings.Split(baselineText, "\n") - optimizedLines := strings.Split(optimized, "\n") - - fmt.Fprintf(out, "\n Prompt diff (baseline → optimized):\n\n") - - // Baseline preview (removed). - removed := color.New(color.FgRed) - removed.Fprintf(out, " — Baseline (%d lines, %d chars):\n", - len(baselineLines), len(baselineText)) - printPreviewLines(out, baselineLines, "- ", removed) - - fmt.Fprintln(out) - - // Optimized preview (added). 
- added := color.New(color.FgGreen) - added.Fprintf(out, " — Optimized (%d lines, %d chars):\n", - len(optimizedLines), len(optimized)) - printPreviewLines(out, optimizedLines, "+ ", added) - - // Suggest command to see the full diff. - baselinePath := filepath.Join(agentConfigsDir, "baseline", "instructions.md") - candidatePath := filepath.Join(agentConfigsDir, candidateID, "instructions.md") - fmt.Fprintf(out, "\n To see the full diff:\n") - fmt.Fprintf(out, " %s\n", - color.CyanString("diff %s %s", baselinePath, candidatePath)) -} - -// printPreviewLines prints up to maxDiffPreviewLines with a prefix, then "..." if truncated. -func printPreviewLines(out io.Writer, lines []string, prefix string, c *color.Color) { - limit := min(len(lines), maxDiffPreviewLines) - for _, line := range lines[:limit] { - c.Fprintf(out, " %s%s\n", prefix, line) - } - if len(lines) > maxDiffPreviewLines { - c.Fprintf(out, " %s... (%d more lines)\n", prefix, len(lines)-maxDiffPreviewLines) - } -} - // extractInstructions retrieves the system prompt string from a candidate config // returned by the optimization service. func extractInstructions(config any) string { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go new file mode 100644 index 00000000000..3f042e533d9 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go @@ -0,0 +1,255 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package cmd + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "testing" + + "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/optimize_api" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// ---- newOptimizeApplyCommand — command shape ---- + +func TestNewOptimizeApplyCommand_UseString(t *testing.T) { + t.Parallel() + cmd := newOptimizeApplyCommand(&azdext.ExtensionContext{}) + assert.Equal(t, "apply", cmd.Use) +} + +func TestNewOptimizeApplyCommand_Flags(t *testing.T) { + t.Parallel() + cmd := newOptimizeApplyCommand(&azdext.ExtensionContext{}) + + require.NotNil(t, cmd.Flags().Lookup("candidate")) + require.NotNil(t, cmd.Flags().Lookup("agent")) + require.NotNil(t, cmd.Flags().Lookup("endpoint")) + require.NotNil(t, cmd.Flags().Lookup("project-endpoint")) +} + +func TestNewOptimizeApplyCommand_CandidateIsRequired(t *testing.T) { + t.Parallel() + cmd := newOptimizeApplyCommand(&azdext.ExtensionContext{}) + cmd.SetArgs([]string{}) + err := cmd.Execute() + assert.Error(t, err) + assert.Contains(t, err.Error(), "candidate") +} + +// ---- extractInstructions ---- + +func TestExtractInstructions(t *testing.T) { + t.Parallel() + tests := []struct { + name string + config any + want string + }{ + { + "systemPrompt field", + map[string]any{"systemPrompt": "You are a helpful assistant."}, + "You are a helpful assistant.", + }, + { + "instructions field", + map[string]any{"instructions": "Follow the rules."}, + "Follow the rules.", + }, + { + "systemPrompt takes precedence", + map[string]any{ + "systemPrompt": "From systemPrompt", + "instructions": "From instructions", + }, + "From systemPrompt", + }, + {"nil config", nil, ""}, + {"non-map config", "just a string", ""}, + {"empty map", map[string]any{}, ""}, + {"non-string value", map[string]any{"systemPrompt": 42}, ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + 
t.Parallel() + assert.Equal(t, tt.want, extractInstructions(tt.config)) + }) + } +} + +// ---- agentConfigMetadata.resolveInstructions ---- + +func TestAgentConfigMetadata_ResolveInstructions(t *testing.T) { + t.Parallel() + t.Run("reads instruction file", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "instructions.md"), []byte("Be helpful."), 0600)) + + meta := &agentConfigMetadata{InstructionFile: "instructions.md"} + assert.Equal(t, "Be helpful.", meta.resolveInstructions(dir)) + }) + + t.Run("returns empty when no file set", func(t *testing.T) { + t.Parallel() + meta := &agentConfigMetadata{} + assert.Empty(t, meta.resolveInstructions(t.TempDir())) + }) + + t.Run("returns empty when file missing", func(t *testing.T) { + t.Parallel() + meta := &agentConfigMetadata{InstructionFile: "nonexistent.md"} + assert.Empty(t, meta.resolveInstructions(t.TempDir())) + }) +} + +// ---- agentConfigMetadata.resolveSkillDir ---- + +func TestAgentConfigMetadata_ResolveSkillDir(t *testing.T) { + t.Parallel() + t.Run("returns empty when not set", func(t *testing.T) { + t.Parallel() + meta := &agentConfigMetadata{} + assert.Empty(t, meta.resolveSkillDir("/some/dir")) + }) + + t.Run("resolves relative path", func(t *testing.T) { + t.Parallel() + meta := &agentConfigMetadata{SkillDir: "skills"} + result := meta.resolveSkillDir("/project/config") + assert.Equal(t, filepath.Join("/project/config", "skills"), result) + }) + + t.Run("preserves absolute path", func(t *testing.T) { + t.Parallel() + abs := filepath.Join(os.TempDir(), "absolute-skills") + meta := &agentConfigMetadata{SkillDir: abs} + assert.Equal(t, abs, meta.resolveSkillDir("/any/dir")) + }) +} + +// ---- writeAgentConfigFromCandidate ---- + +func TestWriteAgentConfigFromCandidate(t *testing.T) { + t.Parallel() + t.Run("writes metadata and instructions", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + config := map[string]any{ + "name": "test-agent", 
+ "model": "gpt-4o", + "systemPrompt": "Test prompt.", + } + + err := writeAgentConfigFromCandidate(dir, config) + require.NoError(t, err) + + assert.FileExists(t, filepath.Join(dir, opteval.MetadataFile)) + assert.FileExists(t, filepath.Join(dir, opteval.InstructionFile)) + + content, err := os.ReadFile(filepath.Join(dir, opteval.InstructionFile)) + require.NoError(t, err) + assert.Equal(t, "Test prompt.", string(content)) + }) + + t.Run("writes inline skills", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + config := map[string]any{ + "systemPrompt": "prompt", + "skills": []any{ + map[string]any{ + "name": "search", + "description": "Search the web", + "body": "Search content here.", + }, + }, + } + + err := writeAgentConfigFromCandidate(dir, config) + require.NoError(t, err) + + skillFile := filepath.Join(dir, opteval.SkillsDir, "search", "SKILL.md") + assert.FileExists(t, skillFile) + }) + + t.Run("handles nil config gracefully", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + err := writeAgentConfigFromCandidate(dir, nil) + require.NoError(t, err) + assert.FileExists(t, filepath.Join(dir, opteval.MetadataFile)) + }) +} + +// ---- cleanOtherCandidates ---- + +func TestCleanOtherCandidates(t *testing.T) { + t.Parallel() + dir := t.TempDir() + + // Create baseline, current candidate, and old candidate directories. + require.NoError(t, os.MkdirAll(filepath.Join(dir, opteval.BaselineDir), 0750)) + require.NoError(t, os.MkdirAll(filepath.Join(dir, "cand_current"), 0750)) + require.NoError(t, os.MkdirAll(filepath.Join(dir, "cand_old"), 0750)) + + var buf bytes.Buffer + cleanOtherCandidates(dir, "cand_current", &buf) + + // baseline and cand_current should remain; cand_old should be removed. 
+ assert.DirExists(t, filepath.Join(dir, opteval.BaselineDir)) + assert.DirExists(t, filepath.Join(dir, "cand_current")) + assert.NoDirExists(t, filepath.Join(dir, "cand_old")) +} + +// ---- isSkillFile ---- + +func TestIsSkillFile(t *testing.T) { + t.Parallel() + tests := []struct { + name string + file optimize_api.CandidateFile + want bool + }{ + {"skill type", optimize_api.CandidateFile{Type: "skill", Path: "foo.md"}, true}, + {"skills path prefix", optimize_api.CandidateFile{Type: "file", Path: "skills/search/SKILL.md"}, true}, + {"other type and path", optimize_api.CandidateFile{Type: "file", Path: "config.yaml"}, false}, + {"empty", optimize_api.CandidateFile{}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, isSkillFile(tt.file)) + }) + } +} + +// ---- isReservedEnvVarError ---- + +func TestIsReservedEnvVarError(t *testing.T) { + t.Parallel() + tests := []struct { + name string + err error + want bool + }{ + {"nil error", nil, false}, + {"reserved for platform use", fmt.Errorf("variable is reserved for platform use"), true}, + {"AGENT_* variables", fmt.Errorf("AGENT_* variables are reserved"), true}, + {"unrelated error", fmt.Errorf("connection refused"), false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, isReservedEnvVarError(tt.err)) + }) + } +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_cancel.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_cancel.go index 7908627eede..597f3c8ce72 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_cancel.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_cancel.go @@ -1,6 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// optimize_cancel.go implements the "optimize cancel" command, which cancels +// a running optimization job by its operation ID. 
+ package cmd import ( @@ -12,6 +15,7 @@ import ( "github.com/spf13/cobra" ) +// optimizeCancelFlags holds connection settings for the cancel command. type optimizeCancelFlags struct { optimizeConnectionFlags } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go index 5b187ce6863..0427af277f6 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -1,11 +1,15 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// optimize_config.go defines OptimizeConfig (the YAML config structure for +// optimization jobs), provides loading/validation, and converts configs into +// API requests. It also handles reading skills from disk and parsing YAML +// frontmatter in skill files. + package cmd import ( "bufio" - "encoding/json" "fmt" "os" "path/filepath" @@ -26,6 +30,10 @@ type OptimizeConfig struct { Criteria []OptimizeConfigCriterion `yaml:"criteria,omitempty"` Options *opteval.Options `yaml:"options"` InlineDataset []optimize_api.DatasetTask `yaml:"-"` // populated by defaultOptimizeConfig, not from YAML + + // Runtime-only: resolved skill directory and tools file (not serialized to YAML). + SkillDir string `yaml:"-"` + ToolsFile string `yaml:"-"` } // OptimizeConfigCriterion is a named evaluation criterion with a natural-language instruction. @@ -183,7 +191,7 @@ func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.Optimi } if c.DatasetFile != "" { - tasks, err := loadDatasetFile(c.DatasetFile) + tasks, err := loadJSONLFile[optimize_api.DatasetTask](c.DatasetFile) if err != nil { return nil, err } @@ -193,20 +201,20 @@ func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.Optimi } // Load skills from skill_dir if specified. 
- if c.Agent.SkillDir != "" { - skills, err := loadSkillsFromDir(c.Agent.SkillDir) + if c.SkillDir != "" { + skills, err := loadSkillsFromDir(c.SkillDir) if err != nil { - return nil, fmt.Errorf("loading skills from %s: %w", c.Agent.SkillDir, err) + return nil, fmt.Errorf("loading skills from %s: %w", c.SkillDir, err) } req.Agent.Skills = skills } // Load tool definitions if a tools file is specified. // TODO: re-enable when tools optimization is supported in the service. - // if c.Agent.ToolsFile != "" { - // tools, err := loadToolDefinitions(c.Agent.ToolsFile) + // if c.ToolsFile != "" { + // tools, err := loadToolDefinitions(c.ToolsFile) // if err != nil { - // return nil, fmt.Errorf("loading tool definitions from %s: %w", c.Agent.ToolsFile, err) + // return nil, fmt.Errorf("loading tool definitions from %s: %w", c.ToolsFile, err) // } // req.Agent.ToolDefinitions = tools // } @@ -214,41 +222,6 @@ func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.Optimi return req, nil } -// loadDatasetFile reads a JSONL file where each line is a JSON DatasetTask. 
-func loadDatasetFile(path string) ([]optimize_api.DatasetTask, error) { - f, err := os.Open(path) - if err != nil { - return nil, fmt.Errorf("failed to open dataset file %s: %w", path, err) - } - defer f.Close() - - var tasks []optimize_api.DatasetTask - scanner := bufio.NewScanner(f) - lineNum := 0 - for scanner.Scan() { - lineNum++ - line := scanner.Text() - if line == "" { - continue - } - var task optimize_api.DatasetTask - if err := json.Unmarshal([]byte(line), &task); err != nil { - return nil, fmt.Errorf("failed to parse dataset line %d: %w", lineNum, err) - } - tasks = append(tasks, task) - } - - if err := scanner.Err(); err != nil { - return nil, fmt.Errorf("error reading dataset file %s: %w", path, err) - } - - if len(tasks) == 0 { - return nil, fmt.Errorf("dataset file %s contains no tasks", path) - } - - return tasks, nil -} - // loadSkillsFromDir reads skill files from a directory and returns SkillDefinitions. // For markdown files (.md), YAML frontmatter is parsed to extract name and description; // the content after the frontmatter becomes the skill body. @@ -359,18 +332,3 @@ func splitFrontmatter(content string) (string, string) { // No closing delimiter found — treat entire content as body. return "", content } - -// loadToolDefinitions reads a JSON file containing an array of OpenAI-format -// function tool definitions and returns them as ToolDefinition structs. 
-func loadToolDefinitions(path string) ([]optimize_api.ToolDefinition, error) { - data, err := os.ReadFile(path) //nolint:gosec // user-provided path validated earlier - if err != nil { - return nil, fmt.Errorf("reading tool definitions file: %w", err) - } - - var tools []optimize_api.ToolDefinition - if err := json.Unmarshal(data, &tools); err != nil { - return nil, fmt.Errorf("parsing tool definitions: %w", err) - } - return tools, nil -} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go index 194b2a40eb6..c9f1d05a308 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go @@ -1,6 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// optimize_deploy.go implements the "optimize deploy" command, which deploys +// an optimization candidate directly to a Foundry agent (without requiring +// an azd project). It fetches the candidate config, patches the agent, and +// creates a new agent version. 
+ package cmd import ( diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy_test.go index bc4f0e408a7..29289f60e84 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy_test.go @@ -4,6 +4,8 @@ package cmd import ( + "os" + "path/filepath" "testing" "github.com/azure/azure-dev/cli/azd/pkg/azdext" @@ -154,3 +156,46 @@ func TestNormalizeProtocolVersions_MissingField(t *testing.T) { def := map[string]any{"kind": "hosted"} normalizeProtocolVersions(def) // should not panic } + +// ---- upsertAgentYamlEnvVar ---- + +func TestUpsertAgentYamlEnvVar_InsertsNew(t *testing.T) { + t.Parallel() + dir := t.TempDir() + yamlPath := filepath.Join(dir, "agent.yaml") + require.NoError(t, os.WriteFile(yamlPath, []byte("name: test-agent\n"), 0600)) + + err := upsertAgentYamlEnvVar(yamlPath, "MY_VAR", "my_value") + require.NoError(t, err) + + data, err := os.ReadFile(yamlPath) + require.NoError(t, err) + assert.Contains(t, string(data), "MY_VAR") + assert.Contains(t, string(data), "my_value") +} + +func TestUpsertAgentYamlEnvVar_UpdatesExisting(t *testing.T) { + t.Parallel() + dir := t.TempDir() + yamlPath := filepath.Join(dir, "agent.yaml") + content := `name: test-agent +environment_variables: + - name: MY_VAR + value: old_value +` + require.NoError(t, os.WriteFile(yamlPath, []byte(content), 0600)) + + err := upsertAgentYamlEnvVar(yamlPath, "MY_VAR", "new_value") + require.NoError(t, err) + + data, err := os.ReadFile(yamlPath) + require.NoError(t, err) + assert.Contains(t, string(data), "new_value") + assert.NotContains(t, string(data), "old_value") +} + +func TestUpsertAgentYamlEnvVar_FileMissing(t *testing.T) { + t.Parallel() + err := upsertAgentYamlEnvVar("/nonexistent/agent.yaml", "KEY", "VALUE") + assert.Error(t, err) +} diff --git 
a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go index 6a11442a2fc..7129ca9bf77 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go @@ -1,6 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// optimize_helpers.go provides shared utilities for optimize commands: +// connection flag resolution, job ID persistence in the azd environment, +// and portal link construction. + package cmd import ( @@ -10,16 +14,16 @@ import ( "os" "strings" - "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + "azureaiagent/internal/pkg/agents/eval_api" + azdext "github.com/azure/azure-dev/cli/azd/pkg/azdext" - "github.com/fatih/color" "github.com/spf13/cobra" ) // optimizeConnectionFlags holds connection settings shared across all optimize sub-commands. type optimizeConnectionFlags struct { - projectEndpoint string - endpoint string // override: direct optimization service URL (for local dev only) + projectEndpoint string // Foundry project endpoint URL + endpoint string // direct optimization service URL (for local dev only) } // register adds the connection flags to the given cobra command. @@ -64,13 +68,6 @@ const optimizeAPIVersion = "v1" // optimizeLastJobIDKey is the azd environment key for the last optimization job ID. const optimizeLastJobIDKey = "OPTIMIZE_LAST_OPERATION_ID" -// tokenRequestOptions returns the token request options for Azure AI scope. -func tokenRequestOptions() policy.TokenRequestOptions { - return policy.TokenRequestOptions{ - Scopes: []string{"https://ai.azure.com/.default"}, - } -} - // saveLastOptimizeJobID stores the operation ID in the azd environment. // Best-effort — silently ignores errors (e.g., when running outside azd). 
func saveLastOptimizeJobID(ctx context.Context, operationID string) { @@ -130,11 +127,7 @@ func printOptimizePortalLink(ctx context.Context, out io.Writer, agentName, oper return } - prefix := resolvePortalPrefix(ctx, azdClient, envResp.Environment.Name) - if prefix == nil { - return - } - - url := prefix.OptimizationURL(agentName, operationID) - fmt.Fprintf(out, " Portal: %s\n", color.CyanString(url)) + printPortalLink(ctx, out, azdClient, envResp.Environment.Name, func(prefix *eval_api.PortalPrefix) string { + return prefix.OptimizationURL(agentName, operationID) + }) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go index 36de799fd3b..312c1d213b9 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go @@ -1,6 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// optimize_list.go implements the "optimize list" command, which lists +// recent optimization jobs with status, agent, and score. + package cmd import ( @@ -14,9 +17,10 @@ import ( "github.com/spf13/cobra" ) +// optimizeListFlags holds CLI flags for the optimize list command. type optimizeListFlags struct { - limit int - status string + limit int // maximum number of results + status string // filter by job status optimizeConnectionFlags } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go new file mode 100644 index 00000000000..5dae37c0a8c --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go @@ -0,0 +1,446 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +// optimize_prompts.go contains interactive resolution functions for the +// optimize command: system prompt, skill directory, config confirmation, +// and target model selection. + +package cmd + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "azureaiagent/internal/pkg/agents/opteval" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" +) + +// resolveOptimizeSystemPrompt resolves the agent's system prompt: +// +// 1. Config dir pointer (agent.config): instruction from metadata.yaml (already resolved). +// 2. Config (eval.yaml / --config): inline instruction or file reference. +// 3. Interactive prompt: ask the user to provide inline text or a file path. +// +// Relative file paths are resolved against agentProject. +func resolveOptimizeSystemPrompt( + ctx context.Context, + cfg *OptimizeConfig, + agentProject string, + hasProject bool, + noPrompt bool, +) error { + // Resolve relative instruction file paths against the agent project directory. + if cfg.Agent.Instruction.File != "" && hasProject && !filepath.IsAbs(cfg.Agent.Instruction.File) { + cfg.Agent.Instruction.File = filepath.Join(agentProject, cfg.Agent.Instruction.File) + } + + // Step 1: Config explicitly declares a file reference — validate it's readable. + if cfg.Agent.Instruction.File != "" { + if _, err := os.Stat(cfg.Agent.Instruction.File); err != nil { + return fmt.Errorf("instruction file %q from config is not accessible: %w", + cfg.Agent.Instruction.File, err) + } + return nil + } + + // Step 1b: Config already has inline instruction — nothing to do. + if cfg.Agent.Instruction.Value != "" { + return nil + } + + // Step 2: Interactive prompt — ask user to provide inline text or a file path. + if noPrompt { + return fmt.Errorf("instruction is required for optimization.\n\n" + + "Provide it via one of:\n" + + " 1. Set agent.config in eval.yaml to point to a config dir with metadata.yaml\n" + + " 2. 
Set instruction in eval.yaml (agent section): inline string or file reference\n" + + " 3. Run without --no-prompt to enter it interactively") + } + + azdClient, clientErr := azdext.NewAzdClient() + if clientErr != nil { + return fmt.Errorf("instruction is required but could not open interactive prompt: %w", clientErr) + } + defer azdClient.Close() + + inputChoices := []*azdext.SelectChoice{ + {Label: "Type inline", Value: "inline"}, + {Label: "Load from file", Value: "file"}, + } + defaultIdx := int32(0) + selResp, selErr := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ + Options: &azdext.SelectOptions{ + Message: "No instruction found in config or baseline. " + + "How would you like to provide it?", + Choices: inputChoices, + SelectedIndex: &defaultIdx, + }, + }) + if selErr != nil { + return fmt.Errorf("prompting for instruction input method: %w", selErr) + } + + if inputChoices[int(*selResp.Value)].Value == "file" { + pathResp, pathErr := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Path to instruction file", + IgnoreHintKeys: true, + }, + }) + if pathErr != nil { + return fmt.Errorf("prompting for instruction file path: %w", pathErr) + } + filePath := strings.TrimSpace(pathResp.Value) + // Resolve relative paths against the agent project directory. 
+ if !filepath.IsAbs(filePath) && hasProject { + filePath = filepath.Join(agentProject, filePath) + } + if _, err := os.Stat(filePath); err != nil { + return fmt.Errorf("instruction file %q is not accessible: %w", filePath, err) + } + cfg.Agent.Instruction.File = filePath + } else { + resp, promptErr := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Enter the agent's instruction", + IgnoreHintKeys: true, + }, + }) + if promptErr != nil { + return fmt.Errorf("prompting for instruction: %w", promptErr) + } + cfg.Agent.Instruction.Value = strings.TrimSpace(resp.Value) + } + + return nil +} + +// resolveOptimizeSkillDir resolves the agent's skill directory: +// 1. Config dir pointer (agent.config): skill_dir from metadata.yaml (already resolved). +// 2. Auto-detect: look for a "skills/" folder in the agent project — confirm with user. +// 3. Interactive prompt: ask the user to provide a path or skip. +func resolveOptimizeSkillDir( + ctx context.Context, + cfg *OptimizeConfig, + agentProject string, + noPrompt bool, +) error { + // Step 1: Auto-detect common skill directory names. + var detectedDir string + for _, candidate := range []string{"skills", "skill"} { + dir := filepath.Join(agentProject, candidate) + if info, err := os.Stat(dir); err == nil && info.IsDir() { + detectedDir = dir + break + } + } + + if noPrompt { + // In no-prompt mode, use whatever was detected (may be empty). + cfg.SkillDir = detectedDir + return nil + } + + azdClient, clientErr := azdext.NewAzdClient() + if clientErr != nil { + cfg.SkillDir = detectedDir + return nil + } + defer azdClient.Close() + + if detectedDir != "" { + // Found a skill directory — ask user to confirm or provide a different one. 
+ choices := []*azdext.SelectChoice{ + {Label: fmt.Sprintf("Use detected: %s", detectedDir), Value: "use"}, + {Label: "Provide a different path", Value: "other"}, + {Label: "Skip (no skills)", Value: "skip"}, + } + defaultIdx := int32(0) + selResp, selErr := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ + Options: &azdext.SelectOptions{ + Message: fmt.Sprintf("Found skills directory: %s", detectedDir), + Choices: choices, + SelectedIndex: &defaultIdx, + }, + }) + if selErr != nil { + cfg.SkillDir = detectedDir + return nil + } + + switch choices[int(*selResp.Value)].Value { + case "use": + cfg.SkillDir = detectedDir + return nil + case "skip": + return nil + case "other": + // Fall through to path prompt below. + } + } else { + // No skill directory found — ask if they want to provide one. + resp, promptErr := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: "No skills directory found. Would you like to provide one?", + DefaultValue: new(bool), // default false + }, + }) + if promptErr != nil || !resp.GetValue() { + return nil // skip skills + } + } + + // Prompt for a custom path. + pathResp, pathErr := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Path to skills directory", + IgnoreHintKeys: true, + }, + }) + if pathErr != nil { + return fmt.Errorf("prompting for skills directory: %w", pathErr) + } + + dir := strings.TrimSpace(pathResp.Value) + if dir == "" { + return nil + } + if !filepath.IsAbs(dir) { + dir = filepath.Join(agentProject, dir) + } + if info, err := os.Stat(dir); err != nil || !info.IsDir() { + return fmt.Errorf("skills directory %q is not accessible or not a directory", dir) + } + + cfg.SkillDir = dir + return nil +} + +// promptOptimizeConfigConfirmation shows the resolved values from the baseline +// config and lets the user confirm or override instruction file, skills +// directory, and tools file. 
+func promptOptimizeConfigConfirmation(ctx context.Context, cfg *OptimizeConfig, agentProject string) error { + azdClient, clientErr := azdext.NewAzdClient() + if clientErr != nil { + return nil // non-fatal — skip confirmation prompts + } + defer azdClient.Close() + prompt := azdClient.Prompt() + + // Instruction file. + instrDefault := relativeDisplay(cfg.Agent.Instruction.File, agentProject) + resp, err := prompt.Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Instruction file", + DefaultValue: instrDefault, + IgnoreHintKeys: true, + }, + }) + if err != nil { + return fmt.Errorf("prompting for instruction file: %w", err) + } + if value := strings.TrimSpace(resp.Value); value != "" { + if !filepath.IsAbs(value) && agentProject != "" { + value = filepath.Join(agentProject, value) + } + if _, err := os.Stat(value); err != nil { + return fmt.Errorf("instruction file %q is not accessible: %w", value, err) + } + cfg.Agent.Instruction.File = value + cfg.Agent.Instruction.Value = "" + } + + // Skills directory. + skillDefault := relativeDisplay(cfg.SkillDir, agentProject) + resp, err = prompt.Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Skills directory (enter to skip)", + DefaultValue: skillDefault, + IgnoreHintKeys: true, + }, + }) + if err != nil { + return fmt.Errorf("prompting for skills directory: %w", err) + } + if value := strings.TrimSpace(resp.Value); value != "" { + if !filepath.IsAbs(value) && agentProject != "" { + value = filepath.Join(agentProject, value) + } + cfg.SkillDir = value + } else { + cfg.SkillDir = "" + } + + // TODO: re-enable tools file prompt when tools optimization is supported. + // // Tools file. 
+ // toolsDefault := relativeDisplay(cfg.ToolsFile, agentProject) + // resp, err = prompt.Prompt(ctx, &azdext.PromptRequest{ + // Options: &azdext.PromptOptions{ + // Message: "Tools file (enter to skip)", + // DefaultValue: toolsDefault, + // IgnoreHintKeys: true, + // }, + // }) + // if err != nil { + // return fmt.Errorf("prompting for tools file: %w", err) + // } + // if value := strings.TrimSpace(resp.Value); value != "" { + // if !filepath.IsAbs(value) && agentProject != "" { + // value = filepath.Join(agentProject, value) + // } + // cfg.ToolsFile = value + // } else { + // cfg.ToolsFile = "" + // } + + return nil +} + +// resolveOptimizeTargetModels prompts the user to select model candidates +// for optimization (target_config.model). Fetches actual deployments from the +// Foundry project and allows multi-select. +func resolveOptimizeTargetModels( + ctx context.Context, + cfg *OptimizeConfig, +) error { + azdClient, clientErr := azdext.NewAzdClient() + if clientErr != nil { + return nil + } + defer azdClient.Close() + + currentModel := cfg.Agent.Model + + resp, promptErr := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ + Options: &azdext.ConfirmOptions{ + Message: "Would you like to specify target models for optimization?", + DefaultValue: new(bool), // default false + }, + }) + if promptErr != nil || !resp.GetValue() { + return nil + } + + // Fetch deployed models from the Foundry project. 
+ choices := buildOptimizeModelChoices(ctx, azdClient, currentModel) + + message := "Select target models for optimization" + if currentModel != "" { + message = fmt.Sprintf("Select target models for optimization (current: %s)", currentModel) + } + + multiResp, multiErr := azdClient.Prompt().MultiSelect(ctx, &azdext.MultiSelectRequest{ + Options: &azdext.MultiSelectOptions{ + Message: message, + Choices: choices, + }, + }) + if multiErr != nil { + return fmt.Errorf("prompting for target models: %w", multiErr) + } + + var models []string + for _, v := range multiResp.Values { + models = append(models, v.Value) + } + + if len(models) > 0 { + if cfg.Options.TargetConfig == nil { + cfg.Options.TargetConfig = &opteval.TargetConfig{} + } + cfg.Options.TargetConfig.Model = models + } + + return nil +} + +// buildOptimizeModelChoices fetches Foundry project deployments and returns +// MultiSelectChoice items. The current deployed model is pre-selected. +// Falls back to an empty list if deployments cannot be fetched. +func buildOptimizeModelChoices(ctx context.Context, azdClient *azdext.AzdClient, currentModel string) []*azdext.MultiSelectChoice { + deployments := listDeploymentsFromEnv(ctx, azdClient) + + var choices []*azdext.MultiSelectChoice + seen := make(map[string]bool) + + // If current model is present in deployments, it will be marked below. + // If not (and it's non-empty), prepend it as a pre-selected entry. 
+ if currentModel != "" { + found := false + for _, d := range deployments { + if d.Name == currentModel { + found = true + break + } + } + if !found { + choices = append(choices, &azdext.MultiSelectChoice{ + Label: currentModel + " (current)", + Value: currentModel, + Selected: true, + }) + seen[currentModel] = true + } + } + + for _, d := range deployments { + if seen[d.Name] { + continue + } + label := d.Name + if d.ModelName != "" && d.ModelName != d.Name { + label = fmt.Sprintf("%s (%s)", d.Name, d.ModelName) + } + selected := d.Name == currentModel + if selected { + label += " (current)" + } + choices = append(choices, &azdext.MultiSelectChoice{ + Label: label, + Value: d.Name, + Selected: selected, + }) + seen[d.Name] = true + } + + return choices +} + +// listDeploymentsFromEnv reads AZURE_AI_PROJECT_ID from the azd environment +// and returns the Foundry project's model deployments. Returns nil on failure. +func listDeploymentsFromEnv(ctx context.Context, azdClient *azdext.AzdClient) []FoundryDeploymentInfo { + envResp, err := azdClient.Environment().GetCurrent(ctx, &azdext.EmptyRequest{}) + if err != nil || envResp == nil || envResp.Environment == nil { + return nil + } + + v, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envResp.Environment.Name, + Key: "AZURE_AI_PROJECT_ID", + }) + if err != nil || v.Value == "" { + return nil + } + + project, err := extractProjectDetails(v.Value) + if err != nil { + return nil + } + + cred, err := newAgentCredential() + if err != nil { + return nil + } + + deployments, _ := listProjectDeployments( + ctx, cred, + project.SubscriptionId, + project.ResourceGroupName, + project.AccountName, + ) + return deployments +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go index 4c6b912aa5b..9af0f57abfe 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go +++ 
b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go @@ -1,6 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// optimize_status.go implements the "optimize status" command, which checks +// or watches the status of an optimization job. + package cmd import ( @@ -14,9 +17,10 @@ import ( "github.com/spf13/cobra" ) +// optimizeStatusFlags holds CLI flags for the optimize status command. type optimizeStatusFlags struct { - watch bool - pollInterval int + watch bool // poll until job completes + pollInterval int // polling interval in seconds optimizeConnectionFlags } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go index 21f3c8ef33d..b47292ca9aa 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go @@ -48,7 +48,6 @@ func TestOptimizeCommand_AcceptsConfigFlag(t *testing.T) { require.NotNil(t, f, "--config flag should be registered") assert.Equal(t, "c", f.Shorthand, "--config should have -c shorthand") - assert.NotNil(t, cmd.Flags().Lookup("watch")) assert.NotNil(t, cmd.Flags().Lookup("poll-interval")) assert.NotNil(t, cmd.Flags().Lookup("endpoint")) assert.NotNil(t, cmd.Flags().Lookup("agent")) @@ -58,10 +57,6 @@ func TestOptimizeCommand_AcceptsConfigFlag(t *testing.T) { func TestOptimizeCommand_DefaultFlags(t *testing.T) { cmd := newOptimizeCommand(&azdext.ExtensionContext{}) - watchVal, err := cmd.Flags().GetBool("watch") - require.NoError(t, err) - assert.True(t, watchVal, "--watch should default to true") - pollVal, err := cmd.Flags().GetInt("poll-interval") require.NoError(t, err) assert.Equal(t, 5, pollVal, "--poll-interval should default to 5") @@ -89,3 +84,21 @@ func TestFormatOptimizeStatus(t *testing.T) { assert.NotEmpty(t, formatOptimizeStatus(optimize_api.StatusRunning)) assert.NotEmpty(t, 
formatOptimizeStatus("unknown")) } + +// ---- defaultOptimizeConfig ---- + +func TestDefaultOptimizeConfig(t *testing.T) { + t.Parallel() + cfg := defaultOptimizeConfig("my-agent") + + assert.Equal(t, "my-agent", cfg.Agent.Name) + assert.NotEmpty(t, cfg.InlineDataset) + require.NotNil(t, cfg.Options) + assert.Equal(t, "gpt-4o", cfg.Options.EvalModel) + assert.Equal(t, "optimize", cfg.Options.Mode) + assert.Equal(t, 5, cfg.Options.Budget) + assert.Contains(t, cfg.Options.TargetAttributes, "instruction") + assert.Contains(t, cfg.Options.TargetAttributes, "skill") + require.Len(t, cfg.Evaluators, 1) + assert.Equal(t, "builtin.task_adherence", cfg.Evaluators[0].Name) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go index 00036744a78..736bb4633e2 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go @@ -23,20 +23,13 @@ const ( EvaluatorContractFile = "rubric_dimensions.json" ) -// ResolveEvalOutputPath resolves the eval output config path. If output is -// already absolute it is returned as-is; otherwise it is joined with the -// agent project directory. -func ResolveEvalOutputPath(output, agentProject string) string { - if filepath.IsAbs(output) { - return output +// ResolveRelPath resolves a relative path against the agent project directory. +// If the path is already absolute it is returned as-is. +func ResolveRelPath(path, agentProject string) string { + if filepath.IsAbs(path) { + return path } - return filepath.Join(agentProject, output) -} - -// ResolveEvalConfigPath resolves the eval config path for reading. Follows the -// same logic as ResolveEvalOutputPath. 
-func ResolveEvalConfigPath(config, agentProject string) string { - return ResolveEvalOutputPath(config, agentProject) + return filepath.Join(agentProject, path) } // DownloadDatasetArtifact downloads the dataset and writes it locally. diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go index 699ce2680f1..6a45ede72ec 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go @@ -61,26 +61,18 @@ func BuildGenerationSources(agentKind, agentName, version, instruction string, t // --------------------------------------------------------------------------- // NewDataGenerationJobRequest builds a DataGenerationJobRequest from the -// provided parameters. When sources contain a "traces" entry, the generation -// type is set to "traces"; otherwise it defaults to "simple_qna". +// provided parameters. Currently, it's always "simple_qna" type with multiple sources func NewDataGenerationJobRequest( name, evalModel string, maxSamples int, sources []GenerationSource, ) *DataGenerationJobRequest { - genType := "simple_qna" - for _, s := range sources { - if s.Type == "traces" { - genType = "traces" - break - } - } return &DataGenerationJobRequest{ Inputs: DataGenerationInputs{ Name: name, Scenario: "evaluation", Options: DataGenerationOptions{ - Type: genType, + Type: "simple_qna", MaxSamples: maxSamples, ModelOptions: ModelOptions{ Model: evalModel, diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/state.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/state.go new file mode 100644 index 00000000000..d6aa897acd1 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/state.go @@ -0,0 +1,98 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +// state.go centralizes transient runtime state that is persisted in the azd +// environment across CLI invocations. This covers eval job tracking and any +// other cross-invocation state needed by eval, optimize, or related commands. + +package opteval + +import ( + "context" + "fmt" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" +) + +// EvalState holds transient runtime state stored in the azd environment +// for tracking generation job progress across CLI invocations. +type EvalState struct { + InitStatus string // overall init status + DatasetGenOpID string // dataset generation operation ID + DatasetGenStatus string // dataset generation job status + EvalGenOpID string // evaluator generation operation ID + EvalGenStatus string // evaluator generation job status + EvalID string // created eval ID for running evals +} + +// InitStatus values. +const ( + InitStatusPending = "pending" + InitStatusCompleted = "completed" +) + +// Azd environment keys for persisting eval state across CLI invocations. +const ( + evalKeyInitStatus = "LAST_EVAL_INIT_STATUS" + evalKeyDatasetGenOpID = "LAST_EVAL_DATASET_GEN_OP_ID" + evalKeyDatasetGenStatus = "LAST_EVAL_DATASET_GEN_STATUS" + evalKeyEvalGenOpID = "LAST_EVAL_GEN_OP_ID" + evalKeyEvalGenStatus = "LAST_EVAL_GEN_STATUS" + evalKeyEvalID = "LAST_EVAL_ID" +) + +// LoadEvalState reads eval runtime state from the azd environment. +// Returns an empty state if no values are set. 
+func LoadEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string) *EvalState { + get := func(key string) string { + v, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envName, Key: key, + }) + if err != nil || v.Value == "" { + return "" + } + return v.Value + } + return &EvalState{ + InitStatus: get(evalKeyInitStatus), + DatasetGenOpID: get(evalKeyDatasetGenOpID), + DatasetGenStatus: get(evalKeyDatasetGenStatus), + EvalGenOpID: get(evalKeyEvalGenOpID), + EvalGenStatus: get(evalKeyEvalGenStatus), + EvalID: get(evalKeyEvalID), + } +} + +// SaveEvalState persists eval runtime state to the azd environment. +func SaveEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string, state *EvalState) error { + pairs := []struct { + key, val string + }{ + {evalKeyInitStatus, state.InitStatus}, + {evalKeyDatasetGenOpID, state.DatasetGenOpID}, + {evalKeyDatasetGenStatus, state.DatasetGenStatus}, + {evalKeyEvalGenOpID, state.EvalGenOpID}, + {evalKeyEvalGenStatus, state.EvalGenStatus}, + {evalKeyEvalID, state.EvalID}, + } + for _, p := range pairs { + if _, err := azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ + EnvName: envName, Key: p.key, Value: p.val, + }); err != nil { + return fmt.Errorf("setting %s in azd env: %w", p.key, err) + } + } + return nil +} + +// ClearEvalState removes eval state keys from the azd environment. 
+func ClearEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string) { + for _, key := range []string{ + evalKeyInitStatus, evalKeyDatasetGenOpID, evalKeyDatasetGenStatus, + evalKeyEvalGenOpID, evalKeyEvalGenStatus, evalKeyEvalID, + } { + _, _ = azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ + EnvName: envName, Key: key, Value: "", + }) + } +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go index 930936b9e43..d32bd103702 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go @@ -137,7 +137,81 @@ func (el EvaluatorList) SetLocalURI(name, uri string) { } } +// Agent config directory structure +// +// Each agent configuration version (baseline or optimized candidate) is stored +// under AgentConfigsDir as a self-contained directory with a fixed layout: +// +// .agent_configs/ +// ├── baseline/ # original agent config captured by eval init or optimize +// │ ├── metadata.yaml # MetadataFile — model, file pointers +// │ ├── instructions.md # InstructionFile — system prompt +// │ ├── skills/ # SkillsDir — skill definitions (optional) +// │ └── tools.json # ToolsFile — tool definitions (optional) +// └── / # optimized candidate written by optimize apply +// ├── metadata.yaml +// ├── instructions.md +// ├── skills/ +// └── tools.json +// +// Both eval and optimize commands share these constants and layout conventions. +// Eval init writes the baseline directory; optimize apply writes candidate +// directories and reads the baseline for diff display. +const ( + // AgentConfigsDir is the top-level folder that holds agent configuration + // versions (baseline and optimized candidates). + AgentConfigsDir = ".agent_configs" + + // BaselineDir is the subdirectory name for the original agent configuration. 
+ BaselineDir = "baseline" + + // MetadataFile is the YAML file in each config directory that describes + // the agent model, instruction file path, skill directory, and tools file. + MetadataFile = "metadata.yaml" + + // InstructionFile is the Markdown file containing the agent's system prompt. + InstructionFile = "instructions.md" + + // SkillsDir is the subdirectory containing skill definition files. + SkillsDir = "skills" + + // ToolsFile is the JSON file containing tool definitions. + ToolsFile = "tools.json" +) + +// BaselineConfigRelPath returns the project-relative path to the baseline +// metadata file: ".agent_configs/baseline/metadata.yaml". +func BaselineConfigRelPath() string { + return filepath.Join(AgentConfigsDir, BaselineDir, MetadataFile) +} + +// AgentConfig holds resolved agent configuration from metadata.yaml. +// Unlike AgentRef (the YAML-serializable reference), AgentConfig contains +// fully resolved absolute paths and values for use during command execution. +type AgentConfig struct { + ConfigFile string // project-relative path to metadata.yaml + Model string // resolved model name + InstructionFile string // absolute path to instruction file + SkillDir string // absolute path to skills directory + ToolsFile string // absolute path to tools definition file +} + +// ResolvedInstruction reads and returns the instruction file content. +// Returns empty string if no instruction file is set or the file cannot be read. +func (c *AgentConfig) ResolvedInstruction() string { + if c.InstructionFile == "" { + return "" + } + data, err := os.ReadFile(c.InstructionFile) //nolint:gosec // path from project config + if err != nil { + return "" + } + return string(data) +} + // AgentRef references the agent under evaluation/optimization. +// Optimize-specific fields (skill_dir, tools_file) are stored in +// OptimizeConfig, not here, so eval.yaml stays target-agnostic. 
type AgentRef struct { Name string `yaml:"name"` Kind agent_yaml.AgentKind `yaml:"kind,omitempty"` @@ -145,19 +219,15 @@ type AgentRef struct { ConfigFile string `yaml:"config,omitempty"` Model string `yaml:"model,omitempty"` Instruction InstructionRef `yaml:"instruction,omitempty"` - SkillDir string `yaml:"skill_dir,omitempty"` - ToolsFile string `yaml:"tools_file,omitempty"` } -// ResolveFromConfig loads the metadata.yaml pointed to by ConfigFile and fills -// in empty fields (Model, Instruction, SkillDir). Relative paths are resolved -// against projectDir. File pointers inside metadata.yaml (instruction_file, -// skill_dir) are resolved relative to the directory containing the config file. -// Returns the absolute directory containing the config file, or empty string -// if ConfigFile is not set. -func (a *AgentRef) ResolveFromConfig(projectDir string) string { +// ResolveConfig loads the metadata.yaml pointed to by ConfigFile and returns +// a resolved AgentConfig without mutating the AgentRef. Relative paths inside +// metadata.yaml are resolved against the directory containing the config file. +// Returns nil if ConfigFile is not set. 
+func (a *AgentRef) ResolveConfig(projectDir string) *AgentConfig { if a.ConfigFile == "" { - return "" + return nil } configPath := a.ConfigFile @@ -166,48 +236,47 @@ func (a *AgentRef) ResolveFromConfig(projectDir string) string { } configDir := filepath.Dir(configPath) + cfg := &AgentConfig{ConfigFile: a.ConfigFile} + data, err := os.ReadFile(configPath) //nolint:gosec // path from project config if err != nil { - return configDir + return cfg } var meta struct { - Name string `yaml:"name"` Model string `yaml:"model"` InstructionFile string `yaml:"instruction_file"` SkillDir string `yaml:"skill_dir"` ToolsFile string `yaml:"tools_file"` } if err := yaml.Unmarshal(data, &meta); err != nil { - return configDir + return cfg } - if a.Model == "" && meta.Model != "" { - a.Model = meta.Model - } - if a.Instruction.IsEmpty() && meta.InstructionFile != "" { + cfg.Model = meta.Model + if meta.InstructionFile != "" { instrPath := meta.InstructionFile if !filepath.IsAbs(instrPath) { instrPath = filepath.Join(configDir, instrPath) } - a.Instruction.File = instrPath + cfg.InstructionFile = instrPath } - if a.SkillDir == "" && meta.SkillDir != "" { + if meta.SkillDir != "" { skillDir := meta.SkillDir if !filepath.IsAbs(skillDir) { skillDir = filepath.Join(configDir, skillDir) } - a.SkillDir = skillDir + cfg.SkillDir = skillDir } - if a.ToolsFile == "" && meta.ToolsFile != "" { + if meta.ToolsFile != "" { toolsFile := meta.ToolsFile if !filepath.IsAbs(toolsFile) { toolsFile = filepath.Join(configDir, toolsFile) } - a.ToolsFile = toolsFile + cfg.ToolsFile = toolsFile } - return configDir + return cfg } // ResolvedSystemPrompt returns the resolved instruction text. 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go index 14f1b67a1e6..fc77e2e7ed3 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go @@ -1,6 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// Package optimize_api provides an HTTP client for the agent optimization +// service API. It supports job submission, status polling, cancellation, +// and candidate config/file retrieval. package optimize_api import ( @@ -225,7 +228,7 @@ func (c *OptimizeClient) ReportDeployment( report *DeploymentReport, ) error { url := fmt.Sprintf( - "%s/optimize/candidates/%s/deployments?api-version=v1", + "%s/optimize/candidates/%s:promote?api-version=v1", c.endpoint, report.CandidateID, ) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go index 830b34a8659..e4337ace452 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go @@ -1,6 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// models.go defines the request and response types for the optimization +// service API, including job status, candidate results, agent definitions, +// dataset tasks, and skill/tool definitions. 
package optimize_api import "encoding/json" diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go index 36fbf030042..c633e0815b9 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// poller.go provides a polling loop for optimization jobs that calls +// a progress callback on each tick until the job reaches a terminal state. package optimize_api import ( From 7a4f1340df5caf67d723cdd4d28a846f2661f208 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 21 May 2026 08:18:59 -0700 Subject: [PATCH 22/33] bug bash --- .../azd_observability_bugbash.md | 250 +++++++++++++----- .../extensions/azure.ai.agents/extension.yaml | 2 +- .../azure.ai.agents/internal/cmd/optimize.go | 10 +- .../internal/cmd/optimize_apply.go | 56 +++- .../internal/cmd/optimize_apply_test.go | 138 ++++++++++ .../internal/cmd/optimize_test.go | 72 +++++ .../extensions/azure.ai.agents/version.txt | 2 +- cli/azd/extensions/registry.json | 76 ++++++ 8 files changed, 531 insertions(+), 75 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md index c9dac783ae0..09ec71bce83 100644 --- a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md +++ b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md @@ -29,11 +29,10 @@ Please activate `Foundry User` and `Owner` access Navigate to a fresh directory outside the extension repo, init the agent and point to our bugbash project, if you already have an azd project with TiP foundry account, you can continue to use it. 
```bash -mkdir bugbash-azd- && cd bugbash-azd- git clone https://github.com/ai-platform-microsoft/foundry-observability-playground.git cd .\foundry-observability-playground\demos\build2026\agents\travel-approver\ azd ai agent init --project-id /subscriptions/2d385bf4-0756-4a76-aa95-28bf9ed3b625/resourceGroups/rg-azdbugbash/providers/Microsoft.CognitiveServices/accounts/azd-bugbash-0514/projects/bugbash-westus2 -# Customize your agent name and model deployment +# !!! Customize your agent name and model deployment ``` The template includes `agent_optimization/` — a small package that reads config @@ -71,17 +70,30 @@ Verify: `azd ai agent invoke "Hello!"` > **If you have Owner permissions** and want fresh resources: run `azd provision` before `azd deploy`. -## 6. E2E Hero Scenario (inside an azd project with a hosted agent) +## 6. E2E Hero Scenarios + +There are two paths depending on whether you use the **bugbash project** or **your own project**. + +--- + +### Path A: Using the bugbash project (eval + optimize) + +> Use this path if you cloned the template in step 3 and deployed to the bugbash Foundry project. +> You have access to the eval APIs and can run the full eval → optimize flow. All commands below auto-detect the agent context from the current azd environment. Run them from your deployed azd project directory. -### 6a. Initialize an eval suite +#### 6a-A. Initialize an eval suite -> **Note:** The dataset generation API is not yet available. Use the sample `data.jsonl` included in the template. 
+> Generate an eval suite adapted to your agent; the same suite can also be used for optimization. ```bash -azd ai agent eval init --dataset ./data.jsonl +# Runs both dataset generation and evaluator generation +azd ai agent eval init + +# (Recommended) Use our provided golden dataset together with the adaptive evaluator +azd ai agent eval init --dataset eval/travel_approval_golden.jsonl ``` The command resolves your agent from `azure.yaml` and prompts interactively: @@ -93,34 +105,54 @@ Resolving eval context... Resolving Foundry project endpoint... Detected eval target: - (✓) Service: sample-agent (azure.yaml) - (✓) Agent: sample-agent (AGENT_SAMPLE_AGENT_NAME) - (✓) Version: 1 (AGENT_SAMPLE_AGENT_VERSION) + (✓) Service: travel-approver-bb (azure.yaml) + (✓) Agent: travel-approver-azd-bb (AGENT_TRAVEL_APPROVER_BB_NAME) + (✓) Version: 2 (AGENT_TRAVEL_APPROVER_BB_VERSION) (✓) Kind: hosted (agent.yaml) - (✓) Endpoint: https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2 - (✓) Project: D:\optimization\bugbash-agent-zyying - Eval config: D:\optimization\bugbash-agent-zyying\eval.yaml - -? Eval suite name: smoke-core-zyying -? How would you like to provide the generation instruction?: Type inline -? Describe what this agent does and what scenarios to test: test agent -? Select the model for evaluation and generation: Select another deployment -? Select a model deployment: gpt-4o (gpt-4o) -? Max samples: 100 -\ Evaluator generation... 
(✓) Done Evaluator generation (1m16s) - - Artifacts: D:\optimization\bugbash-agent-zyying\.azure\.foundry - evaluators/smoke-core-zyying-35368f67.json -Eval suite created - Config: D:\optimization\bugbash-agent-zyying\eval.yaml - Dataset: D:\optimization\bugbash-agent-zyying\data.jsonl - Evaluator: smoke-core-zyying-35368f67 + (✓) Endpoint: https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2 (AZURE_AI_PROJECT_ENDPOINT) + (✓) Project: D:\optimization\bugbash\foundry-observability-playground\demos\build2026\agents\travel-approver (azure.yaml service "travel-approver-bb" project path) + Eval config: D:\optimization\bugbash\foundry-observability-playground\demos\build2026\agents\travel-approver\eval.yaml + + Agent Config: D:\optimization\bugbash\foundry-observability-playground\demos\build2026\agents\travel-approver\.agent_configs\baseline\metadata.yaml +? Eval suite name: travel-approver-azd-bb +? Instruction file: .agent_configs\baseline\instructions.md +? Include agent traces for evaluator generation?: No +? Select the model for evaluation and generation: gpt-4o (deployed) +? 
Max samples (between 15 and 1000): 15 + (–) Running Evaluator generation (evaluatorgen-travel-approver-azd-bb-v1-3392d06e) + (–) Running Dataset generation (datagen-c00db6c5b7ee4585aa9f25f7089a05a6) + (✓) Done Evaluator generation (34 seconds) + (✓) Done Dataset generation (2m 19s) - Review the generated assets, then run: +Eval suite created + Config: D:\optimization\bugbash\foundry-observability-playground\demos\build2026\agents\travel-approver\eval.yaml + Dataset: travel-approver-azd-bb (2.0) + D:\optimization\bugbash\foundry-observability-playground\demos\build2026\agents\travel-approver\datasets\travel-approver-azd-bb + Evaluator: travel-approver-azd-bb (1) + D:\optimization\bugbash\foundry-observability-playground\demos\build2026\agents\travel-approver\evaluators\travel-approver-azd-bb\rubric_dimensions.json + + Evaluator dimensions (6): + Weight Dimension + ────── ───────── + 10 policy_compliance + 6 budget_accuracy + 5 alternative_suggestions_specificity + 4 decision_explanation_clarity + 3 user_constraint_adherence + 5 general_quality + + Portal: + Dataset: https://ai.azure.com/nextgen/r/LThb9AdWSnaqlSi_ntO2JQ,rg-azdbugbash,,azd-bugbash-0514,bugbash-westus2/build/data/datasets/travel-approver-azd-bb/2.0 + Evaluator: https://ai.azure.com/nextgen/r/LThb9AdWSnaqlSi_ntO2JQ,rg-azdbugbash,,azd-bugbash-0514,bugbash-westus2/build/evaluations/catalog/travel-approver-azd-bb/1 + + Next steps: azd ai agent eval run + Run the eval suite against your agent. + azd ai agent eval update + Edit the generated dataset or evaluator locally, then upload changes. ``` -### 6b. Run an eval +#### 6b-A. Run an eval (Optional, if you want to try evaluation run) ```bash azd ai agent eval run @@ -128,7 +160,7 @@ azd ai agent eval run Reads `eval.yaml`, creates the eval on the Foundry backend, and submits a run against your deployed agent. -### 6c. Browse eval results +#### 6c-A. 
Browse eval results (Optional) ```bash # List all evals (table with status, run count, created date) @@ -141,39 +173,46 @@ azd ai agent eval show azd ai agent eval show -O results.json ``` -### 6d. Optimize the agent +#### 6d-A. Optimize the agent After the eval suite is ready, run optimize. It auto-detects the `eval.yaml` you just created. ```bash azd ai agent optimize -# → Prompts: "Found eval.yaml in project. Use it for optimization?" -# Select Yes to use your eval config, or No to use the built-in dataset. ``` Expected output (takes ~5–20 minutes): ``` -Optimizing agent "sample-agent"... - Config: D:\optimization\bugbash-agent-zyying\eval.yaml - Job ID: opt_f74131d58c774ebba1765fae1005a9f8 - ⠦ completed · strategy: gepa · iteration 1 · score: 0.95 · 3m0s +# azd ai agent optimize +? Select an agent service: travel-zyying-new +? Found eval.yaml in project. Use it for optimization?: Yes +? Instruction file: .agent_configs\baseline\instructions.md +? Skills directory (enter to skip): skills +? Would you like to specify target models for optimization?: Yes +? Select target models for optimization (current: gpt-4o): gpt-4o (current), gpt-4.1 +Optimizing agent "travel-zyying-new"... 
+ Config: D:\optimization\public\viveks-scratch\optimization-demo-v2\src\travel-approver-demo\eval.yaml + Baseline saved to .agent_configs\baseline\metadata.yaml + Job ID: opt_b1cca48e468b4a508d21bfa19cdd16de + Status: pending + Portal: https://eastus2euap.ai.azure.com/nextgen/r/LThb9AdWSnaqlSi_ntO2JQ,rg-azdbugbash,,azd-bugbash-0514,bugbash-westus2/build/agents/travel-zyying-new/optimization/opt_b1cca48e468b4a508d21bfa19cdd16de?flight=enable_faos_read_ui + + ⠼ completed · strategy: gepa · iteration 1 · score: 0.77 · 7m50s Results: Candidate Score Pass Tokens ──────────────────── ─────── ─────── ──────── - baseline 0.73 100% 430 - baseline_instr_v2 0.77 100% 1180 - baseline_instr_v3 0.85 100% 1204 - baseline_instr_v1 ★ 0.92 100% 1063 + baseline ★ 0.77 100% 0 + candidate_1 0.74 100% 0 Candidate IDs: - baseline_instr_v2 cand_445fe8e68e224d6d94cbb37b022945eb - baseline_instr_v3 cand_51b87d7ce10b43ba801776483a9b5506 - ★ baseline_instr_v1 cand_6b5c23ed295f4f4e9be87b7fdb3809b0 + ★ baseline cand_c6532ad867594dd4b6878a45604a4994 + candidate_1 cand_d9bedab23c5641d4a2d83c98aa635c2f - Deploy the best candidate: - azd ai agent optimize deploy --candidate cand_6b5c23ed295f4f4e9be87b7fdb3809b0 + Apply the best candidate locally, then deploy: + azd ai agent optimize apply --candidate cand_c6532ad867594dd4b6878a45604a4994 + azd deploy ``` The ★ marks the best candidate. Copy the deploy command from the output to promote it. @@ -185,32 +224,23 @@ You can fine-tune optimization behavior by adding or modifying the `options:` se ```yaml options: eval_model: "gpt-4o" # (string) Model used for evaluation. Default: "gpt-4o" - mode: "optimize" # (string) Run mode. Default: "optimize" - strategies: # ([]string) Optimization strategies to try. - - instruction # Default: ["instruction", "skill", "agents-optimization-job"] - - skill - budget: 5 # (int) Max optimization budget (number of candidates). Default: 5 - max_iterations: 2 # (int) Max iterations per strategy. 
Default: 2 (when strategies are default) - min_improvement: 0.0 # (float) Minimum score improvement to accept a candidate. Default: 0 (not set) - improvement_threshold: 0.0 # (float) Threshold for incremental improvement. Default: 0 (not set) - pass_threshold: 0.0 # (float) Minimum passing score. Default: 0 (not set) + target_attributes: # If not specified, it should be auto-detected + - instruction + - skill + - model + target_config: + model: + - gpt-4.1 + - gpt-4.1-mini + - gpt-4o + budget: 0 # (int) Being deprecated. Max optimization budget (number of candidates). Default: 5 + max_iterations: 4 # (int) Max iterations per strategy. Default: 4 (when strategies are default) + min_improvement: 0.0 # (float) Minimum score improvement to accept a candidate. keep_versions: false # (bool) Keep all intermediate agent versions. Default: false - tasks_per_iteration: 0 # (int) Number of tasks per iteration. Default: 0 (server decides) reflection_model: "" # (string) Model for reflection steps. Default: "" (uses eval_model) ``` -For example, to increase the budget and use a different eval model: - -```yaml -options: - eval_model: "gpt-4.1" - budget: 10 - max_iterations: 3 -``` - -Fields you omit will use the defaults above. The `strategies` field defaults to all three strategies if not specified. - -### 6e. Monitor optimization jobs +#### 6e-A. Monitor optimization jobs ```bash # Watch a running job in real-time @@ -223,7 +253,7 @@ azd ai agent optimize list azd ai agent optimize cancel ``` -### 6f. Deploy the winning candidate +#### 6f-A. Deploy the winning candidate > **⚠️ Known Issue:** Due to a FAOS CANDIDATE API issue, `optimize deploy` and `optimize apply` cannot fetch candidate config at this time. This step is blocked until the API issue is resolved. 
But you can check agent optimization job in foundry UI with `?flight=enable_faos_read_ui` @@ -238,7 +268,7 @@ This creates a new agent version with `OPTIMIZATION_CONFIG` set to the candidate config (instructions, model, temperature). The agent SDK's `load_config()` reads this at startup and applies the optimized settings. -### 6g. Verify the optimized agent +#### 6g-A. Verify the optimized agent > **⚠️ Blocked:** This step depends on 6f, which is currently blocked by the FAOS CANDIDATE API issue. @@ -249,6 +279,86 @@ azd ai agent invoke "Hello!" --- +### Path B: Using your own project (optimize only, built-in dataset) + +> Use this path if you have your own azd project with a deployed hosted agent on a westus2/ncus Foundry account. +> The eval APIs (`eval init`, `eval run`) require specific backend support that may not be available on your project. +> Instead, go directly to `optimize` which uses a **built-in dataset** (3 tasks, 12 criteria) — no eval setup needed. + +#### 6a-B. Prerequisites + +- You have an azd project with a hosted agent already deployed (`azd deploy` completed). +- Your agent uses the `agent_optimization` SDK package with `load_config()`. +- You are logged in (`az login`) and have access to the Foundry project. + +#### 6b-B. Optimize the agent (built-in dataset) + +From your azd project directory: + +```bash +azd ai agent optimize +# → If eval.yaml exists, select "No" to use the built-in dataset +# → If no eval.yaml, it automatically uses the built-in dataset +``` + +Or explicitly skip the eval.yaml prompt: + +```bash +azd ai agent optimize --no-prompt +# Always uses built-in defaults (3 tasks, 12 criteria) +``` + +Expected output (takes ~5–20 minutes): + +``` +Optimizing agent "your-agent"... + Dataset: built-in (3 tasks, 12 criteria) + Job ID: opt_abc123... 
+ ⠦ completed · strategy: gepa · iteration 1 · score: 0.85 · 5m0s + +Results: + Candidate Score Pass Tokens + ──────────────────── ─────── ─────── ──────── + baseline 0.60 100% 300 + baseline_instr_v1 ★ 0.85 100% 980 + + Deploy the best candidate: + azd ai agent optimize deploy --candidate cand_... +``` + +#### 6c-B. Monitor optimization jobs + +```bash +# Watch the running job +azd ai agent optimize status --watch + +# List all jobs +azd ai agent optimize list + +# Cancel if needed +azd ai agent optimize cancel +``` + +#### 6d-B. Deploy the winning candidate + +> **⚠️ Known Issue:** Due to a FAOS CANDIDATE API issue, `optimize deploy` and `optimize apply` cannot fetch candidate config at this time. +> You can check agent optimization job results in Foundry UI with `?flight=enable_faos_read_ui`. + +```bash +azd ai agent optimize deploy --candidate +``` + +#### 6e-B. Verify the optimized agent + +> **⚠️ Blocked:** This step depends on 6d-B, which is currently blocked by the FAOS CANDIDATE API issue. + +```bash +azd ai agent invoke "Hello!" +# Expected: agent responds using the optimized configuration +``` + +--- + ## Comprehensive Test Scenarios ### A. `azd ai agent eval init` diff --git a/cli/azd/extensions/azure.ai.agents/extension.yaml b/cli/azd/extensions/azure.ai.agents/extension.yaml index 5a7cd13dbb8..3c618813371 100644 --- a/cli/azd/extensions/azure.ai.agents/extension.yaml +++ b/cli/azd/extensions/azure.ai.agents/extension.yaml @@ -5,7 +5,7 @@ displayName: Foundry agents (Preview) description: Ship agents with Microsoft Foundry from your terminal. (Preview) usage: azd ai agent [options] # NOTE: Make sure version.txt is in sync with this version. 
-version: 0.1.33-optbugbash-preview +version: 0.1.34-optbugbash-preview requiredAzdVersion: ">1.23.13" language: go capabilities: diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go index 50c3976d593..bc59cb33c45 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -211,7 +211,15 @@ func (a *OptimizeAction) resolveConfig( if err != nil { return nil, "", "", fmt.Errorf("%w\n\nCheck that the file path is correct and contains valid YAML", err) } - return cfg, a.flags.configFile, "", nil + + // Even with explicit --config, try to reconcile agent name with the environment. + resolved, resolveErr := resolveOptimizeAgent(ctx, a.flags.agent, a.noPrompt) + if resolveErr == nil { + agentProject = resolved.agentProject + reconcileConfigAgentName(&cfg.Agent, resolved.agentName, a.flags.configFile) + } + + return cfg, a.flags.configFile, agentProject, nil } resolved, err := resolveOptimizeAgent(ctx, a.flags.agent, a.noPrompt) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go index 6fdfe4466a9..ccafbaaa744 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go @@ -184,10 +184,13 @@ func (a *OptimizeApplyAction) apply( fmt.Fprintf(out, " Run %s to deploy the optimized agent.\n", color.CyanString("azd deploy --service %s", svc.Name)) - // Point the user to the config folders for comparison. + // Show instruction diff (baseline → optimized). + printPromptDiff(out, serviceDir, a.flags.candidate, candidateConfig) + + // Point the user to the config folders for other differences (skills, tools, etc.). 
baselinePath := filepath.Join(serviceDir, agentConfigsDir, opteval.BaselineDir) candidatePath := filepath.Join(serviceDir, agentConfigsDir, a.flags.candidate) - fmt.Fprintf(out, "\n To see the full diff, compare the files in:\n") + fmt.Fprintf(out, "\n For other changes (skills, tools, etc.), compare the files in:\n") fmt.Fprintf(out, " Baseline: %s\n", color.CyanString(baselinePath)) fmt.Fprintf(out, " Optimized: %s\n", color.CyanString(candidatePath)) @@ -495,3 +498,52 @@ func extractInstructions(config any) string { } return "" } + +// maxDiffPreviewLines is the max lines shown per section in the prompt diff preview. +const maxDiffPreviewLines = 4 + +// printPromptDiff displays an abbreviated instruction diff (baseline → optimized) +// with a short preview of each. +func printPromptDiff(out io.Writer, serviceDir, candidateID string, candidateConfig any) { + optimized := extractInstructions(candidateConfig) + if optimized == "" { + return + } + + baseDir := filepath.Join(serviceDir, agentConfigsDir, opteval.BaselineDir) + baseline, err := loadBaselineConfig(serviceDir) + if err != nil { + return + } + baselineText := baseline.resolveInstructions(baseDir) + if baselineText == "" { + return + } + baselineLines := strings.Split(baselineText, "\n") + optimizedLines := strings.Split(optimized, "\n") + + fmt.Fprintf(out, "\n Instruction diff (baseline → optimized):\n\n") + + removed := color.New(color.FgRed) + removed.Fprintf(out, " — Baseline (%d lines, %d chars):\n", + len(baselineLines), len(baselineText)) + printPreviewLines(out, baselineLines, "- ", removed) + + fmt.Fprintln(out) + + added := color.New(color.FgGreen) + added.Fprintf(out, " — Optimized (%d lines, %d chars):\n", + len(optimizedLines), len(optimized)) + printPreviewLines(out, optimizedLines, "+ ", added) +} + +// printPreviewLines prints up to maxDiffPreviewLines with a prefix, then "..." if truncated. 
+func printPreviewLines(out io.Writer, lines []string, prefix string, c *color.Color) { + limit := min(len(lines), maxDiffPreviewLines) + for _, line := range lines[:limit] { + c.Fprintf(out, " %s%s\n", prefix, line) + } + if len(lines) > maxDiffPreviewLines { + c.Fprintf(out, " %s... (%d more lines)\n", prefix, len(lines)-maxDiffPreviewLines) + } +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go index 3f042e533d9..7a8bb618e3a 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go @@ -14,6 +14,7 @@ import ( "azureaiagent/internal/pkg/agents/optimize_api" "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/fatih/color" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -45,6 +46,143 @@ func TestNewOptimizeApplyCommand_CandidateIsRequired(t *testing.T) { assert.Contains(t, err.Error(), "candidate") } +// ---- printPreviewLines ---- + +func TestPrintPreviewLines(t *testing.T) { + t.Parallel() + + // Disable color output so assertions don't need ANSI codes. + color.NoColor = true + + tests := []struct { + name string + lines []string + prefix string + want []string // substrings expected in output + }{ + { + "fewer lines than limit", + []string{"line1", "line2"}, + "+ ", + []string{"+ line1", "+ line2"}, + }, + { + "exactly at limit", + []string{"a", "b", "c", "d"}, + "- ", + []string{"- a", "- b", "- c", "- d"}, + }, + { + "exceeds limit shows truncation", + []string{"a", "b", "c", "d", "e", "f"}, + "+ ", + []string{"+ a", "+ b", "+ c", "+ d", "... 
(2 more lines)"}, + }, + { + "empty lines", + []string{}, + "- ", + nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + var buf bytes.Buffer + c := color.New(color.FgWhite) + printPreviewLines(&buf, tt.lines, tt.prefix, c) + out := buf.String() + for _, s := range tt.want { + assert.Contains(t, out, s) + } + if tt.want == nil { + assert.Empty(t, out) + } + }) + } +} + +// ---- printPromptDiff ---- + +func TestPrintPromptDiff(t *testing.T) { + t.Parallel() + + color.NoColor = true + + t.Run("shows diff when baseline and candidate have instructions", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + + // Set up baseline with metadata that points to an instruction file. + baselineDir := filepath.Join(dir, agentConfigsDir, opteval.BaselineDir) + require.NoError(t, os.MkdirAll(baselineDir, 0750)) + require.NoError(t, os.WriteFile( + filepath.Join(baselineDir, opteval.InstructionFile), + []byte("You are a baseline assistant.\nLine two."), + 0600, + )) + require.NoError(t, os.WriteFile( + filepath.Join(baselineDir, opteval.MetadataFile), + []byte("instruction_file: instructions.md\nmodel: gpt-4o\n"), + 0600, + )) + + candidateConfig := map[string]any{ + "systemPrompt": "You are an optimized assistant.\nNew line two.\nNew line three.", + } + + var buf bytes.Buffer + printPromptDiff(&buf, dir, "cand1", candidateConfig) + out := buf.String() + + assert.Contains(t, out, "Instruction diff") + assert.Contains(t, out, "Baseline") + assert.Contains(t, out, "Optimized") + assert.Contains(t, out, "You are a baseline assistant.") + assert.Contains(t, out, "You are an optimized assistant.") + }) + + t.Run("no output when candidate has no instructions", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + candidateConfig := map[string]any{"model": "gpt-4o"} + + var buf bytes.Buffer + printPromptDiff(&buf, dir, "cand1", candidateConfig) + assert.Empty(t, buf.String()) + }) + + t.Run("no output when baseline config missing", 
func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + candidateConfig := map[string]any{"systemPrompt": "optimized"} + + var buf bytes.Buffer + printPromptDiff(&buf, dir, "cand1", candidateConfig) + assert.Empty(t, buf.String()) + }) + + t.Run("no output when baseline has no instruction file", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + + // Write metadata without instruction_file. + baselineDir := filepath.Join(dir, agentConfigsDir, opteval.BaselineDir) + require.NoError(t, os.MkdirAll(baselineDir, 0750)) + require.NoError(t, os.WriteFile( + filepath.Join(baselineDir, opteval.MetadataFile), + []byte("model: gpt-4o\n"), + 0600, + )) + + candidateConfig := map[string]any{"systemPrompt": "optimized"} + + var buf bytes.Buffer + printPromptDiff(&buf, dir, "cand1", candidateConfig) + assert.Empty(t, buf.String()) + }) +} + // ---- extractInstructions ---- func TestExtractInstructions(t *testing.T) { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go index b47292ca9aa..a8b69969168 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go @@ -4,6 +4,8 @@ package cmd import ( + "os" + "path/filepath" "testing" "azureaiagent/internal/pkg/agents/optimize_api" @@ -102,3 +104,73 @@ func TestDefaultOptimizeConfig(t *testing.T) { require.Len(t, cfg.Evaluators, 1) assert.Equal(t, "builtin.task_adherence", cfg.Evaluators[0].Name) } + +// ---- LoadOptimizeConfig + reconcileConfigAgentName (--config path) ---- + +func TestLoadOptimizeConfig_ReconcileAgentName(t *testing.T) { + t.Parallel() + + writeConfigYAML := func(t *testing.T, dir, agentName string) string { + t.Helper() + content := "agent:\n name: " + agentName + "\noptions:\n eval_model: gpt-4o\n mode: optimize\n" + path := filepath.Join(dir, "spec.yaml") + require.NoError(t, os.WriteFile(path, []byte(content), 0600)) + 
return path + } + + t.Run("env overrides config when names differ", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + cfgPath := writeConfigYAML(t, dir, "config-agent") + + cfg, err := LoadOptimizeConfig(cfgPath) + require.NoError(t, err) + assert.Equal(t, "config-agent", cfg.Agent.Name) + + changed := reconcileConfigAgentName(&cfg.Agent, "env-agent", cfgPath) + assert.True(t, changed, "should report change when names differ") + assert.Equal(t, "env-agent", cfg.Agent.Name, "environment name should take precedence") + }) + + t.Run("no change when names match", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + cfgPath := writeConfigYAML(t, dir, "same-agent") + + cfg, err := LoadOptimizeConfig(cfgPath) + require.NoError(t, err) + + changed := reconcileConfigAgentName(&cfg.Agent, "same-agent", cfgPath) + assert.False(t, changed) + assert.Equal(t, "same-agent", cfg.Agent.Name) + }) + + t.Run("sets name when config has empty agent name", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + content := "agent:\n kind: hosted\noptions:\n eval_model: gpt-4o\n" + cfgPath := filepath.Join(dir, "spec.yaml") + require.NoError(t, os.WriteFile(cfgPath, []byte(content), 0600)) + + cfg, err := LoadOptimizeConfig(cfgPath) + require.NoError(t, err) + assert.Empty(t, cfg.Agent.Name) + + changed := reconcileConfigAgentName(&cfg.Agent, "env-agent", cfgPath) + assert.False(t, changed, "filling empty name is not a 'change' (no conflict)") + assert.Equal(t, "env-agent", cfg.Agent.Name) + }) + + t.Run("no-op when env name is empty", func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + cfgPath := writeConfigYAML(t, dir, "config-agent") + + cfg, err := LoadOptimizeConfig(cfgPath) + require.NoError(t, err) + + changed := reconcileConfigAgentName(&cfg.Agent, "", cfgPath) + assert.False(t, changed) + assert.Equal(t, "config-agent", cfg.Agent.Name, "original name preserved when env is empty") + }) +} diff --git a/cli/azd/extensions/azure.ai.agents/version.txt 
b/cli/azd/extensions/azure.ai.agents/version.txt index 5cc273dc597..fc5a69c4947 100644 --- a/cli/azd/extensions/azure.ai.agents/version.txt +++ b/cli/azd/extensions/azure.ai.agents/version.txt @@ -1 +1 @@ -0.1.33-optbugbash-preview +0.1.34-optbugbash-preview diff --git a/cli/azd/extensions/registry.json b/cli/azd/extensions/registry.json index 5a068cb6ad8..a31f336a7c7 100644 --- a/cli/azd/extensions/registry.json +++ b/cli/azd/extensions/registry.json @@ -4165,6 +4165,82 @@ "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-windows-arm64.zip" } } + }, + { + "version": "0.1.34-optbugbash-preview", + "requiredAzdVersion": "\u003e1.23.13", + "capabilities": [ + "custom-commands", + "lifecycle-events", + "mcp-server", + "service-target-provider", + "metadata" + ], + "providers": [ + { + "name": "azure.ai.agent", + "type": "service-target", + "description": "Deploys agents to the Foundry Agent Service" + } + ], + "usage": "azd ai agent \u003ccommand\u003e [options]", + "examples": [ + { + "name": "init", + "description": "Initialize a new AI agent project.", + "usage": "azd ai agent init" + } + ], + "artifacts": { + "darwin/amd64": { + "checksum": { + "algorithm": "sha256", + "value": "326e41becb8073e146639fb1078324bf0455200060bcad71bdf87e43779e52e4" + }, + "entryPoint": "azure-ai-agents-darwin-amd64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.34-optbugbash-preview/azure-ai-agents-darwin-amd64.zip" + }, + "darwin/arm64": { + "checksum": { + "algorithm": "sha256", + "value": "ded28b36249fca47924fbb0de11ca8c2376f4a8878e58c489d2bc82da4406bcd" + }, + "entryPoint": "azure-ai-agents-darwin-arm64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.34-optbugbash-preview/azure-ai-agents-darwin-arm64.zip" + }, + "linux/amd64": { + "checksum": { + "algorithm": "sha256", + "value": 
"106e191775557e51ab1bc605d144bd02004c653a9c72fef2488ed87a992ba595" + }, + "entryPoint": "azure-ai-agents-linux-amd64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.34-optbugbash-preview/azure-ai-agents-linux-amd64.tar.gz" + }, + "linux/arm64": { + "checksum": { + "algorithm": "sha256", + "value": "8ae74dd54ecdee54b0a98e44513adaf1e32d65f0a1a0e13e74cf0a6bd5197186" + }, + "entryPoint": "azure-ai-agents-linux-arm64", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.34-optbugbash-preview/azure-ai-agents-linux-arm64.tar.gz" + }, + "windows/amd64": { + "checksum": { + "algorithm": "sha256", + "value": "ffe3a7258b22a46ac7a47d7a4b9786dedbcf36f69f8154149c6fe7a63ce1cd9e" + }, + "entryPoint": "azure-ai-agents-windows-amd64.exe", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.34-optbugbash-preview/azure-ai-agents-windows-amd64.zip" + }, + "windows/arm64": { + "checksum": { + "algorithm": "sha256", + "value": "b352fd4bc55f858998d4083eb226f4d1c1b3b74c6ee3689b845da09022ac0962" + }, + "entryPoint": "azure-ai-agents-windows-arm64.exe", + "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.34-optbugbash-preview/azure-ai-agents-windows-arm64.zip" + } + } } ] }, From 96d268e5ce88a65dd6ef36e6771daa42e579e424 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 21 May 2026 11:27:44 -0700 Subject: [PATCH 23/33] deployment logic --- .../azure.ai.agents/internal/cmd/listen.go | 11 + .../internal/cmd/listen_test.go | 309 ++++++++++++++++++ .../internal/cmd/optimize_deploy.go | 13 +- .../internal/cmd/optimize_helpers.go | 73 ++++- .../internal/cmd/optimize_helpers_test.go | 301 +++++++++++++++++ .../pkg/agents/optimize_api/client.go | 20 +- .../pkg/agents/optimize_api/client_test.go | 21 +- .../pkg/agents/optimize_api/models.go | 10 +- .../pkg/agents/optimize_api/models_test.go | 35 ++ 9 files changed, 768 
insertions(+), 25 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/listen.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/listen.go index 641fe54162f..2f660577c46 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/listen.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/listen.go @@ -17,6 +17,7 @@ import ( "azureaiagent/internal/exterrors" "azureaiagent/internal/pkg/agents/agent_api" "azureaiagent/internal/pkg/agents/agent_yaml" + "azureaiagent/internal/pkg/agents/optimize_api" "azureaiagent/internal/pkg/azure" "azureaiagent/internal/project" @@ -247,6 +248,16 @@ func postdeployHandler(ctx context.Context, azdClient *azdext.AzdClient, args *a return fmt.Errorf("agent identity RBAC setup failed: %w", err) } + // Report optimization candidate deployments to FAOS. + // If a service has AGENT_{KEY}_OPTIMIZATION_CANDIDATE_ID in the azd environment, + // the agent was deployed from an optimization candidate. We notify the + // optimization service so it can track which candidates have been deployed. 
+ reportOptimizationDeployments(ctx, azdClient, hostedAgents, envName, endpointResp.Value, + func(endpoint string) *optimize_api.OptimizeClient { + return optimize_api.NewOptimizeClient(endpoint, cred) + }, + ) + return nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/listen_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/listen_test.go index d50c3072ac2..39bd65532e6 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/listen_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/listen_test.go @@ -4,11 +4,15 @@ package cmd import ( + "os" + "path/filepath" "testing" "azureaiagent/internal/project" "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) // TestPostdeployHandler_NoAgentService_NoOp verifies postdeployHandler returns nil @@ -279,3 +283,308 @@ func TestBuildConnectionCredentials(t *testing.T) { }) } } + +// --------------------------------------------------------------------------- +// isHostedAgentService +// --------------------------------------------------------------------------- + +func TestIsHostedAgentService_HostedKind(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + require.NoError(t, os.WriteFile( + filepath.Join(dir, "agent.yaml"), + []byte("kind: hosted\nname: my-agent\n"), 0600, + )) + + svc := &azdext.ServiceConfig{Name: "svc", RelativePath: "."} + proj := &azdext.ProjectConfig{Path: dir} + + assert.True(t, isHostedAgentService(svc, proj)) +} + +func TestIsHostedAgentService_NonHostedKind(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + require.NoError(t, os.WriteFile( + filepath.Join(dir, "agent.yaml"), + []byte("kind: local\nname: my-agent\n"), 0600, + )) + + svc := &azdext.ServiceConfig{Name: "svc", RelativePath: "."} + proj := &azdext.ProjectConfig{Path: dir} + + assert.False(t, isHostedAgentService(svc, proj)) +} + +func TestIsHostedAgentService_NoAgentYaml(t *testing.T) { + t.Parallel() + + svc := 
&azdext.ServiceConfig{Name: "svc", RelativePath: "."} + proj := &azdext.ProjectConfig{Path: t.TempDir()} + + assert.False(t, isHostedAgentService(svc, proj)) +} + +func TestIsHostedAgentService_InvalidYaml(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + require.NoError(t, os.WriteFile( + filepath.Join(dir, "agent.yaml"), + []byte(":::invalid yaml:::"), 0600, + )) + + svc := &azdext.ServiceConfig{Name: "svc", RelativePath: "."} + proj := &azdext.ProjectConfig{Path: dir} + + assert.False(t, isHostedAgentService(svc, proj)) +} + +func TestIsHostedAgentService_MissingKindField(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + require.NoError(t, os.WriteFile( + filepath.Join(dir, "agent.yaml"), + []byte("name: my-agent\n"), 0600, + )) + + svc := &azdext.ServiceConfig{Name: "svc", RelativePath: "."} + proj := &azdext.ProjectConfig{Path: dir} + + assert.False(t, isHostedAgentService(svc, proj)) +} + +func TestIsHostedAgentService_SubDirectory(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + subDir := filepath.Join(dir, "agents", "bot") + require.NoError(t, os.MkdirAll(subDir, 0700)) + require.NoError(t, os.WriteFile( + filepath.Join(subDir, "agent.yaml"), + []byte("kind: hosted\nname: bot\n"), 0600, + )) + + svc := &azdext.ServiceConfig{Name: "bot", RelativePath: "agents/bot"} + proj := &azdext.ProjectConfig{Path: dir} + + assert.True(t, isHostedAgentService(svc, proj)) +} + +// --------------------------------------------------------------------------- +// resolveEnvValue / resolveMapValues / resolveAnyValue +// --------------------------------------------------------------------------- + +func TestResolveEnvValue(t *testing.T) { + t.Parallel() + + env := map[string]string{ + "DB_HOST": "mydb.postgres.azure.com", + "DB_PORT": "5432", + } + + tests := []struct { + input string + want string + }{ + {"${DB_HOST}", "mydb.postgres.azure.com"}, + {"host=${DB_HOST}:${DB_PORT}", "host=mydb.postgres.azure.com:5432"}, + {"no-var", "no-var"}, + {"${UNDEFINED}", 
""}, + {"", ""}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, resolveEnvValue(tt.input, env)) + }) + } +} + +func TestResolveMapValues(t *testing.T) { + t.Parallel() + + env := map[string]string{"KEY": "val"} + m := map[string]any{ + "a": "${KEY}", + "b": "literal", + "c": 42, + } + + got := resolveMapValues(m, env) + assert.Equal(t, "val", got["a"]) + assert.Equal(t, "literal", got["b"]) + assert.Equal(t, 42, got["c"]) +} + +func TestResolveAnyValue_NestedStructures(t *testing.T) { + t.Parallel() + + env := map[string]string{"X": "resolved"} + + // Nested map + nested := map[string]any{ + "inner": map[string]any{"key": "${X}"}, + } + got := resolveAnyValue(nested, env) + gotMap := got.(map[string]any) + inner := gotMap["inner"].(map[string]any) + assert.Equal(t, "resolved", inner["key"]) + + // Slice + slice := []any{"${X}", "plain", 99} + gotSlice := resolveAnyValue(slice, env).([]any) + assert.Equal(t, "resolved", gotSlice[0]) + assert.Equal(t, "plain", gotSlice[1]) + assert.Equal(t, 99, gotSlice[2]) + + // Non-string type passthrough + assert.Equal(t, true, resolveAnyValue(true, env)) +} + +// --------------------------------------------------------------------------- +// resolveToolboxEnvVars +// --------------------------------------------------------------------------- + +func TestResolveToolboxEnvVars(t *testing.T) { + t.Parallel() + + env := map[string]string{ + "TB_NAME": "my-toolbox", + "TB_DESC": "A test toolbox", + "URL": "https://example.com", + } + + tb := project.Toolbox{ + Name: "${TB_NAME}", + Description: "${TB_DESC}", + Tools: []map[string]any{ + {"server_url": "${URL}", "type": "web_search"}, + }, + } + + resolveToolboxEnvVars(&tb, env) + + assert.Equal(t, "my-toolbox", tb.Name) + assert.Equal(t, "A test toolbox", tb.Description) + assert.Equal(t, "https://example.com", tb.Tools[0]["server_url"]) + assert.Equal(t, "web_search", tb.Tools[0]["type"]) +} + +// 
--------------------------------------------------------------------------- +// toolboxConnectionsByName +// --------------------------------------------------------------------------- + +func TestToolboxConnectionsByName_NilConfig(t *testing.T) { + t.Parallel() + assert.Empty(t, toolboxConnectionsByName(nil)) +} + +func TestToolboxConnectionsByName_MergesBothTypes(t *testing.T) { + t.Parallel() + + config := &project.ServiceTargetAgentConfig{ + Connections: []project.Connection{ + {Name: "conn-a", Target: "https://a.com"}, + }, + ToolConnections: []project.ToolConnection{ + {Name: "tool-b", Target: "https://b.com"}, + }, + } + + result := toolboxConnectionsByName(config) + assert.Len(t, result, 2) + assert.Equal(t, "https://a.com", result["conn-a"].Target) + assert.Equal(t, "https://b.com", result["tool-b"].Target) +} + +// --------------------------------------------------------------------------- +// postdeployHandler — skips non-agent-host services +// --------------------------------------------------------------------------- + +func TestPostdeployHandler_SkipsNonAgentHostServices(t *testing.T) { + t.Parallel() + + // Project has one service with a different host type — handler should + // return nil without making any RPC calls (azdClient is nil). 
+ args := &azdext.ProjectEventArgs{ + Project: &azdext.ProjectConfig{ + Path: t.TempDir(), + Services: map[string]*azdext.ServiceConfig{ + "api": {Name: "api", Host: "containerapp", RelativePath: "."}, + }, + }, + } + + assert.NoError(t, postdeployHandler(t.Context(), nil, args)) +} + +func TestPostdeployHandler_SkipsWhenNoServices(t *testing.T) { + t.Parallel() + + args := &azdext.ProjectEventArgs{ + Project: &azdext.ProjectConfig{ + Path: t.TempDir(), + Services: map[string]*azdext.ServiceConfig{}, + }, + } + + assert.NoError(t, postdeployHandler(t.Context(), nil, args)) +} + +// --------------------------------------------------------------------------- +// enrichToolboxFromConnections — server_url already set +// --------------------------------------------------------------------------- + +func TestEnrichToolboxFromConnections_DoesNotOverrideExistingServerURL(t *testing.T) { + t.Parallel() + + connByName := map[string]toolboxConnection{ + "my-conn": {Name: "my-conn", Target: "https://conn-target.com"}, + } + + tb := project.Toolbox{ + Name: "test", + Tools: []map[string]any{ + { + "type": "mcp", + "project_connection_id": "my-conn", + "server_url": "https://custom-url.com", + }, + }, + } + + enrichToolboxFromConnections(&tb, connByName) + + // server_url was already set — should not be overridden. + assert.Equal(t, "https://custom-url.com", tb.Tools[0]["server_url"]) + // server_label should still be filled in. + assert.Equal(t, "my-conn", tb.Tools[0]["server_label"]) +} + +func TestEnrichToolboxFromConnections_EmptyTarget(t *testing.T) { + t.Parallel() + + connByName := map[string]toolboxConnection{ + "no-target": {Name: "no-target", Target: ""}, + } + + tb := project.Toolbox{ + Name: "test", + Tools: []map[string]any{ + {"type": "mcp", "project_connection_id": "no-target"}, + }, + } + + enrichToolboxFromConnections(&tb, connByName) + + // Empty target → server_url should NOT be set. 
+ _, hasURL := tb.Tools[0]["server_url"] + assert.False(t, hasURL) + // server_label should still be set. + assert.Equal(t, "no-target", tb.Tools[0]["server_label"]) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go index c9f1d05a308..0496a738ec1 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go @@ -171,7 +171,18 @@ func (a *OptimizeDeployAction) runDirect( return err } - // Step 5: Print success. + // Step 5: Report the deployment to the optimization service (best-effort). + if err := optClient.ReportDeployment(ctx, &optimize_api.DeploymentReport{ + CandidateID: a.flags.candidate, + AgentName: agentName, + AgentVersion: versionObj.Version, + }); err != nil { + // Non-fatal — deployment succeeded, just log the reporting failure. + fmt.Fprintf(out, " %s failed to report deployment to optimization service: %s\n", + color.YellowString("warning:"), err) + } + + // Step 6: Print success. 
fmt.Fprintln(out) color.New(color.FgGreen, color.Bold).Fprintf(out, " \u2713 Successfully deployed candidate %s as version %s\n", a.flags.candidate, versionObj.Version) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go index 7129ca9bf77..c46717079d4 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go @@ -11,10 +11,12 @@ import ( "context" "fmt" "io" + "log" "os" "strings" "azureaiagent/internal/pkg/agents/eval_api" + "azureaiagent/internal/pkg/agents/optimize_api" azdext "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/spf13/cobra" @@ -62,9 +64,6 @@ func (f *optimizeConnectionFlags) resolve(ctx context.Context) (string, error) { return projectEndpoint, nil } -// optimizeAPIVersion is the API version used for optimization service calls. -const optimizeAPIVersion = "v1" - // optimizeLastJobIDKey is the azd environment key for the last optimization job ID. const optimizeLastJobIDKey = "OPTIMIZE_LAST_OPERATION_ID" @@ -131,3 +130,71 @@ func printOptimizePortalLink(ctx context.Context, out io.Writer, agentName, oper return prefix.OptimizationURL(agentName, operationID) }) } + +// reportOptimizationDeployments reports optimization candidate deployments to FAOS. +// For each hosted agent service, if AGENT_{KEY}_OPTIMIZATION_CANDIDATE_ID is set in +// the azd environment, it calls the promote API and then clears the env var. +// This is best-effort — failures are logged but do not block the deploy. 
+func reportOptimizationDeployments( + ctx context.Context, + azdClient *azdext.AzdClient, + hostedAgents []*azdext.ServiceConfig, + envName, projectEndpoint string, + newClient func(endpoint string) *optimize_api.OptimizeClient, +) { + defer func() { + if r := recover(); r != nil { + log.Printf("postdeploy: optimization deployment reporting panicked: %v", r) + } + }() + + log.Printf("postdeploy: reporting optimization deployments for %d hosted agents", len(hostedAgents)) + + for _, svc := range hostedAgents { + serviceKey := toServiceKey(svc.Name) + candidateKey := fmt.Sprintf("AGENT_%s_OPTIMIZATION_CANDIDATE_ID", serviceKey) + + candidateResp, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envName, + Key: candidateKey, + }) + if err != nil || candidateResp.Value == "" { + log.Printf("postdeploy: no optimization candidate for %s, skipping", svc.Name) + continue + } + + versionKey := fmt.Sprintf("AGENT_%s_VERSION", serviceKey) + versionResp, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envName, + Key: versionKey, + }) + if err != nil || versionResp.Value == "" { + log.Printf("postdeploy: no version for %s, skipping", svc.Name) + continue + } + + log.Printf("postdeploy: promoting candidate %s for %s (version %s)", + candidateResp.Value, svc.Name, versionResp.Value) + + optClient := newClient(projectEndpoint) + if err := optClient.ReportDeployment(ctx, &optimize_api.DeploymentReport{ + CandidateID: candidateResp.Value, + AgentName: svc.Name, + AgentVersion: versionResp.Value, + }); err != nil { + log.Printf("postdeploy: failed to report optimization deployment for %s: %v", svc.Name, err) + continue + } + + log.Printf("postdeploy: successfully promoted candidate %s for %s", candidateResp.Value, svc.Name) + + // Clear the candidate ID after successful reporting. 
+ if _, err := azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ + EnvName: envName, + Key: candidateKey, + Value: "", + }); err != nil { + log.Printf("postdeploy: failed to clear %s: %v", candidateKey, err) + } + } +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go index c7944074181..889f50979d9 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go @@ -5,9 +5,21 @@ package cmd import ( "context" + "encoding/json" + "io" + "net" + "net/http" + "net/http/httptest" "testing" + "azureaiagent/internal/pkg/agents/optimize_api" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" ) func TestOptimizeConnectionFlags_Resolve_AllEmpty(t *testing.T) { @@ -56,3 +68,292 @@ func TestOptimizeConnectionFlags_Resolve_ProjectEndpointFlag(t *testing.T) { assert.NoError(t, err) assert.Equal(t, "https://my-project.services.ai.azure.com", endpoint) } + +// newOptimizeTestAzdClient creates a test AzdClient backed by a gRPC server +// with the given environment service implementation. 
+func newOptimizeTestAzdClient( + t *testing.T, + envServer azdext.EnvironmentServiceServer, +) *azdext.AzdClient { + t.Helper() + + grpcServer := grpc.NewServer() + azdext.RegisterEnvironmentServiceServer(grpcServer, envServer) + + listener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + + go func() { _ = grpcServer.Serve(listener) }() + + t.Cleanup(func() { + grpcServer.Stop() + _ = listener.Close() + }) + + azdClient, err := azdext.NewAzdClient(azdext.WithAddress(listener.Addr().String())) + require.NoError(t, err) + t.Cleanup(func() { azdClient.Close() }) + + return azdClient +} + +// newTestOptimizeClient creates an OptimizeClient that talks to the given +// httptest server, using a bare pipeline (no auth). +func newTestOptimizeClient(endpoint string) *optimize_api.OptimizeClient { + pl := runtime.NewPipeline("test", "v0.0.0", runtime.PipelineOptions{}, &policy.ClientOptions{}) + return optimize_api.NewOptimizeClientFromPipeline(endpoint, pl) +} + +func TestReportOptimizationDeployments_NoAgents(t *testing.T) { + t.Parallel() + + envServer := &testEnvironmentServiceServer{ + values: map[string]map[string]string{ + "dev": {}, + }, + } + azdClient := newOptimizeTestAzdClient(t, envServer) + + // Should complete without calling any API. 
+ reportOptimizationDeployments( + t.Context(), azdClient, nil, "dev", "https://unused.example.com", + newTestOptimizeClient, + ) +} + +func TestReportOptimizationDeployments_Success_ClearsCandidate(t *testing.T) { + t.Parallel() + + envServer := &testEnvironmentServiceServer{ + values: map[string]map[string]string{ + "dev": { + "AGENT_MY_AGENT_OPTIMIZATION_CANDIDATE_ID": "cand-123", + "AGENT_MY_AGENT_VERSION": "v2", + }, + }, + } + azdClient := newOptimizeTestAzdClient(t, envServer) + + var gotURL string + var gotBody optimize_api.DeploymentReport + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotURL = r.URL.String() + body, _ := io.ReadAll(r.Body) + _ = json.Unmarshal(body, &gotBody) + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + agents := []*azdext.ServiceConfig{{Name: "my-agent"}} + + reportOptimizationDeployments( + t.Context(), azdClient, agents, "dev", srv.URL, + newTestOptimizeClient, + ) + + assert.Contains(t, gotURL, "/optimize/candidates/cand-123:promote") + assert.Equal(t, "my-agent", gotBody.AgentName) + assert.Equal(t, "v2", gotBody.AgentVersion) + // CandidateID is json:"-", so it should not appear in the body. + assert.Empty(t, gotBody.CandidateID) + + // The candidate key should be cleared after successful reporting. + assert.Equal(t, "", envServer.values["dev"]["AGENT_MY_AGENT_OPTIMIZATION_CANDIDATE_ID"]) +} + +func TestReportOptimizationDeployments_MissingCandidateID_Skips(t *testing.T) { + t.Parallel() + + envServer := &testEnvironmentServiceServer{ + values: map[string]map[string]string{ + "dev": { + // No AGENT_SVC_OPTIMIZATION_CANDIDATE_ID at all. 
+ "AGENT_SVC_VERSION": "v1", + }, + }, + } + azdClient := newOptimizeTestAzdClient(t, envServer) + + apiCalled := false + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + apiCalled = true + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + agents := []*azdext.ServiceConfig{{Name: "svc"}} + reportOptimizationDeployments( + t.Context(), azdClient, agents, "dev", srv.URL, + newTestOptimizeClient, + ) + + assert.False(t, apiCalled, "API should not be called when candidate ID is missing") +} + +func TestReportOptimizationDeployments_MissingVersion_Skips(t *testing.T) { + t.Parallel() + + envServer := &testEnvironmentServiceServer{ + values: map[string]map[string]string{ + "dev": { + "AGENT_SVC_OPTIMIZATION_CANDIDATE_ID": "cand-456", + // No AGENT_SVC_VERSION. + }, + }, + } + azdClient := newOptimizeTestAzdClient(t, envServer) + + apiCalled := false + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + apiCalled = true + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + agents := []*azdext.ServiceConfig{{Name: "svc"}} + reportOptimizationDeployments( + t.Context(), azdClient, agents, "dev", srv.URL, + newTestOptimizeClient, + ) + + assert.False(t, apiCalled, "API should not be called when version is missing") +} + +func TestReportOptimizationDeployments_APIFailure_DoesNotClearCandidate(t *testing.T) { + t.Parallel() + + envServer := &testEnvironmentServiceServer{ + values: map[string]map[string]string{ + "dev": { + "AGENT_SVC_OPTIMIZATION_CANDIDATE_ID": "cand-789", + "AGENT_SVC_VERSION": "v3", + }, + }, + } + azdClient := newOptimizeTestAzdClient(t, envServer) + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + })) + defer srv.Close() + + agents := []*azdext.ServiceConfig{{Name: "svc"}} + reportOptimizationDeployments( + t.Context(), azdClient, agents, "dev", srv.URL, + 
newTestOptimizeClient, + ) + + // Candidate key should NOT be cleared when the API returns an error. + assert.Equal(t, "cand-789", envServer.values["dev"]["AGENT_SVC_OPTIMIZATION_CANDIDATE_ID"]) +} + +func TestReportOptimizationDeployments_MultipleAgents(t *testing.T) { + t.Parallel() + + envServer := &testEnvironmentServiceServer{ + values: map[string]map[string]string{ + "dev": { + "AGENT_ALPHA_OPTIMIZATION_CANDIDATE_ID": "c-a", + "AGENT_ALPHA_VERSION": "v1", + // beta has no candidate — should be skipped. + "AGENT_BETA_VERSION": "v2", + // gamma has candidate but API will fail for it. + "AGENT_GAMMA_OPTIMIZATION_CANDIDATE_ID": "c-g", + "AGENT_GAMMA_VERSION": "v3", + }, + }, + } + azdClient := newOptimizeTestAzdClient(t, envServer) + + promoted := map[string]bool{} + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/optimize/candidates/c-g:promote" { + w.WriteHeader(http.StatusInternalServerError) + return + } + promoted[r.URL.Path] = true + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + agents := []*azdext.ServiceConfig{ + {Name: "alpha"}, + {Name: "beta"}, + {Name: "gamma"}, + } + + reportOptimizationDeployments( + t.Context(), azdClient, agents, "dev", srv.URL, + newTestOptimizeClient, + ) + + // Alpha: promoted and cleared. + assert.True(t, promoted["/optimize/candidates/c-a:promote"]) + assert.Equal(t, "", envServer.values["dev"]["AGENT_ALPHA_OPTIMIZATION_CANDIDATE_ID"]) + + // Beta: skipped (no candidate ID), no API call. + assert.False(t, promoted["/optimize/candidates/:promote"]) // shouldn't appear + + // Gamma: API failed, so candidate key should remain. 
+ assert.Equal(t, "c-g", envServer.values["dev"]["AGENT_GAMMA_OPTIMIZATION_CANDIDATE_ID"]) +} + +func TestReportOptimizationDeployments_ServiceNameWithDashes(t *testing.T) { + t.Parallel() + + envServer := &testEnvironmentServiceServer{ + values: map[string]map[string]string{ + "dev": { + "AGENT_MY_COOL_AGENT_OPTIMIZATION_CANDIDATE_ID": "cand-dash", + "AGENT_MY_COOL_AGENT_VERSION": "v5", + }, + }, + } + azdClient := newOptimizeTestAzdClient(t, envServer) + + var gotURL string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotURL = r.URL.String() + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + agents := []*azdext.ServiceConfig{{Name: "my-cool-agent"}} + reportOptimizationDeployments( + t.Context(), azdClient, agents, "dev", srv.URL, + newTestOptimizeClient, + ) + + assert.Contains(t, gotURL, "/optimize/candidates/cand-dash:promote") + assert.Equal(t, "", envServer.values["dev"]["AGENT_MY_COOL_AGENT_OPTIMIZATION_CANDIDATE_ID"]) +} + +func TestReportOptimizationDeployments_PanicRecovery(t *testing.T) { + t.Parallel() + + envServer := &testEnvironmentServiceServer{ + values: map[string]map[string]string{ + "dev": { + "AGENT_SVC_OPTIMIZATION_CANDIDATE_ID": "cand-panic", + "AGENT_SVC_VERSION": "v1", + }, + }, + } + azdClient := newOptimizeTestAzdClient(t, envServer) + + agents := []*azdext.ServiceConfig{{Name: "svc"}} + + // Pass a newClient factory that panics. The recover guard should + // prevent this from crashing the caller. + assert.NotPanics(t, func() { + reportOptimizationDeployments( + t.Context(), azdClient, agents, "dev", "https://unused", + func(_ string) *optimize_api.OptimizeClient { + panic("boom") + }, + ) + }) + + // Candidate key should remain since the promote never succeeded. 
+ assert.Equal(t, "cand-panic", envServer.values["dev"]["AGENT_SVC_OPTIMIZATION_CANDIDATE_ID"]) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go index fc77e2e7ed3..435e73a4500 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go @@ -73,7 +73,7 @@ func (c *OptimizeClient) StartOptimize( ctx context.Context, optimizeReq *OptimizeRequest, ) (*OptimizeResponse, error) { - url := fmt.Sprintf("%s/optimize?api-version=v1", c.endpoint) + url := fmt.Sprintf("%s/optimize?api-version=%s", c.endpoint, APIVersion) payload, err := json.Marshal(optimizeReq) if err != nil { @@ -117,7 +117,7 @@ func (c *OptimizeClient) GetOptimizeStatus( ctx context.Context, operationID string, ) (*OptimizeJobStatus, error) { - url := fmt.Sprintf("%s/optimize/%s?api-version=v1", c.endpoint, operationID) + url := fmt.Sprintf("%s/optimize/%s?api-version=%s", c.endpoint, operationID, APIVersion) req, err := runtime.NewRequest(ctx, http.MethodGet, url) if err != nil { @@ -153,7 +153,7 @@ func (c *OptimizeClient) ListOptimizeJobs( limit int, status string, ) (*OptimizeListResponse, error) { - url := fmt.Sprintf("%s/optimize?api-version=v1&limit=%d", c.endpoint, limit) + url := fmt.Sprintf("%s/optimize?api-version=%s&limit=%d", c.endpoint, APIVersion, limit) if status != "" { url += "&status=" + status } @@ -191,7 +191,7 @@ func (c *OptimizeClient) CancelOptimize( ctx context.Context, operationID string, ) (*OptimizeCancelResponse, error) { - url := fmt.Sprintf("%s/optimize/%s/cancel?api-version=v1", c.endpoint, operationID) + url := fmt.Sprintf("%s/optimize/%s/cancel?api-version=%s", c.endpoint, operationID, APIVersion) req, err := runtime.NewRequest(ctx, http.MethodPost, url) if err != nil { @@ -228,8 +228,8 @@ func (c *OptimizeClient) ReportDeployment( report 
*DeploymentReport, ) error { url := fmt.Sprintf( - "%s/optimize/candidates/%s:promote?api-version=v1", - c.endpoint, report.CandidateID, + "%s/optimize/candidates/%s:promote?api-version=%s", + c.endpoint, report.CandidateID, APIVersion, ) payload, err := json.Marshal(report) @@ -267,7 +267,7 @@ func (c *OptimizeClient) GetCandidateConfig( ctx context.Context, candidateID string, ) (any, error) { - url := fmt.Sprintf("%s/optimize/candidates/%s/config?api-version=v1", c.endpoint, candidateID) + url := fmt.Sprintf("%s/optimize/candidates/%s/config?api-version=%s", c.endpoint, candidateID, APIVersion) req, err := runtime.NewRequest(ctx, http.MethodGet, url) if err != nil { @@ -302,7 +302,7 @@ func (c *OptimizeClient) GetCandidate( ctx context.Context, candidateID string, ) (*CandidateManifest, error) { - url := fmt.Sprintf("%s/optimize/candidates/%s?api-version=v1", c.endpoint, candidateID) + url := fmt.Sprintf("%s/optimize/candidates/%s?api-version=%s", c.endpoint, candidateID, APIVersion) req, err := runtime.NewRequest(ctx, http.MethodGet, url) if err != nil { @@ -338,8 +338,8 @@ func (c *OptimizeClient) GetCandidateFile( candidateID string, filePath string, ) (string, error) { - url := fmt.Sprintf("%s/optimize/candidates/%s/files?api-version=v1&path=%s", - c.endpoint, candidateID, netURL.QueryEscape(filePath)) + url := fmt.Sprintf("%s/optimize/candidates/%s/files?api-version=%s&path=%s", + c.endpoint, candidateID, APIVersion, netURL.QueryEscape(filePath)) req, err := runtime.NewRequest(ctx, http.MethodGet, url) if err != nil { diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client_test.go index 2396468a4bf..52e72e59c93 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client_test.go @@ -224,28 +224,31 @@ func 
TestListOptimizeJobs_NoStatusFilter(t *testing.T) { func TestReportDeployment(t *testing.T) { t.Parallel() - var capturedBody DeploymentReport + var capturedBody map[string]string server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { assert.Equal(t, http.MethodPost, r.Method) - assert.Contains(t, r.URL.Path, "/optimize/candidates/cand-42/deployments") + assert.Contains(t, r.URL.Path, "/optimize/candidates/cand-42:promote") assert.Contains(t, r.URL.RawQuery, "api-version=v1") err := json.NewDecoder(r.Body).Decode(&capturedBody) assert.NoError(t, err) - w.WriteHeader(http.StatusCreated) + w.WriteHeader(http.StatusOK) })) defer server.Close() client := newTestClient(server.URL) err := client.ReportDeployment(t.Context(), &DeploymentReport{ - CandidateID: "cand-42", - ProjectEndpoint: "https://proj.endpoint", + CandidateID: "cand-42", + AgentName: "my-agent", + AgentVersion: "3", }) require.NoError(t, err) - assert.Equal(t, "cand-42", capturedBody.CandidateID) - assert.Equal(t, "https://proj.endpoint", capturedBody.ProjectEndpoint) + assert.Equal(t, "my-agent", capturedBody["agentName"]) + assert.Equal(t, "3", capturedBody["agentVersion"]) + // CandidateID should not appear in the body (json:"-") + assert.Empty(t, capturedBody["candidateId"]) } func TestReportDeployment_HTTPError(t *testing.T) { @@ -259,7 +262,9 @@ func TestReportDeployment_HTTPError(t *testing.T) { client := newTestClient(server.URL) err := client.ReportDeployment(t.Context(), &DeploymentReport{ - CandidateID: "bad-id", + CandidateID: "bad-id", + AgentName: "agent", + AgentVersion: "1", }) require.Error(t, err) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go index e4337ace452..0cd30c78d0b 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go +++ 
b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go @@ -8,6 +8,9 @@ package optimize_api import "encoding/json" +// APIVersion is the API version used for all optimization service calls. +const APIVersion = "v1" + // Optimization job status constants. const ( StatusPending = "pending" @@ -223,11 +226,12 @@ type OptimizeCancelResponse struct { // --- Deployment report --- -// DeploymentReport is sent to FAOS after a candidate is deployed, +// DeploymentReport is sent to FAOS after a candidate is promoted, // creating the candidate→deployment mapping. type DeploymentReport struct { - CandidateID string `json:"candidateId"` - ProjectEndpoint string `json:"projectEndpoint,omitempty"` + CandidateID string `json:"-"` // used in URL path, not serialized + AgentName string `json:"agentName"` // deployed agent name + AgentVersion string `json:"agentVersion"` // deployed agent version } // --- Candidate models --- diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go index 732a0045459..546ed43748d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go @@ -236,3 +236,38 @@ func TestOptimizeListResponse_RoundTrip(t *testing.T) { assert.Equal(t, "op-2", got.LastID) assert.True(t, got.HasMore) } + +// ---- DeploymentReport serialization ---- + +func TestDeploymentReport_JSON_ExcludesCandidateID(t *testing.T) { + t.Parallel() + + report := DeploymentReport{ + CandidateID: "cand_abc123", + AgentName: "my-agent", + AgentVersion: "3", + } + + data, err := json.Marshal(report) + require.NoError(t, err) + + // CandidateID has json:"-", so it must not appear in the body. 
+ assert.NotContains(t, string(data), "candidateId") + assert.NotContains(t, string(data), "cand_abc123") + + // agentName and agentVersion must be present. + assert.Contains(t, string(data), `"agentName":"my-agent"`) + assert.Contains(t, string(data), `"agentVersion":"3"`) +} + +func TestDeploymentReport_JSON_RoundTrip(t *testing.T) { + t.Parallel() + + body := `{"agentName":"test-agent","agentVersion":"5"}` + var report DeploymentReport + require.NoError(t, json.Unmarshal([]byte(body), &report)) + + assert.Equal(t, "test-agent", report.AgentName) + assert.Equal(t, "5", report.AgentVersion) + assert.Empty(t, report.CandidateID, "CandidateID should not be populated from JSON") +} From 38ff8c23f37fbdd2d0793e669e1547ec97e63fee Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 21 May 2026 11:47:55 -0700 Subject: [PATCH 24/33] review version --- .../azd_observability_bugbash.md | 882 ------------------ .../extensions/azure.ai.agents/extension.yaml | 2 +- .../azure.ai.agents/internal/cmd/eval_init.go | 4 +- .../internal/cmd/eval_init_prompts.go | 6 +- .../internal/cmd/eval_init_test.go | 25 - .../azure.ai.agents/internal/cmd/eval_test.go | 9 - .../internal/pkg/agents/agent_yaml/yaml.go | 2 - .../pkg/agents/eval_api/generation.go | 9 +- .../pkg/agents/eval_api/generation_test.go | 11 +- .../internal/pkg/agents/opteval/yaml_test.go | 4 +- .../extensions/azure.ai.agents/version.txt | 2 +- cli/azd/extensions/registry.json | 304 ------ 12 files changed, 12 insertions(+), 1248 deletions(-) delete mode 100644 cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md diff --git a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md b/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md deleted file mode 100644 index 09ec71bce83..00000000000 --- a/cli/azd/extensions/azure.ai.agents/azd_observability_bugbash.md +++ /dev/null @@ -1,882 +0,0 @@ -# azd ai agent eval + optimize — BugBash - -> **TiP regions** Currently, some dependent APIs are only 
available in this region for now - -## 1. Install the extension - -Prerequisites: [azd CLI](https://aka.ms/azd), [Go](https://go.dev/dl/), `az login` - -```bash -# Installing private registry for bugbash -azd ext install microsoft.azd.extensions -azd ext source add --name zyysurely --type url --location https://raw.githubusercontent.com/Zyysurely/azure-dev/zyying/opt_eval/cli/azd/extensions/registry.json -azd ext install azure.ai.agents --source zyysurely --version 0.1.33-optbugbash-preview --force - -# If then you want to switch back to the official version, use -azd ext install azure.ai.agents --force -``` - -Verify:`azd ai agent eval --help` and `azd ai agent optimize --help` - -## 2. Ensure the access to the bugbash project - -https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/2d385bf4-0756-4a76-aa95-28bf9ed3b625/resourceGroups/rg-azdbugbash/users -Please activate `Foundry User` and `Owner` access - - -## 3. Create an optimization-ready agent - -Navigate to a fresh directory outside the extension repo, init the agent and point to our bugbash project, if you already have an azd project with TiP foundry account, you can continue to use it. - -```bash -git clone https://github.com/ai-platform-microsoft/foundry-observability-playground.git -cd .\foundry-observability-playground\demos\build2026\agents\travel-approver\ -azd ai agent init --project-id /subscriptions/2d385bf4-0756-4a76-aa95-28bf9ed3b625/resourceGroups/rg-azdbugbash/providers/Microsoft.CognitiveServices/accounts/azd-bugbash-0514/projects/bugbash-westus2 -# !!! Customize your agent name and model deployment -``` - -The template includes `agent_optimization/` — a small package that reads config -injected by the optimization service at runtime. Your agent calls `load_config()` at startup: - -```python -from agent_optimization import load_config - -config = load_config( - default_instructions="You are a helpful assistant.", - default_model="gpt-4.1-mini", -) -``` - -## 4. 
Test locally [You can skip since the current sample agent code has been verified] - -```bash -azd ai agent run -# In another terminal: -azd ai agent invoke --local "Hello!" -``` - -## 5. Deploy hosted agent - -Point to an existing Foundry project and deploy (no `azd provision` needed): - -```bash -# Windows (PowerShell) -azd deploy -``` - - -Verify: `azd ai agent invoke "Hello!"` - -> **If you have Owner permissions** and want fresh resources: run `azd provision` before `azd deploy`. - - -## 6. E2E Hero Scenarios - -There are two paths depending on whether you use the **bugbash project** or **your own project**. - ---- - -### Path A: Using the bugbash project (eval + optimize) - -> Use this path if you cloned the template in step 3 and deployed to the bugbash Foundry project. -> You have access to the eval APIs and can run the full eval → optimize flow. - -All commands below auto-detect the agent context from the current azd environment. -Run them from your deployed azd project directory. - -#### 6a-A. Initialize an eval suite - -> Generating the eval suite which can optimize your agent adaptively, which could used for optimization as well - -```bash -# including both data generation and evaluator generation -azd ai agent eval init - -# (Recommended) Using our provided golden dataset, but also adaptive evaluator -azd ai agent eval init --dataset eval/travel_approval_golden.jsonl -``` - -The command resolves your agent from `azure.yaml` and prompts interactively: - -``` -Resolving eval context... - Reading project configuration... - Detecting agent service... - Resolving Foundry project endpoint... 
- -Detected eval target: - (✓) Service: travel-approver-bb (azure.yaml) - (✓) Agent: travel-approver-azd-bb (AGENT_TRAVEL_APPROVER_BB_NAME) - (✓) Version: 2 (AGENT_TRAVEL_APPROVER_BB_VERSION) - (✓) Kind: hosted (agent.yaml) - (✓) Endpoint: https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2 (AZURE_AI_PROJECT_ENDPOINT) - (✓) Project: D:\optimization\bugbash\foundry-observability-playground\demos\build2026\agents\travel-approver (azure.yaml service "travel-approver-bb" project path) - Eval config: D:\optimization\bugbash\foundry-observability-playground\demos\build2026\agents\travel-approver\eval.yaml - - Agent Config: D:\optimization\bugbash\foundry-observability-playground\demos\build2026\agents\travel-approver\.agent_configs\baseline\metadata.yaml -? Eval suite name: travel-approver-azd-bb -? Instruction file: .agent_configs\baseline\instructions.md -? Include agent traces for evaluator generation?: No -? Select the model for evaluation and generation: gpt-4o (deployed) -? 
Max samples (between 15 and 1000): 15 - (–) Running Evaluator generation (evaluatorgen-travel-approver-azd-bb-v1-3392d06e) - (–) Running Dataset generation (datagen-c00db6c5b7ee4585aa9f25f7089a05a6) - (✓) Done Evaluator generation (34 seconds) - (✓) Done Dataset generation (2m 19s) - -Eval suite created - Config: D:\optimization\bugbash\foundry-observability-playground\demos\build2026\agents\travel-approver\eval.yaml - Dataset: travel-approver-azd-bb (2.0) - D:\optimization\bugbash\foundry-observability-playground\demos\build2026\agents\travel-approver\datasets\travel-approver-azd-bb - Evaluator: travel-approver-azd-bb (1) - D:\optimization\bugbash\foundry-observability-playground\demos\build2026\agents\travel-approver\evaluators\travel-approver-azd-bb\rubric_dimensions.json - - Evaluator dimensions (6): - Weight Dimension - ────── ───────── - 10 policy_compliance - 6 budget_accuracy - 5 alternative_suggestions_specificity - 4 decision_explanation_clarity - 3 user_constraint_adherence - 5 general_quality - - Portal: - Dataset: https://ai.azure.com/nextgen/r/LThb9AdWSnaqlSi_ntO2JQ,rg-azdbugbash,,azd-bugbash-0514,bugbash-westus2/build/data/datasets/travel-approver-azd-bb/2.0 - Evaluator: https://ai.azure.com/nextgen/r/LThb9AdWSnaqlSi_ntO2JQ,rg-azdbugbash,,azd-bugbash-0514,bugbash-westus2/build/evaluations/catalog/travel-approver-azd-bb/1 - - Next steps: - azd ai agent eval run - Run the eval suite against your agent. - azd ai agent eval update - Edit the generated dataset or evaluator locally, then upload changes. -``` - -#### 6b-A. Run an eval (Optional, if you want to try evaluation run) - -```bash -azd ai agent eval run -``` - -Reads `eval.yaml`, creates the eval on the Foundry backend, and submits a run against your deployed agent. - -#### 6c-A. 
Browse eval results (Optional) - -```bash -# List all evals (table with status, run count, created date) -azd ai agent eval list - -# Show details for the most recent eval (auto-resolved from azd env) -azd ai agent eval show - -# Export results to JSON for offline analysis -azd ai agent eval show -O results.json -``` - -#### 6d-A. Optimize the agent - -After the eval suite is ready, run optimize. It auto-detects the `eval.yaml` you just created. - -```bash -azd ai agent optimize -``` - -Expected output (takes ~5–20 minutes): - -``` -# azd ai agent optimize -? Select an agent service: travel-zyying-new -? Found eval.yaml in project. Use it for optimization?: Yes -? Instruction file: .agent_configs\baseline\instructions.md -? Skills directory (enter to skip): skills -? Would you like to specify target models for optimization?: Yes -? Select target models for optimization (current: gpt-4o): gpt-4o (current), gpt-4.1 -Optimizing agent "travel-zyying-new"... - Config: D:\optimization\public\viveks-scratch\optimization-demo-v2\src\travel-approver-demo\eval.yaml - Baseline saved to .agent_configs\baseline\metadata.yaml - Job ID: opt_b1cca48e468b4a508d21bfa19cdd16de - Status: pending - Portal: https://eastus2euap.ai.azure.com/nextgen/r/LThb9AdWSnaqlSi_ntO2JQ,rg-azdbugbash,,azd-bugbash-0514,bugbash-westus2/build/agents/travel-zyying-new/optimization/opt_b1cca48e468b4a508d21bfa19cdd16de?flight=enable_faos_read_ui - - ⠼ completed · strategy: gepa · iteration 1 · score: 0.77 · 7m50s - -Results: - Candidate Score Pass Tokens - ──────────────────── ─────── ─────── ──────── - baseline ★ 0.77 100% 0 - candidate_1 0.74 100% 0 - - Candidate IDs: - ★ baseline cand_c6532ad867594dd4b6878a45604a4994 - candidate_1 cand_d9bedab23c5641d4a2d83c98aa635c2f - - Apply the best candidate locally, then deploy: - azd ai agent optimize apply --candidate cand_c6532ad867594dd4b6878a45604a4994 - azd deploy -``` - -The ★ marks the best candidate. Copy the deploy command from the output to promote it. 
- -#### Customizing optimization options in `eval.yaml` - -You can fine-tune optimization behavior by adding or modifying the `options:` section in your `eval.yaml`. Below are all available fields, their types, and defaults: - -```yaml -options: - eval_model: "gpt-4o" # (string) Model used for evaluation. Default: "gpt-4o" - target_attributes: # If not specify, we should auto detect it - - instruction - - skill - - model - target_config: - model: - - gpt-4.1 - - gpt-4.1-mini - - gpt-4o - budget: 0 # Deprecating # (int) Max optimization budget (number of candidates). Default: 5 - max_iterations: 4 # (int) Max iterations per strategy. Default: 4 (when strategies are default) - min_improvement: 0.0 # (float) Minimum score improvement to accept a candidate. - keep_versions: false # (bool) Keep all intermediate agent versions. Default: false - reflection_model: "" # (string) Model for reflection steps. Default: "" (uses eval_model) -``` - -#### 6e-A. Monitor optimization jobs - -```bash -# Watch a running job in real-time -azd ai agent optimize status --watch - -# List all optimization runs -azd ai agent optimize list - -# Cancel a running job -azd ai agent optimize cancel -``` - -#### 6f-A. Deploy the winning candidate - -> **⚠️ Known Issue:** Due to a FAOS CANDIDATE API issue, `optimize deploy` and `optimize apply` cannot fetch candidate config at this time. This step is blocked until the API issue is resolved. -But you can check agent optimization job in foundry UI with `?flight=enable_faos_read_ui` - -The optimize output includes a ready-to-use deploy command: - -```bash -azd ai agent optimize deploy --candidate -``` - -This creates a new agent version with `OPTIMIZATION_CONFIG` set to the candidate's -config (instructions, model, temperature). The agent SDK's `load_config()` reads this -at startup and applies the optimized settings. - -#### 6g-A. 
Verify the optimized agent - -> **⚠️ Blocked:** This step depends on 6f, which is currently blocked by the FAOS CANDIDATE API issue. - -```bash -azd ai agent invoke "Hello!" -# Expected: agent responds using the optimized configuration -``` - ---- - -### Path B: Using your own project (optimize only, built-in dataset) - -> Use this path if you have your own azd project with a deployed hosted agent on a westus2/ncus Foundry account. -> The eval APIs (`eval init`, `eval run`) require specific backend support that may not be available on your project. -> Instead, go directly to `optimize` which uses a **built-in dataset** (3 tasks, 12 criteria) — no eval setup needed. - -#### 6a-B. Prerequisites - -- You have an azd project with a hosted agent already deployed (`azd deploy` completed). -- Your agent uses the `agent_optimization` SDK package with `load_config()`. -- You are logged in (`az login`) and have access to the Foundry project. - -#### 6b-B. Optimize the agent (built-in dataset) - -From your azd project directory: - -```bash -azd ai agent optimize -# → If eval.yaml exists, select "No" to use the built-in dataset -# → If no eval.yaml, it automatically uses the built-in dataset -``` - -Or explicitly skip the eval.yaml prompt: - -```bash -azd ai agent optimize --no-prompt -# Always uses built-in defaults (3 tasks, 12 criteria) -``` - -Expected output (takes ~5–20 minutes): - -``` -Optimizing agent "your-agent"... - Dataset: built-in (3 tasks, 12 criteria) - Job ID: opt_abc123... - ⠦ completed · strategy: gepa · iteration 1 · score: 0.85 · 5m0s - -Results: - Candidate Score Pass Tokens - ──────────────────── ─────── ─────── ──────── - baseline 0.60 100% 300 - baseline_instr_v1 ★ 0.85 100% 980 - - Deploy the best candidate: - azd ai agent optimize deploy --candidate cand_... -``` - -#### 6c-B. 
Monitor optimization jobs - -```bash -# Watch the running job -azd ai agent optimize status --watch - -# List all jobs -azd ai agent optimize list - -# Cancel if needed -azd ai agent optimize cancel -``` - -#### 6d-B. Deploy the winning candidate - -> **⚠️ Known Issue:** Due to a FAOS CANDIDATE API issue, `optimize deploy` and `optimize apply` cannot fetch candidate config at this time. -> You can check agent optimization job results in Foundry UI with `?flight=enable_faos_read_ui`. - -```bash -azd ai agent optimize deploy --candidate -``` - -#### 6e-B. Verify the optimized agent - -> **⚠️ Blocked:** This step depends on 6d-B, which is currently blocked by the FAOS CANDIDATE API issue. - -```bash -azd ai agent invoke "Hello!" -# Expected: agent responds using the optimized configuration -``` - ---- - -## Comprehensive Test Scenarios - -### A. `azd ai agent eval init` - -#### Inside azd project (cd into your deployed azd project) - -```bash -# A1. Default interactive init — auto-detects agent from azd env -azd ai agent eval init --dataset ./data.jsonl -# Expected: prompts for name, instruction, model, max-samples -# writes eval.yaml + artifacts under .azure/.foundry/ - -# A2. Custom eval suite name -azd ai agent eval init --dataset ./data.jsonl --name my-custom-suite -# Expected: config name = "my-custom-suite-" (random suffix appended) - -# A3. Inline gen-instruction (skip prompt) -azd ai agent eval init --dataset ./data.jsonl -g "Test the agent's ability to handle refund requests" -# Expected: uses inline instruction, skips instruction prompt - -# A4. Gen-instruction from file -echo "Test customer support scenarios" > /tmp/instruction.txt -azd ai agent eval init --dataset ./data.jsonl -G /tmp/instruction.txt -# Expected: reads instruction from file - -# A5. Custom eval model -azd ai agent eval init --dataset ./data.jsonl --eval-model gpt-4o -# Expected: uses gpt-4o instead of deployed model default - -# A6. 
Custom evaluators -azd ai agent eval init --dataset ./data.jsonl --evaluator builtin.task_adherence --evaluator custom_eval -# Expected: eval.yaml has both evaluators listed - -# A7. Custom output path -azd ai agent eval init --dataset ./data.jsonl -O my-eval.yaml -# Expected: writes to my-eval.yaml instead of eval.yaml - -# A8. --no-wait mode -azd ai agent eval init --dataset ./data.jsonl --no-wait -# Expected: submits jobs, prints pending op IDs, returns immediately -# eval.yaml has InitStatus: pending - -# A9. Regeneration — eval.yaml already exists -# (run init once first, then run again) -azd ai agent eval init --dataset ./data.jsonl -# Expected: prompts "Existing dataset: ... Do you want to regenerate?" -# and "Existing evaluator: ... Do you want to regenerate?" - -# A10. Reset defaults — overwrite existing config -azd ai agent eval init --dataset ./data.jsonl --reset-defaults -# Expected: overwrites eval.yaml without prompting about existing config - -# A11. Non-interactive mode (no prompts) -azd ai agent eval init --dataset ./data.jsonl --no-prompt -# Expected: uses defaults without prompting. Full regeneration if eval.yaml exists. -# Clean up: Remove-Item env:\AZD_FORCE_TTY - -# A12. Multiple agent services in azure.yaml -# (if your project has 2+ azure.ai.agent services) -azd ai agent eval init --dataset ./data.jsonl -# Expected: prompts to select which agent service -``` - -#### Outside azd project (cd to an empty directory) - -```bash -mkdir /tmp/eval-test && cd /tmp/eval-test - -# A13. No agent flag, no project — should fail -azd ai agent eval init --dataset ./data.jsonl -# Expected: ERROR — "failed to get project config (is there an azure.yaml?)" -# or guidance to use --agent / run from azd project - -# A14. 
Explicit agent + endpoint — works standalone -azd ai agent eval init --dataset ./data.jsonl \ - --agent sample-agent \ - -p https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2 -# Expected: works without azure.yaml; writes eval.yaml in current dir - -# A15. Missing endpoint — should fail with guidance -azd ai agent eval init --dataset ./data.jsonl --agent sample-agent -# Expected: ERROR — "Foundry project context could not be resolved" -# suggests --project-endpoint or azd ai agent init - -# A16. Endpoint via env var -$env:AZURE_AI_PROJECT_ENDPOINT = "https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2" -azd ai agent eval init --dataset ./data.jsonl --agent sample-agent -# Expected: picks up endpoint from env var, works -# Clean up: Remove-Item env:\AZURE_AI_PROJECT_ENDPOINT -``` - ---- - -### B. `azd ai agent eval run` - -#### Inside azd project - -```bash -# B1. Default run (eval.yaml exists from init) -azd ai agent eval run -# Expected: reads eval.yaml from project dir, creates eval, submits run - -# B2. Custom config path -azd ai agent eval run --config my-eval.yaml -# Expected: uses my-eval.yaml instead of eval.yaml - -# B3. Resume pending init -# (if you used --no-wait during init, eval.yaml has pending status) -azd ai agent eval run -# Expected: detects InitStatus: pending, resumes polling, then runs eval -``` - -#### Outside azd project - -```bash -cd /tmp/eval-test # directory with eval.yaml from A14 - -# B4. eval.yaml in cwd, no azd project -azd ai agent eval run -# Expected: falls back to prompt-based endpoint resolution, runs eval - -# B5. No eval.yaml at all -mkdir /tmp/empty-test && cd /tmp/empty-test -azd ai agent eval run -# Expected: ERROR — cannot read eval.yaml -``` - ---- - -### C. `azd ai agent eval list` - -```bash -# C1. 
Default list (inside or outside project, needs endpoint) -azd ai agent eval list -# Expected: table with columns: Eval ID, Name, Status, Runs, Created by, Created on -# max 10 results, active eval marked with * - -# C2. Custom limit -azd ai agent eval list --limit 3 -# Expected: at most 3 rows - -# C3. No evals exist -# (on a fresh project with no evals) -azd ai agent eval list -# Expected: "no evaluations found" or empty table -``` - ---- - -### D. `azd ai agent eval show` - -```bash -# D1. Show by eval ID -azd ai agent eval show -# Expected: eval definition + recent run history - -# D2. Auto-resolve eval ID (from azd env) -azd ai agent eval show -# Expected: uses last eval ID from environment - -# D3. No eval ID available -# (fresh environment, no prior eval) -azd ai agent eval show -# Expected: ERROR — eval ID required - -# D4. Show specific run details -azd ai agent eval show --eval-run-id -# Expected: per-criteria breakdown, passed/failed/errored counts - -# D5. Export eval + runs to JSON -azd ai agent eval show -O results.json -# Expected: writes {"eval": ..., "runs": [...]} to results.json - -# D6. Export single run to JSON -azd ai agent eval show --eval-run-id -O run.json -# Expected: writes single run result to run.json - -# D7. Custom run limit -azd ai agent eval show --limit 5 -# Expected: at most 5 runs in history -``` - ---- - -### E. `azd ai agent optimize` (main command) - -#### Inside azd project - -```bash -# E1. Default optimize — auto-detect agent -azd ai agent optimize -# Expected: if no eval.yaml → uses built-in dataset (3 tasks, 12 criteria) -# if eval.yaml exists → prompts "Found eval.yaml in project. Use it?" - -# E2. Accept eval.yaml prompt -# (run eval init first, then run optimize, confirm yes) -azd ai agent optimize -# Expected: loads config from eval.yaml. Output: "Config: /eval.yaml" - -# E3. Decline eval.yaml prompt -# (eval.yaml exists, decline the prompt) -azd ai agent optimize -# Expected: falls back to built-in defaults. 
Output: "Dataset: built-in (3 tasks, 12 criteria)" - -# E4. eval.yaml + --no-prompt -$env:AZD_FORCE_TTY = "false" -azd ai agent optimize -# Expected: skips eval.yaml prompt, uses built-in defaults -# Clean up: Remove-Item env:\AZD_FORCE_TTY - -# E5. Explicit --config overrides eval.yaml detection -azd ai agent optimize --config spec.yaml -# Expected: uses spec.yaml, ignores eval.yaml entirely - -# E6. Positional agent arg -azd ai agent optimize my-agent -# Expected: uses "my-agent" as agent name - -# E7. --agent flag -azd ai agent optimize --agent my-agent -# Expected: uses flag value - -# E8. Custom eval model -azd ai agent optimize --eval-model gpt-4o -# Expected: overrides options.eval_model in config - -# E9. Custom strategy (single) -azd ai agent optimize -s skill -# Expected: uses only skill strategy - -# E10. Custom strategy (multiple) -azd ai agent optimize -s instruction -s skill -# Expected: uses both strategies - -# E11. --no-wait -azd ai agent optimize --no-wait -# Expected: submits job, prints ID, returns immediately - -# E12. Watch polling progress -azd ai agent optimize -# Expected: spinner shows status, strategy, iteration, score, elapsed time -# final results table with ★ best candidate and deploy command -``` - -#### Outside azd project - -```bash -mkdir /tmp/opt-test && cd /tmp/opt-test - -# E13. No agent flag, no project — should fail -azd ai agent optimize -# Expected: ERROR — "agent name is required: use --agent , or run from an azd project after 'azd deploy'" - -# E14. Explicit agent + endpoint -azd ai agent optimize --agent sample-agent \ - -p https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2 -# Expected: works without project. Uses built-in defaults. - -# E15. 
Explicit agent via env var -$env:AZURE_AI_PROJECT_ENDPOINT = "https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2" -azd ai agent optimize --agent sample-agent -# Expected: resolves endpoint from env var -# Clean up: Remove-Item env:\AZURE_AI_PROJECT_ENDPOINT - -# E16. With config file, no project -azd ai agent optimize --config spec.yaml -# Expected: loads config from file, no project resolution needed -``` - -#### Config validation (can run anywhere with a config file) - -```bash -# E17. Missing agent name in config -# (create spec.yaml with empty agent.name) -azd ai agent optimize --config spec.yaml -# Expected: ERROR — "agent.name is required" - -# E18. Missing eval model -# (config without options.eval_model) -azd ai agent optimize --config spec.yaml -# Expected: ERROR — "options.eval_model is required" - -# E19. No dataset at all -# (config without dataset_file, dataset_reference, or inline) -azd ai agent optimize --config spec.yaml -# Expected: ERROR — "one of dataset_file or dataset_reference is required" - -# E20. Conflicting dataset -# (config with both dataset_file and dataset_reference) -azd ai agent optimize --config spec.yaml -# Expected: ERROR — "dataset_file and dataset_reference are mutually exclusive" - -# E21. Invalid config file path -azd ai agent optimize --config nonexistent.yaml -# Expected: ERROR — file not found + guidance to check path -``` - ---- - -### F. `azd ai agent optimize status` - -```bash -# F1. Status by operation ID -azd ai agent optimize status -# Expected: job summary — ID, Status, Agent, Strategy, Score, Created - -# F2. Auto-resolve from env (after running optimize in project) -azd ai agent optimize status -# Expected: uses OPTIMIZE_LAST_OPERATION_ID from azd env - -# F3. No ID available -# (fresh env, never ran optimize) -azd ai agent optimize status -# Expected: ERROR — operation ID required - -# F4. 
--watch mode -azd ai agent optimize status --watch -# Expected: polls until job completes, shows spinner + progress - -# F5. Custom poll interval -azd ai agent optimize status --watch --poll-interval 10 -# Expected: polls every 10 seconds instead of 5 - -# F6. Completed job shows candidates -azd ai agent optimize status -# Expected: results table with candidates, scores, deploy command -``` - ---- - -### G. `azd ai agent optimize list` - -```bash -# G1. Default list -azd ai agent optimize list -# Expected: table — ID, Status, Agent, Best Score, Created. Max 20 rows. - -# G2. Filter by status -azd ai agent optimize list --status completed -# Expected: only completed jobs shown - -# G3. Invalid status filter -azd ai agent optimize list --status invalid -# Expected: ERROR — invalid status value - -# G4. Custom limit -azd ai agent optimize list --limit 3 -# Expected: at most 3 entries - -# G5. No jobs exist -# (fresh project endpoint) -azd ai agent optimize list -# Expected: "no optimization jobs found" message -``` - ---- - -### H. `azd ai agent optimize cancel` - -```bash -# H1. Cancel a running job -# (start optimize --no-wait first, then cancel) -azd ai agent optimize --no-wait -azd ai agent optimize cancel -# Expected: job cancelled, shows guidance - -# H2. Cancel already-completed job -azd ai agent optimize cancel -# Expected: ERROR or message — job already in terminal state - -# H3. Missing ID argument -azd ai agent optimize cancel -# Expected: ERROR — requires exactly 1 argument -``` - ---- - -### I. `azd ai agent optimize apply` (inside azd project only) - -> **⚠️ Known Issue:** Due to a FAOS CANDIDATE API issue, `optimize apply` and `optimize deploy` cannot apply the optimized result at this time. These commands will fail when trying to fetch candidate config. - -```bash -# I1. 
Apply candidate config to agent.yaml -azd ai agent optimize apply --candidate -# Expected: fetches candidate config, writes OPTIMIZATION_CONFIG and -# OPTIMIZATION_CANDIDATE_ID into agent.yaml env vars. -# Downloads skill files. Prints "azd deploy --service ". -# Verify: cat agent.yaml — should see new env vars appended - -# I2. Auto-detect agent service -azd ai agent optimize apply --candidate -# Expected: resolves agent service from azure.yaml automatically - -# I3. Explicit agent service name -azd ai agent optimize apply --candidate --agent sample-agent -# Expected: uses specified service - -# I4. Missing --candidate flag -azd ai agent optimize apply -# Expected: ERROR — --candidate is required - -# I5. Outside azd project — should fail -cd /tmp/empty-test -azd ai agent optimize apply --candidate -# Expected: ERROR — requires azd project, suggests "optimize deploy" instead -``` - ---- - -### J. `azd ai agent optimize deploy` (API-based, works anywhere) - -```bash -# J1. Deploy candidate via API -azd ai agent optimize deploy --candidate --agent sample-agent -# Expected: creates new agent version with OPTIMIZATION_CONFIG, shows new version number - -# J2. Auto-detect agent inside project -cd -azd ai agent optimize deploy --candidate -# Expected: resolves agent name from project + environment - -# J3. Outside project with explicit agent + endpoint -cd /tmp/empty-test -azd ai agent optimize deploy --candidate --agent sample-agent \ - -p https://azd-bugbash-0514.services.ai.azure.com/api/projects/bugbash-westus2 -# Expected: works without project context - -# J4. Missing --candidate -azd ai agent optimize deploy -# Expected: ERROR — --candidate required - -# J5. Verify deployed version -azd ai agent invoke "Hello!" -# Expected: agent responds using optimized config -``` - ---- - -### K. End-to-end flows - -```bash -# K1. 
Full eval → optimize → apply → deploy roundtrip -azd ai agent eval init --dataset ./data.jsonl -azd ai agent eval run -azd ai agent eval list -azd ai agent eval show -azd ai agent optimize # accept eval.yaml prompt -azd ai agent optimize apply --candidate -azd deploy --service sample-agent -azd ai agent invoke "Hello!" - -# K2. Optimize-only flow (no eval init) -azd ai agent optimize -azd ai agent optimize status # auto-resolves last job -azd ai agent optimize deploy --candidate -azd ai agent invoke "Hello!" - -# K3. Standalone flow (outside project) -mkdir /tmp/standalone && cd /tmp/standalone -azd ai agent optimize --agent sample-agent --eval-model gpt-4o --project-id -azd ai agent optimize list -azd ai agent optimize status -``` - ---- - -### L. Error & edge cases - -```bash -# L1. Not logged in -azd auth logout -azd ai agent optimize --agent sample-agent -# Expected: authentication error - -# L2. Invalid endpoint -azd ai agent optimize --agent sample-agent -p https://invalid.endpoint.com -# Expected: error with reachability guidance - -# L3. --help for all commands -azd ai agent eval --help -azd ai agent eval init --help -azd ai agent eval run --help -azd ai agent eval list --help -azd ai agent eval show --help -azd ai agent optimize --help -azd ai agent optimize status --help -azd ai agent optimize list --help -azd ai agent optimize cancel --help -azd ai agent optimize apply --help -azd ai agent optimize deploy --help -# Expected: accurate, complete help text for each - -# L4. Eval model not deployed -azd ai agent optimize --eval-model nonexistent-model -# Expected: job runs but all scores may be zero (known issue — no error message) - -# L5. 
Artifacts directory structure -# (after eval init completes inside project) -ls .azure/.foundry/ -# Expected: datasets/, evaluators/, results/ subdirectories with generated files -``` - ---- - -## Cleanup: Revert to the official extension binary - -After the bugbash, reinstall the released extension to remove the custom binary: - -```powershell -# Windows (PowerShell) -azd ext uninstall azure.ai.agents -azd ext install azure.ai.agents -``` - -```bash -# macOS / Linux -azd ext uninstall azure.ai.agents -azd ext install azure.ai.agents -``` - -This re-downloads the official published binary and removes the custom build overlay. \ No newline at end of file diff --git a/cli/azd/extensions/azure.ai.agents/extension.yaml b/cli/azd/extensions/azure.ai.agents/extension.yaml index 3c618813371..857f27744b8 100644 --- a/cli/azd/extensions/azure.ai.agents/extension.yaml +++ b/cli/azd/extensions/azure.ai.agents/extension.yaml @@ -5,7 +5,7 @@ displayName: Foundry agents (Preview) description: Ship agents with Microsoft Foundry from your terminal. (Preview) usage: azd ai agent [options] # NOTE: Make sure version.txt is in sync with this version. 
-version: 0.1.34-optbugbash-preview +version: 0.1.31-preview requiredAzdVersion: ">1.23.13" language: go capabilities: diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go index 36db9e10baa..c4468dfa069 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -15,7 +15,6 @@ import ( "os" "path/filepath" - "azureaiagent/internal/pkg/agents/agent_yaml" "azureaiagent/internal/pkg/agents/eval_api" "azureaiagent/internal/pkg/agents/opteval" @@ -195,8 +194,7 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error flags.name = resolveEvalName(flags) } - if resolved.agentKind != agent_yaml.AgentKindPrompt && - flags.instruction == "" && flags.instructionFile == "" && flags.configFile == "" && + if flags.instruction == "" && flags.instructionFile == "" && flags.configFile == "" && (flags.dataset == "" || len(flags.evaluators) == 0) { return fmt.Errorf("--gen-instruction is required when generating eval assets for a hosted agent") } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go index 0082e8266e7..79b93c5c40b 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go @@ -15,8 +15,6 @@ import ( "strconv" "strings" - "azureaiagent/internal/pkg/agents/agent_yaml" - "github.com/azure/azure-dev/cli/azd/pkg/azdext" ) @@ -52,12 +50,12 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f needsGeneration := true // adaptive evaluator is always generated needsEvalGen := true - if flags.configFile != "" && needsGeneration && resolved.agentKind != agent_yaml.AgentKindPrompt { + if flags.configFile != "" && needsGeneration { // Config detected — show resolved 
values and let the user confirm or override. if err := promptConfigConfirmation(ctx, azdClient, resolved, flags); err != nil { return err } - } else if flags.instruction == "" && flags.instructionFile == "" && needsGeneration && resolved.agentKind != agent_yaml.AgentKindPrompt { + } else if flags.instruction == "" && flags.instructionFile == "" && needsGeneration { // Let the user choose between inline text or loading from a file. inputChoices := []*azdext.SelectChoice{ {Label: "Type inline", Value: "inline"}, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go index 5803385542e..351aacc1ace 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go @@ -363,31 +363,6 @@ func TestBuildGenerationSources(t *testing.T) { assert.Empty(t, sources[1].Prompt) }) - t.Run("prompt agent includes only agent source", func(t *testing.T) { - t.Parallel() - sources := eval_api.BuildGenerationSources( - string(agent_yaml.AgentKindPrompt), "prompt-agent", "v1", "", nil, - ) - require.Len(t, sources, 1) - - assert.Equal(t, "agent", sources[0].Type) - assert.Equal(t, "prompt-agent", sources[0].AgentName) - assert.Equal(t, "v1", sources[0].AgentVersion) - assert.Empty(t, sources[0].Prompt, "prompt agents should not have prompt field") - }) - - t.Run("prompt agent without version omits agent_version", func(t *testing.T) { - t.Parallel() - sources := eval_api.BuildGenerationSources( - string(agent_yaml.AgentKindPrompt), "prompt-agent", "", "", nil, - ) - require.Len(t, sources, 1) - - assert.Equal(t, "agent", sources[0].Type) - assert.Equal(t, "prompt-agent", sources[0].AgentName) - assert.Empty(t, sources[0].AgentVersion, "empty version should be omitted") - }) - t.Run("hosted agent without instruction omits prompt source", func(t *testing.T) { t.Parallel() sources := eval_api.BuildGenerationSources( 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go index d4bc3786745..53757e6a2fc 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go @@ -166,15 +166,6 @@ func TestDetectEvalAgentKind(t *testing.T) { assert.Equal(t, filepath.Join(dir, "agent.yaml"), path) }) - t.Run("detects prompt kind from agent.yml", func(t *testing.T) { - t.Parallel() - dir := t.TempDir() - writeTestFile(t, dir, "agent.yml", "kind: prompt\nname: test-agent\n") - kind, path := detectEvalAgentKind(dir) - assert.Equal(t, agent_yaml.AgentKindPrompt, kind) - assert.Equal(t, filepath.Join(dir, "agent.yml"), path) - }) - t.Run("returns empty for missing manifest", func(t *testing.T) { t.Parallel() dir := t.TempDir() diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/agent_yaml/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/agent_yaml/yaml.go index 931b9d86730..b7122d17d7e 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/agent_yaml/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/agent_yaml/yaml.go @@ -16,7 +16,6 @@ type AgentKind string const ( AgentKindHosted AgentKind = "hosted" AgentKindWorkflow AgentKind = "workflow" - AgentKindPrompt AgentKind = "prompt" ) // IsValidAgentKind checks if the provided AgentKind is valid @@ -29,7 +28,6 @@ func ValidAgentKinds() []AgentKind { return []AgentKind{ AgentKindHosted, AgentKindWorkflow, - AgentKindPrompt, } } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go index 6a45ede72ec..3c8f97d601d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go @@ -21,14 +21,13 @@ type 
TraceOptions struct { } // BuildGenerationSources constructs the sources array for generation jobs. -// For prompt agents (agentKind == "prompt"), only the agent source is included. -// For other agent kinds, a prompt source is included when instruction is -// non-empty, along with the agent source. When traces is non-nil and Days > 0, -// a traces source is appended with start_time computed from the current time. +// A prompt source is included when instruction is non-empty, along with the +// agent source. When traces is non-nil and Days > 0, a traces source is +// appended with start_time computed from the current time. func BuildGenerationSources(agentKind, agentName, version, instruction string, traces *TraceOptions) []GenerationSource { var sources []GenerationSource - if agentKind != "prompt" && instruction != "" { + if instruction != "" { sources = append(sources, GenerationSource{ Type: "prompt", Prompt: instruction, diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go index 5710e733eb5..2a08673d335 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go @@ -27,18 +27,9 @@ func TestBuildGenerationSources_HostedWithInstruction(t *testing.T) { assert.Equal(t, "v2", sources[1].AgentVersion) } -func TestBuildGenerationSources_PromptAgentOnly(t *testing.T) { - t.Parallel() - sources := BuildGenerationSources("prompt", "prompt-agent", "v1", "ignored", nil) - require.Len(t, sources, 1) - assert.Equal(t, "agent", sources[0].Type) - assert.Equal(t, "prompt-agent", sources[0].AgentName) - assert.Equal(t, "v1", sources[0].AgentVersion) -} - func TestBuildGenerationSources_NoVersion(t *testing.T) { t.Parallel() - sources := BuildGenerationSources("prompt", "agent", "", "", nil) + sources := 
BuildGenerationSources("hosted", "agent", "", "", nil) require.Len(t, sources, 1) assert.Empty(t, sources[0].AgentVersion) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go index d794a5fd190..a73009a0a66 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go @@ -149,7 +149,7 @@ func TestAgentRef_YAMLFields(t *testing.T) { input := ` name: test-agent -kind: prompt +kind: hosted version: v5 model: gpt-4.1 ` @@ -157,7 +157,7 @@ model: gpt-4.1 require.NoError(t, yaml.Unmarshal([]byte(input), &ref)) assert.Equal(t, "test-agent", ref.Name) - assert.Equal(t, agent_yaml.AgentKindPrompt, ref.Kind) + assert.Equal(t, agent_yaml.AgentKindHosted, ref.Kind) assert.Equal(t, "v5", ref.Version) assert.Equal(t, "gpt-4.1", ref.Model) } diff --git a/cli/azd/extensions/azure.ai.agents/version.txt b/cli/azd/extensions/azure.ai.agents/version.txt index fc5a69c4947..db5a7919b50 100644 --- a/cli/azd/extensions/azure.ai.agents/version.txt +++ b/cli/azd/extensions/azure.ai.agents/version.txt @@ -1 +1 @@ -0.1.34-optbugbash-preview +0.1.31-preview diff --git a/cli/azd/extensions/registry.json b/cli/azd/extensions/registry.json index a31f336a7c7..cc99086dbbe 100644 --- a/cli/azd/extensions/registry.json +++ b/cli/azd/extensions/registry.json @@ -3937,310 +3937,6 @@ "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-windows-arm64.zip" } } - }, - { - "version": "0.1.31-optbugbash-preview", - "requiredAzdVersion": "\u003e1.23.13", - "capabilities": [ - "custom-commands", - "lifecycle-events", - "mcp-server", - "service-target-provider", - "metadata" - ], - "providers": [ - { - "name": "azure.ai.agent", - "type": "service-target", - "description": "Deploys agents to the Foundry Agent Service" - } - 
], - "usage": "azd ai agent \u003ccommand\u003e [options]", - "examples": [ - { - "name": "init", - "description": "Initialize a new AI agent project.", - "usage": "azd ai agent init" - } - ], - "artifacts": { - "darwin/amd64": { - "checksum": { - "algorithm": "sha256", - "value": "ac322b193b148ad273f7b96f3b7b69b36c900609f0eb835ac906cfce421bf7d2" - }, - "entryPoint": "azure-ai-agents-darwin-amd64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-darwin-amd64.zip" - }, - "darwin/arm64": { - "checksum": { - "algorithm": "sha256", - "value": "a77ee7c8cbb2e708b0fdc4e8c4985b48af5e0b384e0cf29e8f0e589fec1044dc" - }, - "entryPoint": "azure-ai-agents-darwin-arm64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-darwin-arm64.zip" - }, - "linux/amd64": { - "checksum": { - "algorithm": "sha256", - "value": "8baf6c4ebe5db270557c7245016aa67fac8391d99d1d2a17bd6a2084faf620b8" - }, - "entryPoint": "azure-ai-agents-linux-amd64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-linux-amd64.tar.gz" - }, - "linux/arm64": { - "checksum": { - "algorithm": "sha256", - "value": "ec70d32603b4971368fdaa2964eba6ae0d01c5beacb5d402f411aa08a3be5a88" - }, - "entryPoint": "azure-ai-agents-linux-arm64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-linux-arm64.tar.gz" - }, - "windows/amd64": { - "checksum": { - "algorithm": "sha256", - "value": "0d7eeb7773df5908640bda81b9fd90c77ccdc28bb79f4db7a44c843cce0aabcb" - }, - "entryPoint": "azure-ai-agents-windows-amd64.exe", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-windows-amd64.zip" - }, - "windows/arm64": { - "checksum": { - 
"algorithm": "sha256", - "value": "86d65182ba0b9e46c908a5f8953c3eb1056a41031e85a34e814d6ca3da07e78c" - }, - "entryPoint": "azure-ai-agents-windows-arm64.exe", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-optbugbash-preview/azure-ai-agents-windows-arm64.zip" - } - } - }, - { - "version": "0.1.32-optbugbash-preview", - "requiredAzdVersion": "\u003e1.23.13", - "capabilities": [ - "custom-commands", - "lifecycle-events", - "mcp-server", - "service-target-provider", - "metadata" - ], - "providers": [ - { - "name": "azure.ai.agent", - "type": "service-target", - "description": "Deploys agents to the Foundry Agent Service" - } - ], - "usage": "azd ai agent \u003ccommand\u003e [options]", - "examples": [ - { - "name": "init", - "description": "Initialize a new AI agent project.", - "usage": "azd ai agent init" - } - ], - "artifacts": { - "darwin/amd64": { - "checksum": { - "algorithm": "sha256", - "value": "9b7c45241bf0bf1ac6b822e2190bec7136d1ffc25a43718816a17cf77a022d70" - }, - "entryPoint": "azure-ai-agents-darwin-amd64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-darwin-amd64.zip" - }, - "darwin/arm64": { - "checksum": { - "algorithm": "sha256", - "value": "034491e234bff6b4a4ad37850e9961268ac82ff7d364898cdf4b8a572184a675" - }, - "entryPoint": "azure-ai-agents-darwin-arm64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-darwin-arm64.zip" - }, - "linux/amd64": { - "checksum": { - "algorithm": "sha256", - "value": "92408289da7dae4e45969cc80b92856e577e014853415176f81b53b581c4bee5" - }, - "entryPoint": "azure-ai-agents-linux-amd64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-linux-amd64.tar.gz" - }, - "linux/arm64": { - "checksum": { - "algorithm": "sha256", - 
"value": "164328f849a5375c5f11201f1a23915a606f0fa02bda51aead9850a685ea5cc6" - }, - "entryPoint": "azure-ai-agents-linux-arm64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-linux-arm64.tar.gz" - }, - "windows/amd64": { - "checksum": { - "algorithm": "sha256", - "value": "0b804726424c7cbebd2c501b38d222cc42beb92172c522871cad0b8df82c6efd" - }, - "entryPoint": "azure-ai-agents-windows-amd64.exe", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-windows-amd64.zip" - }, - "windows/arm64": { - "checksum": { - "algorithm": "sha256", - "value": "c195a3a99d3ff94ed5d841e74a79d64376ebe5e70c81b0ca3cbedb69cd48864a" - }, - "entryPoint": "azure-ai-agents-windows-arm64.exe", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.32-optbugbash-preview/azure-ai-agents-windows-arm64.zip" - } - } - }, - { - "version": "0.1.33-optbugbash-preview", - "requiredAzdVersion": "\u003e1.23.13", - "capabilities": [ - "custom-commands", - "lifecycle-events", - "mcp-server", - "service-target-provider", - "metadata" - ], - "providers": [ - { - "name": "azure.ai.agent", - "type": "service-target", - "description": "Deploys agents to the Foundry Agent Service" - } - ], - "usage": "azd ai agent \u003ccommand\u003e [options]", - "examples": [ - { - "name": "init", - "description": "Initialize a new AI agent project.", - "usage": "azd ai agent init" - } - ], - "artifacts": { - "darwin/amd64": { - "checksum": { - "algorithm": "sha256", - "value": "1dd0fac923612ef746538c0f08d7fc4fb4dbe35bb63816eb9a1fa055ab2b9b63" - }, - "entryPoint": "azure-ai-agents-darwin-amd64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-darwin-amd64.zip" - }, - "darwin/arm64": { - "checksum": { - "algorithm": "sha256", - "value": 
"7fa4effc01a82fcbe713ccaf62438df75e994f4886ceea262392668fd708114b" - }, - "entryPoint": "azure-ai-agents-darwin-arm64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-darwin-arm64.zip" - }, - "linux/amd64": { - "checksum": { - "algorithm": "sha256", - "value": "5abb54644fbf59e5dc865cdb8f8e54d3ad482c17fc21920b186a9a6b479482f7" - }, - "entryPoint": "azure-ai-agents-linux-amd64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-linux-amd64.tar.gz" - }, - "linux/arm64": { - "checksum": { - "algorithm": "sha256", - "value": "8c24590181ee39b07f65b27e6e02155f6dad79a11f31e84e72056d4fb3bd572e" - }, - "entryPoint": "azure-ai-agents-linux-arm64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-linux-arm64.tar.gz" - }, - "windows/amd64": { - "checksum": { - "algorithm": "sha256", - "value": "36ac3ab724e60bd0d30671a756acd69d97b5fae7b14d6a7b14e42f408e4626c7" - }, - "entryPoint": "azure-ai-agents-windows-amd64.exe", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-windows-amd64.zip" - }, - "windows/arm64": { - "checksum": { - "algorithm": "sha256", - "value": "16fa8ce21c28642b8086aaa7c39fc71d1f2dfbf610c3fee9eab7699c8a26d4f9" - }, - "entryPoint": "azure-ai-agents-windows-arm64.exe", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.33-optbugbash-preview/azure-ai-agents-windows-arm64.zip" - } - } - }, - { - "version": "0.1.34-optbugbash-preview", - "requiredAzdVersion": "\u003e1.23.13", - "capabilities": [ - "custom-commands", - "lifecycle-events", - "mcp-server", - "service-target-provider", - "metadata" - ], - "providers": [ - { - "name": "azure.ai.agent", - "type": "service-target", - "description": 
"Deploys agents to the Foundry Agent Service" - } - ], - "usage": "azd ai agent \u003ccommand\u003e [options]", - "examples": [ - { - "name": "init", - "description": "Initialize a new AI agent project.", - "usage": "azd ai agent init" - } - ], - "artifacts": { - "darwin/amd64": { - "checksum": { - "algorithm": "sha256", - "value": "326e41becb8073e146639fb1078324bf0455200060bcad71bdf87e43779e52e4" - }, - "entryPoint": "azure-ai-agents-darwin-amd64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.34-optbugbash-preview/azure-ai-agents-darwin-amd64.zip" - }, - "darwin/arm64": { - "checksum": { - "algorithm": "sha256", - "value": "ded28b36249fca47924fbb0de11ca8c2376f4a8878e58c489d2bc82da4406bcd" - }, - "entryPoint": "azure-ai-agents-darwin-arm64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.34-optbugbash-preview/azure-ai-agents-darwin-arm64.zip" - }, - "linux/amd64": { - "checksum": { - "algorithm": "sha256", - "value": "106e191775557e51ab1bc605d144bd02004c653a9c72fef2488ed87a992ba595" - }, - "entryPoint": "azure-ai-agents-linux-amd64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.34-optbugbash-preview/azure-ai-agents-linux-amd64.tar.gz" - }, - "linux/arm64": { - "checksum": { - "algorithm": "sha256", - "value": "8ae74dd54ecdee54b0a98e44513adaf1e32d65f0a1a0e13e74cf0a6bd5197186" - }, - "entryPoint": "azure-ai-agents-linux-arm64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.34-optbugbash-preview/azure-ai-agents-linux-arm64.tar.gz" - }, - "windows/amd64": { - "checksum": { - "algorithm": "sha256", - "value": "ffe3a7258b22a46ac7a47d7a4b9786dedbcf36f69f8154149c6fe7a63ce1cd9e" - }, - "entryPoint": "azure-ai-agents-windows-amd64.exe", - "url": 
"https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.34-optbugbash-preview/azure-ai-agents-windows-amd64.zip" - }, - "windows/arm64": { - "checksum": { - "algorithm": "sha256", - "value": "b352fd4bc55f858998d4083eb226f4d1c1b3b74c6ee3689b845da09022ac0962" - }, - "entryPoint": "azure-ai-agents-windows-arm64.exe", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.34-optbugbash-preview/azure-ai-agents-windows-arm64.zip" - } - } } ] }, From 14bf9aabb9744b157a6ad6ab9fb4512e4eeb8e09 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 21 May 2026 11:52:17 -0700 Subject: [PATCH 25/33] change back --- cli/azd/extensions/registry.json | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/cli/azd/extensions/registry.json b/cli/azd/extensions/registry.json index cc99086dbbe..70a2515652c 100644 --- a/cli/azd/extensions/registry.json +++ b/cli/azd/extensions/registry.json @@ -1,4 +1,5 @@ { + "schemaVersion": "1.0", "extensions": [ { "id": "microsoft.azd.demo", @@ -3891,50 +3892,50 @@ "darwin/amd64": { "checksum": { "algorithm": "sha256", - "value": "e933b6fbe2dd0420d1cea6c63e48f7c753dcdf9e15b8efa2978cdde72e81a8e4" + "value": "ab396ab8dd3b627e0467e35535f6cb6bb29efb475986f7218cf2322946e4e33a" }, "entryPoint": "azure-ai-agents-darwin-amd64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-darwin-amd64.zip" + "url": "https://github.com/Azure/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-darwin-amd64.zip" }, "darwin/arm64": { "checksum": { "algorithm": "sha256", - "value": "19194ea63609f056a2b772bcf81b27605a02cffcfcc272afabfd0f2e9a0e20d5" + "value": "919f18a74e4fa85d8b3db74e34e1d41af10b1e9a78fae171be7e42085f8bfef9" }, "entryPoint": "azure-ai-agents-darwin-arm64", - "url": 
"https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-darwin-arm64.zip" + "url": "https://github.com/Azure/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-darwin-arm64.zip" }, "linux/amd64": { "checksum": { "algorithm": "sha256", - "value": "279474857915889b972cc6c041ebd73ec91131590ec57db122cb65a5b5e4daef" + "value": "dbcf13152499e43dee123bf8d3a25aba16f2f4cf287975187b9f79b62495cb53" }, "entryPoint": "azure-ai-agents-linux-amd64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-linux-amd64.tar.gz" + "url": "https://github.com/Azure/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-linux-amd64.tar.gz" }, "linux/arm64": { "checksum": { "algorithm": "sha256", - "value": "818a85228bcb61559dee3618e1a602795514a79b2d14f84c7e1c9906e980f86c" + "value": "2a84f09164cf0260727e82e1e43ace5aef7a8d3380e0602221d05ece9230fd87" }, "entryPoint": "azure-ai-agents-linux-arm64", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-linux-arm64.tar.gz" + "url": "https://github.com/Azure/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-linux-arm64.tar.gz" }, "windows/amd64": { "checksum": { "algorithm": "sha256", - "value": "60d65a87f3f3ed594245f9004b3a58d881b031c7a723afc5e0f5f7c833a0a14d" + "value": "e3a9d3f0358852767babe84ce16fb01e52427458776751e11d2024fd77694a0a" }, "entryPoint": "azure-ai-agents-windows-amd64.exe", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-windows-amd64.zip" + "url": "https://github.com/Azure/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-windows-amd64.zip" }, "windows/arm64": { "checksum": { "algorithm": "sha256", - "value": 
"e2b29ac9718d86b07112b6e1d380208e6374da507dd8bceeb23a2eef93b8d5b4" + "value": "1ff890de9c6507ff8a2e104e697545e1928eb4f1557a35e06f7c2168478507c3" }, "entryPoint": "azure-ai-agents-windows-arm64.exe", - "url": "https://github.com/Zyysurely/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-windows-arm64.zip" + "url": "https://github.com/Azure/azure-dev/releases/download/azd-ext-azure-ai-agents_0.1.31-preview/azure-ai-agents-windows-arm64.zip" } } } @@ -5037,11 +5038,11 @@ } }, { - "version": "0.0.6-preview", "capabilities": [ "custom-commands", "metadata" ], + "version": "0.0.6-preview", "usage": "azd ai models \u003ccommand\u003e [options]", "examples": [ { From bf3292d8ad2f67e72899a5b5d84e229e29b1c94f Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 21 May 2026 12:01:57 -0700 Subject: [PATCH 26/33] fix conflict --- cli/azd/extensions/azure.ai.agents/internal/cmd/root.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/root.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/root.go index ea4e7e5daf4..170b1cf40d2 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/root.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/root.go @@ -62,17 +62,14 @@ func NewRootCommand() *cobra.Command { rootCmd.AddCommand(newMonitorCommand(extCtx)) rootCmd.AddCommand(newFilesCommand(extCtx)) rootCmd.AddCommand(newSessionCommand(extCtx)) -<<<<<<< HEAD - rootCmd.AddCommand(newEvalCommand(extCtx)) - rootCmd.AddCommand(newOptimizeCommand(extCtx)) -======= rootCmd.AddCommand(newProjectCommand(extCtx)) // Connection commands — in separate package for easy lift-and-shift later. // When the azd core namespace change lands, move this AddCommand call // to the new root and update the import path. 
rootCmd.AddCommand(conncmd.NewConnectionRootCommand(extCtx)) ->>>>>>> main + rootCmd.AddCommand(newEvalCommand(extCtx)) + rootCmd.AddCommand(newOptimizeCommand(extCtx)) return rootCmd } From 9609c298917f10f5217bf86895e35bf1b4f0e055 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 21 May 2026 15:08:28 -0700 Subject: [PATCH 27/33] address comments --- .../azure.ai.agents/internal/cmd/eval.go | 5 +- .../internal/cmd/eval_helpers.go | 2 +- .../internal/cmd/eval_helpers_test.go | 4 +- .../azure.ai.agents/internal/cmd/eval_init.go | 9 ++- .../internal/cmd/eval_init_jobs.go | 14 +++-- .../internal/cmd/eval_init_prompts.go | 13 +--- .../internal/cmd/eval_init_test.go | 1 - .../internal/cmd/eval_list_test.go | 2 +- .../internal/cmd/eval_progress.go | 10 +-- .../azure.ai.agents/internal/cmd/eval_test.go | 11 ++-- .../azure.ai.agents/internal/cmd/optimize.go | 6 +- .../internal/cmd/optimize_apply.go | 18 +++--- .../internal/cmd/optimize_apply_test.go | 2 +- .../internal/cmd/optimize_config.go | 25 +++++--- .../internal/cmd/optimize_config_test.go | 2 +- .../internal/cmd/optimize_deploy.go | 4 +- .../internal/cmd/optimize_deploy_test.go | 4 +- .../internal/cmd/optimize_list.go | 2 +- .../internal/cmd/optimize_test.go | 6 +- .../pkg/agents/dataset_api/operations.go | 62 +++++++++++-------- .../pkg/agents/dataset_api/operations_test.go | 18 +++--- .../internal/pkg/agents/eval_api/artifacts.go | 28 +++++++-- .../pkg/agents/eval_api/eval_config_test.go | 2 +- .../pkg/agents/eval_api/generation.go | 1 + .../internal/pkg/agents/eval_api/models.go | 4 +- .../pkg/agents/eval_api/operations.go | 15 +++-- .../pkg/agents/eval_api/operations_test.go | 19 +++--- .../internal/pkg/agents/eval_api/poller.go | 16 +++++ .../pkg/agents/eval_api/portal_urls.go | 5 +- .../internal/pkg/agents/opteval/state.go | 10 ++- .../internal/pkg/agents/opteval/yaml.go | 17 ++++- .../pkg/agents/optimize_api/client.go | 16 ++--- .../pkg/agents/optimize_api/models.go | 2 +- 
.../pkg/agents/optimize_api/poller.go | 35 ++++++++++- 34 files changed, 244 insertions(+), 146 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go index 9c378454b1c..0a331ac0e26 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go @@ -96,7 +96,9 @@ Use eval init to generate an eval config, then eval run to execute it.`, return cmd } -// resolveEvalContext resolves the context for an eval operation by reading azd project state, environment variables, and optionally prompting the user. It returns an evalResolvedContext with API clients and metadata needed to run eval commands. +// resolveEvalContext resolves the context for an eval operation by reading azd project state, +// environment variables, and optionally prompting the user. It returns an evalResolvedContext +// with API clients and metadata needed to run eval commands. func resolveEvalContext(ctx context.Context, options evalContextOptions) (*evalResolvedContext, error) { fmt.Println(output.WithGrayFormat("Resolving eval context...")) @@ -459,7 +461,6 @@ func pollEvalOperationWithSpinner( return nil, err } - log.Printf("[debug] %s: completed successfully", label) progress.setDone(label) return job, nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go index e6747f4caf0..e01b6c8b7bc 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go @@ -239,7 +239,7 @@ func writeBaselineConfig(agentProject string, p baselineParams) error { // loadJSONLFile reads a JSONL file and unmarshals each non-empty line into T. // Returns an error if the file cannot be read, a line fails to parse, or no items are found. 
func loadJSONLFile[T any](path string) ([]T, error) { - f, err := os.Open(path) + f, err := os.Open(path) //nolint:gosec // path is provided by user for local dataset if err != nil { return nil, fmt.Errorf("failed to open dataset file %s: %w", path, err) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers_test.go index 37df0f88027..4785d793d3b 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers_test.go @@ -141,7 +141,7 @@ func TestWriteBaselineConfig(t *testing.T) { instrPath := filepath.Join(dir, opteval.AgentConfigsDir, opteval.BaselineDir, opteval.InstructionFile) assert.FileExists(t, instrPath) - content, err := os.ReadFile(instrPath) + content, err := os.ReadFile(instrPath) //nolint:gosec // test file path require.NoError(t, err) assert.Equal(t, "You are a helpful assistant.", string(content)) }) @@ -172,7 +172,7 @@ func TestWriteBaselineConfig(t *testing.T) { require.NoError(t, err) metaPath := filepath.Join(dir, opteval.AgentConfigsDir, opteval.BaselineDir, opteval.MetadataFile) - data, err := os.ReadFile(metaPath) + data, err := os.ReadFile(metaPath) //nolint:gosec // test file path require.NoError(t, err) assert.Contains(t, string(data), "skill_dir") }) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go index c4468dfa069..b9b87076651 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -12,6 +12,7 @@ import ( "context" "errors" "fmt" + "log" "os" "path/filepath" @@ -151,7 +152,9 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error // If --reset-defaults is set, clear existing state so the user can start fresh. 
if flags.resetDefaults && resolved.envName != "" { - opteval.ClearEvalState(ctx, resolved.azdClient, resolved.envName) + if err := opteval.ClearEvalState(ctx, resolved.azdClient, resolved.envName); err != nil { + log.Printf("warning: clearing eval state: %v", err) + } } // Handle existing eval.yaml: prompt for regeneration, carry forward options. @@ -230,7 +233,9 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error } state.InitStatus = opteval.InitStatusCompleted - opteval.ClearEvalState(ctx, resolved.azdClient, resolved.envName) + if err := opteval.ClearEvalState(ctx, resolved.azdClient, resolved.envName); err != nil { + log.Printf("warning: clearing eval state: %v", err) + } return writeAndPrintEvalResult(ctx, resolved, evalCfg, pollRes, configPath, isRegenerate) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go index 5edffcdd457..6f54495d111 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -10,7 +10,6 @@ package cmd import ( "context" - "encoding/json" "errors" "fmt" "log" @@ -55,6 +54,12 @@ func newEvalConfig(flags *evalInitFlags, resolved *evalResolvedContext) *evalCon if flags.configFile != "" { agent.ConfigFile = flags.configFile } + if flags.instruction != "" { + agent.Instruction.Value = flags.instruction + } + if flags.instructionFile != "" { + agent.Instruction.File = flags.instructionFile + } return &evalConfig{ Config: opteval.Config{ Name: resolveEvalName(flags), @@ -102,9 +107,6 @@ func submitEvaluatorGeneration( request := eval_api.NewEvaluatorGenerationJobRequest( resolveEvalName(flags), flags.evalModel, sources, ) - if body, err := json.MarshalIndent(request, "", " "); err == nil { - log.Printf("[debug] submitEvaluatorGeneration request:\n%s", body) - } return resolved.evalClient.CreateEvaluatorGenerationJob(ctx, 
request, DefaultAgentAPIVersion) } @@ -167,7 +169,9 @@ func resumeEvalInit( return err } state.InitStatus = opteval.InitStatusCompleted - opteval.ClearEvalState(ctx, resolved.azdClient, resolved.envName) + if err := opteval.ClearEvalState(ctx, resolved.azdClient, resolved.envName); err != nil { + log.Printf("warning: clearing eval state: %v", err) + } if resolved.hasProject { eval_api.WriteEvalReviewArtifacts(resolved.agentProject, evalCfg) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go index 79b93c5c40b..1e472b7f9d8 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_prompts.go @@ -47,15 +47,12 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f } } - needsGeneration := true // adaptive evaluator is always generated - needsEvalGen := true - - if flags.configFile != "" && needsGeneration { + if flags.configFile != "" { // Config detected — show resolved values and let the user confirm or override. if err := promptConfigConfirmation(ctx, azdClient, resolved, flags); err != nil { return err } - } else if flags.instruction == "" && flags.instructionFile == "" && needsGeneration { + } else if flags.instruction == "" && flags.instructionFile == "" { // Let the user choose between inline text or loading from a file. inputChoices := []*azdext.SelectChoice{ {Label: "Type inline", Value: "inline"}, @@ -109,7 +106,7 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f } // Ask whether to include traces for evaluator generation, unless already set via flags. 
- if flags.traceDays == 0 && needsEvalGen { + if flags.traceDays == 0 { confirmResp, err := azdClient.Prompt().Confirm(ctx, &azdext.ConfirmRequest{ Options: &azdext.ConfirmOptions{ Message: "Include agent traces for evaluator generation?", @@ -142,10 +139,6 @@ func promptEvalInitOptions(ctx context.Context, resolved *evalResolvedContext, f } } - if !needsGeneration { - return nil - } - if !flags.evalModelSet { // Read the deployed model name from the azd environment to use as default. var deployedModel string diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go index 351aacc1ace..d47f9393818 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go @@ -426,7 +426,6 @@ func TestBuildOpenAIEvalRequest(t *testing.T) { assert.Equal(t, "azure_ai_evaluator", req.TestingCriteria[0].Type) assert.Equal(t, "builtin.quality", req.TestingCriteria[0].EvaluatorName) assert.Equal(t, "gpt-4o", req.TestingCriteria[0].InitializationParameters["model"]) - assert.Equal(t, "{{item.messages}}", req.TestingCriteria[0].DataMapping["messages"]) assert.Equal(t, "{{item.query}}", req.TestingCriteria[0].DataMapping["query"]) assert.Equal(t, "{{sample.output_items}}", req.TestingCriteria[0].DataMapping["response"]) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list_test.go index fbb7d8fabc5..53ce6dfcb63 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list_test.go @@ -20,7 +20,7 @@ func TestNewEvalListCommand_Flags(t *testing.T) { f := cmd.Flags().Lookup("limit") require.NotNil(t, f) - assert.Equal(t, "20", f.DefValue) + assert.Equal(t, "10", f.DefValue) } func TestNewEvalListCommand_NoArgs(t *testing.T) { diff --git 
a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go index 8d227804b08..3a74a61981c 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_progress.go @@ -40,8 +40,10 @@ var spinFrames = []string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧" // Start launches the background spinner ticker. func (p *evalProgress) Start() { + p.mu.Lock() p.start = time.Now() p.spinning = true + p.mu.Unlock() go func() { defer close(p.done) frameIdx := 0 @@ -110,14 +112,6 @@ func (p *evalProgress) setDone(label string) { fmt.Printf(" %s %s (%s)\n", color.GreenString("(✓) Done"), label, elapsed) } -// printDetail prints an indented detail line (e.g. a portal link) safely -// without conflicting with the spinner. -func (p *evalProgress) printDetail(text string) { - p.mu.Lock() - defer p.mu.Unlock() - p.clearSpinnerLine() - fmt.Printf(" · %s\n", text) -} func (p *evalProgress) setFailed(label string) { p.mu.Lock() defer p.mu.Unlock() diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go index 53757e6a2fc..62de63b1140 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go @@ -237,7 +237,7 @@ func TestSaveEvaluatorResult(t *testing.T) { path := filepath.Join(dir, "evaluators", "smoke-core", "rubric_dimensions.json") assert.FileExists(t, path) - data, err := os.ReadFile(path) + data, err := os.ReadFile(path) //nolint:gosec // test file path require.NoError(t, err) // Only the dimensions array is saved, not the outer fields. assert.Contains(t, string(data), `"id": "quality"`) @@ -305,7 +305,7 @@ func TestDownloadDatasetArtifact_WritesBlob(t *testing.T) { t.Parallel() // The Azure SDK bearer token policy rejects non-TLS test servers, so the - // credential call will fail. 
downloadDatasetArtifact gracefully returns nil. + // credential call will fail. downloadDatasetArtifact now returns the error. apiServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) @@ -318,9 +318,10 @@ func TestDownloadDatasetArtifact_WritesBlob(t *testing.T) { ref := &evalDatasetRef{Name: "test-ds", Version: "v1"} _, err := eval_api.DownloadDatasetArtifact(t.Context(), client, dir, ref, "2025-11-15-preview") - require.NoError(t, err) + require.Error(t, err) + assert.Contains(t, err.Error(), "getting dataset credential") - // No file written when credential fetch fails (non-TLS test server). + // No file written when credential fetch fails. dest := eval_api.DatasetArtifactPath(dir, ref) assert.NoDirExists(t, dest) } @@ -353,7 +354,7 @@ func TestWriteJSONFile(t *testing.T) { err := eval_api.WriteJSONFile(path, map[string]string{"hello": "world"}) require.NoError(t, err) - data, err := os.ReadFile(path) + data, err := os.ReadFile(path) //nolint:gosec // test file path require.NoError(t, err) assert.Contains(t, string(data), `"hello": "world"`) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go index bc59cb33c45..55b90c2d960 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -177,7 +177,7 @@ func (a *OptimizeAction) Run(ctx context.Context, cmd *cobra.Command) error { out := cmd.OutOrStdout() bold := color.New(color.Bold) - bold.Fprintf(out, "Optimizing agent %q...\n", cfg.Agent.Name) + _, _ = bold.Fprintf(out, "Optimizing agent %q...\n", cfg.Agent.Name) if configSource == "" { fmt.Fprintf(out, " Dataset: built-in (3 tasks, 12 criteria)\n") } else { @@ -481,7 +481,7 @@ func printOptimizeResults(out io.Writer, status *optimize_api.OptimizeJobStatus, bold := 
color.New(color.Bold) green := color.New(color.FgGreen) - bold.Fprintln(out, "\nResults:") + _, _ = bold.Fprintln(out, "\nResults:") fmt.Fprintf(out, " %-20s %7s %7s %8s\n", "Candidate", "Score", "Pass", "Tokens") fmt.Fprintf(out, " %-20s %7s %7s %8s\n", strings.Repeat("─", 20), strings.Repeat("─", 7), @@ -501,7 +501,7 @@ func printOptimizeResults(out io.Writer, status *optimize_api.OptimizeJobStatus, line := fmt.Sprintf(" %-20s %7.2f %6.0f%% %8.0f", name, c.AvgScore, c.PassRate*100, c.AvgTokens) if isBest { - green.Fprintln(out, line) + _, _ = green.Fprintln(out, line) } else { fmt.Fprintln(out, line) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go index ccafbaaa744..80478cac6e5 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go @@ -113,7 +113,7 @@ func (a *OptimizeApplyAction) apply( serviceDir := filepath.Join(project.Path, svc.RelativePath) candidateDir := filepath.Join(serviceDir, agentConfigsDir, a.flags.candidate) - bold.Fprintf(out, "Applying optimization candidate %s...\n\n", a.flags.candidate) + _, _ = bold.Fprintf(out, "Applying optimization candidate %s...\n\n", a.flags.candidate) credential, err := newAgentCredential() if err != nil { @@ -178,7 +178,7 @@ func (a *OptimizeApplyAction) apply( // Done — prompt the user to deploy. 
fmt.Fprintln(out) - color.New(color.FgGreen, color.Bold).Fprintf(out, + _, _ = color.New(color.FgGreen, color.Bold).Fprintf(out, " ✓ Candidate %s applied to %s\n\n", a.flags.candidate, filepath.Join(agentConfigsDir, a.flags.candidate)) fmt.Fprintf(out, " Run %s to deploy the optimized agent.\n", @@ -438,7 +438,11 @@ func downloadSkillFilesToDir( continue } - outPath := filepath.Join(destDir, filepath.FromSlash(f.Path)) + outPath, pathErr := opteval.SafePath(destDir, f.Path) + if pathErr != nil { + fmt.Fprintf(out, " warning: skipping file %s: path escapes destination directory\n", f.Path) + continue + } if err := os.MkdirAll(filepath.Dir(outPath), 0750); err != nil { return count, fmt.Errorf("creating directory for %s: %w", f.Path, err) } @@ -525,14 +529,14 @@ func printPromptDiff(out io.Writer, serviceDir, candidateID string, candidateCon fmt.Fprintf(out, "\n Instruction diff (baseline → optimized):\n\n") removed := color.New(color.FgRed) - removed.Fprintf(out, " — Baseline (%d lines, %d chars):\n", + _, _ = removed.Fprintf(out, " — Baseline (%d lines, %d chars):\n", len(baselineLines), len(baselineText)) printPreviewLines(out, baselineLines, "- ", removed) fmt.Fprintln(out) added := color.New(color.FgGreen) - added.Fprintf(out, " — Optimized (%d lines, %d chars):\n", + _, _ = added.Fprintf(out, " — Optimized (%d lines, %d chars):\n", len(optimizedLines), len(optimized)) printPreviewLines(out, optimizedLines, "+ ", added) } @@ -541,9 +545,9 @@ func printPromptDiff(out io.Writer, serviceDir, candidateID string, candidateCon func printPreviewLines(out io.Writer, lines []string, prefix string, c *color.Color) { limit := min(len(lines), maxDiffPreviewLines) for _, line := range lines[:limit] { - c.Fprintf(out, " %s%s\n", prefix, line) + _, _ = c.Fprintf(out, " %s%s\n", prefix, line) } if len(lines) > maxDiffPreviewLines { - c.Fprintf(out, " %s... (%d more lines)\n", prefix, len(lines)-maxDiffPreviewLines) + _, _ = c.Fprintf(out, " %s... 
(%d more lines)\n", prefix, len(lines)-maxDiffPreviewLines) } } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go index 7a8bb618e3a..c645bc2d9a5 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go @@ -293,7 +293,7 @@ func TestWriteAgentConfigFromCandidate(t *testing.T) { assert.FileExists(t, filepath.Join(dir, opteval.MetadataFile)) assert.FileExists(t, filepath.Join(dir, opteval.InstructionFile)) - content, err := os.ReadFile(filepath.Join(dir, opteval.InstructionFile)) + content, err := os.ReadFile(filepath.Join(dir, opteval.InstructionFile)) //nolint:gosec // test file path require.NoError(t, err) assert.Equal(t, "Test prompt.", string(content)) }) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go index 0427af277f6..0da075e3388 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -44,7 +44,7 @@ type OptimizeConfigCriterion struct { // LoadOptimizeConfig reads and parses a YAML optimization config file. func LoadOptimizeConfig(path string) (*OptimizeConfig, error) { - data, err := os.ReadFile(path) + data, err := os.ReadFile(path) //nolint:gosec // path is provided by user for local config if err != nil { return nil, fmt.Errorf("failed to read config file %s: %w", path, err) } @@ -102,8 +102,10 @@ func defaultOptimizeConfig(agentName string) *OptimizeConfig { var defaultDataset = []optimize_api.DatasetTask{ { - Name: "calculator_module", - Prompt: "Create a Python module calc.py with four functions: add, subtract, multiply, divide. Each takes two numbers and returns the result. 
Include a brief test at the bottom (if __name__ == '__main__') that exercises each function and prints the results. Then run it.", + Name: "calculator_module", + Prompt: "Create a Python module calc.py with four functions: add, subtract, multiply, divide. " + + "Each takes two numbers and returns the result. Include a brief test at the bottom " + + "(if __name__ == '__main__') that exercises each function and prints the results. Then run it.", Criteria: []optimize_api.Criterion{ {Name: "decimal_types", Instruction: "ALL functions MUST use and return Python's decimal.Decimal type, NOT float."}, {Name: "error_code_prefix", Instruction: "ALL error messages raised by any function MUST include a bracketed error code prefix [CALC-NNN]."}, @@ -112,8 +114,11 @@ var defaultDataset = []optimize_api.DatasetTask{ }, }, { - Name: "csv_report", - Prompt: "Create a Python script report.py that generates a CSV file 'sales_report.csv' with 10 rows of sample sales data. Columns: date, product, quantity, unit_price, total. Then read the CSV back and print a summary: total revenue and the top-selling product by quantity. Run the script.", + Name: "csv_report", + Prompt: "Create a Python script report.py that generates a CSV file 'sales_report.csv' " + + "with 10 rows of sample sales data. Columns: date, product, quantity, unit_price, total. " + + "Then read the CSV back and print a summary: total revenue and the top-selling product " + + "by quantity. Run the script.", Criteria: []optimize_api.Criterion{ {Name: "pipe_delimiter", Instruction: "The CSV file MUST use pipe '|' as the delimiter, NOT comma."}, {Name: "zero_padded_quantity", Instruction: "ALL quantity values MUST be zero-padded to exactly 4 digits (e.g. 
'0042' not '42')."}, @@ -122,8 +127,12 @@ var defaultDataset = []optimize_api.DatasetTask{ }, }, { - Name: "api_response_builder", - Prompt: "Create a Python module api_utils.py with a function build_response(data, status_code=200) that builds a JSON-ready dictionary representing an API response. Also create a function validate_email(email: str) -> bool that checks if an email is roughly valid. Write a test block that demonstrates both functions with a few examples and prints the JSON output. Run it.", + Name: "api_response_builder", + Prompt: "Create a Python module api_utils.py with a function build_response(data, " + + "status_code=200) that builds a JSON-ready dictionary representing an API response. " + + "Also create a function validate_email(email: str) -> bool that checks if an email " + + "is roughly valid. Write a test block that demonstrates both functions with a few " + + "examples and prints the JSON output. Run it.", Criteria: []optimize_api.Criterion{ {Name: "named_tuple_validation", Instruction: "validate_email() MUST return a typing.NamedTuple with fields (is_valid: bool, reason: str), NOT a bare bool."}, {Name: "request_id", Instruction: "build_response() MUST include a 'requestId' field containing a UUID4 string."}, @@ -247,7 +256,7 @@ func loadSkillsFromDir(dir string) ([]optimize_api.SkillDefinition, error) { continue } - data, err := os.ReadFile(entryPath) + data, err := os.ReadFile(entryPath) //nolint:gosec // path derived from project skill directory if err != nil { return nil, fmt.Errorf("reading skill file %s: %w", entry.Name(), err) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go index 7ca6531af44..e8f99f214d6 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go @@ -18,7 +18,7 @@ import ( func writeTestFile(t 
*testing.T, dir, name, content string) string { t.Helper() path := filepath.Join(dir, name) - require.NoError(t, os.WriteFile(path, []byte(content), 0644)) + require.NoError(t, os.WriteFile(path, []byte(content), 0600)) return path } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go index 0496a738ec1..0d243442761 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go @@ -102,7 +102,7 @@ func (a *OptimizeDeployAction) runDirect( return err } - bold.Fprintf(out, "Deploying candidate %s to agent %s...\n\n", a.flags.candidate, agentName) + _, _ = bold.Fprintf(out, "Deploying candidate %s to agent %s...\n\n", a.flags.candidate, agentName) // Step 1: Fetch candidate config from optimization service. fmt.Fprintf(out, " Fetching candidate config...\n") @@ -184,7 +184,7 @@ func (a *OptimizeDeployAction) runDirect( // Step 6: Print success. 
fmt.Fprintln(out) - color.New(color.FgGreen, color.Bold).Fprintf(out, + _, _ = color.New(color.FgGreen, color.Bold).Fprintf(out, " \u2713 Successfully deployed candidate %s as version %s\n", a.flags.candidate, versionObj.Version) fmt.Fprintf(out, "\n Agent: %s\n", agentName) fmt.Fprintf(out, " Version: %s\n", versionObj.Version) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy_test.go index 29289f60e84..5b58f43eb66 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy_test.go @@ -168,7 +168,7 @@ func TestUpsertAgentYamlEnvVar_InsertsNew(t *testing.T) { err := upsertAgentYamlEnvVar(yamlPath, "MY_VAR", "my_value") require.NoError(t, err) - data, err := os.ReadFile(yamlPath) + data, err := os.ReadFile(yamlPath) //nolint:gosec // test file path require.NoError(t, err) assert.Contains(t, string(data), "MY_VAR") assert.Contains(t, string(data), "my_value") @@ -188,7 +188,7 @@ environment_variables: err := upsertAgentYamlEnvVar(yamlPath, "MY_VAR", "new_value") require.NoError(t, err) - data, err := os.ReadFile(yamlPath) + data, err := os.ReadFile(yamlPath) //nolint:gosec // test file path require.NoError(t, err) assert.Contains(t, string(data), "new_value") assert.NotContains(t, string(data), "old_value") diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go index 312c1d213b9..fa09c0d9de1 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go @@ -97,7 +97,7 @@ func runOptimizeList(cmd *cobra.Command, flags *optimizeListFlags) error { func printOptimizeListTable(out io.Writer, jobs []optimize_api.OptimizeJobStatus) { bold := color.New(color.Bold) - bold.Fprintf(out, " %-38s %-12s %-14s %7s 
%s\n", "ID", "Status", "Agent", "Score", "Created") + _, _ = bold.Fprintf(out, " %-38s %-12s %-14s %7s %s\n", "ID", "Status", "Agent", "Score", "Created") fmt.Fprintf(out, " %-38s %-12s %-14s %7s %s\n", strings.Repeat("─", 38), strings.Repeat("─", 12), strings.Repeat("─", 14), strings.Repeat("─", 7), strings.Repeat("─", 19)) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go index a8b69969168..abbbe91368c 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go @@ -18,7 +18,7 @@ import ( func TestOptimizeCommand_HasExpectedSubCommands(t *testing.T) { cmd := newOptimizeCommand(&azdext.ExtensionContext{}) - expected := []string{"status", "list", "cancel", "deploy"} + expected := []string{"status", "list", "cancel", "deploy", "apply"} var actual []string for _, sub := range cmd.Commands() { actual = append(actual, sub.Name()) @@ -51,9 +51,7 @@ func TestOptimizeCommand_AcceptsConfigFlag(t *testing.T) { assert.Equal(t, "c", f.Shorthand, "--config should have -c shorthand") assert.NotNil(t, cmd.Flags().Lookup("poll-interval")) - assert.NotNil(t, cmd.Flags().Lookup("endpoint")) - assert.NotNil(t, cmd.Flags().Lookup("agent")) - assert.NotNil(t, cmd.Flags().Lookup("strategy")) + assert.NotNil(t, cmd.Flags().Lookup("target")) } func TestOptimizeCommand_DefaultFlags(t *testing.T) { diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go index 58da5cf7cf4..bec24bd62df 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go @@ -7,6 +7,7 @@ import ( "bytes" "context" "encoding/json" + "encoding/xml" "fmt" "io" "log" @@ -39,7 +40,7 @@ func 
NewDatasetClient(endpoint string, cred azcore.TokenCredential) *DatasetClie clientOptions := &policy.ClientOptions{ Logging: policy.LogOptions{ AllowedHeaders: []string{"X-Ms-Correlation-Request-Id", "X-Request-Id"}, - IncludeBody: true, + IncludeBody: false, }, PerCallPolicies: []policy.Policy{ runtime.NewBearerTokenPolicy(cred, []string{"https://ai.azure.com/.default"}, nil), @@ -61,6 +62,15 @@ func NewDatasetClient(endpoint string, cred azcore.TokenCredential) *DatasetClie } } +// NewDatasetClientFromPipeline creates a DatasetClient with a pre-built pipeline. +// This is intended for tests that need to bypass auth policies. +func NewDatasetClientFromPipeline(endpoint string, pipeline runtime.Pipeline) *DatasetClient { + return &DatasetClient{ + endpoint: endpoint, + pipeline: pipeline, + } +} + // CreateDataset registers a dataset with inline content (upload). func (c *DatasetClient) CreateDataset( ctx context.Context, @@ -107,8 +117,8 @@ func (c *DatasetClient) UploadNewVersion( return nil, fmt.Errorf("uploading blob: %w", err) } - // Step 3: Finalize the dataset version. - dataURI := pending.ResolvedBlobURI() + // Step 3: Finalize the dataset version with the full blob URI. + dataURI := strings.TrimSuffix(pending.ResolvedBlobURI(), "/") + "/" + blobName return c.FinalizeDatasetVersion(ctx, name, newVersion, dataURI, apiVersion) } @@ -207,8 +217,6 @@ func (c *DatasetClient) GetDatasetCredential( // Returns the raw content as bytes. The downloadURL should be the full URL with SAS token // (e.g., from DatasetCredential.ResolvedDownloadURI()). 
func (c *DatasetClient) DownloadDataset(ctx context.Context, downloadURL string) ([]byte, error) { - log.Printf("[dataset_api] downloading dataset from blob: %s", downloadURL) - req, err := runtime.NewRequest(ctx, http.MethodGet, downloadURL) if err != nil { return nil, fmt.Errorf("failed to create download request: %w", err) @@ -252,7 +260,7 @@ func (c *DatasetClient) ListContainerBlobs(ctx context.Context, containerSASUri q.Set("comp", "list") u.RawQuery = q.Encode() - log.Printf("[dataset_api] listing blobs: %s", u.String()) + log.Printf("[dataset_api] listing blobs: %s", u.Redacted()) req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) if err != nil { @@ -292,8 +300,6 @@ func (c *DatasetClient) DownloadBlob(ctx context.Context, containerSASUri, blobN // Append blob name to the container path. u.Path = strings.TrimSuffix(u.Path, "/") + "/" + blobName - log.Printf("[dataset_api] downloading blob: %s", u.String()) - req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) if err != nil { return nil, fmt.Errorf("failed to create blob download request: %w", err) @@ -319,23 +325,29 @@ func (c *DatasetClient) DownloadBlob(ctx context.Context, containerSASUri, blobN return data, nil } -// parseBlobNames extracts blob names from the Azure Blob Storage XML list response. +// parseBlobNames extracts blob names from the Azure Blob Storage XML list response +// using proper XML parsing against the EnumerationResults schema. func parseBlobNames(xmlBody string) []string { - var names []string - // Simple extraction — look for ... within elements. 
- remaining := xmlBody - for { - start := strings.Index(remaining, "") - if start == -1 { - break - } - remaining = remaining[start+len(""):] - end := strings.Index(remaining, "") - if end == -1 { - break + type blob struct { + Name string `xml:"Name"` + } + type blobs struct { + Blob []blob `xml:"Blob"` + } + type enumerationResults struct { + Blobs blobs `xml:"Blobs"` + } + + var result enumerationResults + if err := xml.Unmarshal([]byte(xmlBody), &result); err != nil { + return nil + } + + names := make([]string, 0, len(result.Blobs.Blob)) + for _, b := range result.Blobs.Blob { + if b.Name != "" { + names = append(names, b.Name) } - names = append(names, remaining[:end]) - remaining = remaining[end:] } return names } @@ -369,14 +381,13 @@ func (c *DatasetClient) doRequest( return nil, fmt.Errorf("failed to create request: %w", err) } - log.Printf("[dataset_api] %s %s", method, u.String()) + log.Printf("[dataset_api] %s %s", method, u.Redacted()) if body != nil { payload, err := json.Marshal(body) if err != nil { return nil, fmt.Errorf("failed to marshal request: %w", err) } - log.Printf("[dataset_api] request body: %s", string(payload)) if err := req.SetBody(streaming.NopCloser(bytes.NewReader(payload)), "application/json"); err != nil { return nil, fmt.Errorf("failed to set request body: %w", err) } @@ -394,7 +405,6 @@ func (c *DatasetClient) doRequest( } log.Printf("[dataset_api] response status: %d", resp.StatusCode) - log.Printf("[dataset_api] response body: %s", string(respBody)) if !runtime.HasStatusCode(resp, http.StatusOK, http.StatusCreated, http.StatusAccepted) { resp.Body = io.NopCloser(bytes.NewReader(respBody)) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations_test.go index 9223597888d..edb9b8ae4b8 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations_test.go +++ 
b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations_test.go @@ -12,6 +12,7 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -33,19 +34,16 @@ func newTestClient(t *testing.T, handler http.Handler) (*DatasetClient, *httptes t.Helper() server := httptest.NewServer(handler) t.Cleanup(server.Close) - client := NewDatasetClient(server.URL, &fakeCredential{}) + pipeline := runtime.NewPipeline( + "test", + "v0.0.0", + runtime.PipelineOptions{}, + &policy.ClientOptions{}, + ) + client := NewDatasetClientFromPipeline(server.URL, pipeline) return client, server } -func jsonHandler(status int, body map[string]any) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(status) - data, _ := json.Marshal(body) - _, _ = w.Write(data) - } -} - // --------------------------------------------------------------------------- // NewDatasetClient // --------------------------------------------------------------------------- diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go index 736bb4633e2..b3d09bcac6f 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go @@ -6,6 +6,7 @@ package eval_api import ( "context" "encoding/json" + "errors" "fmt" "os" "path/filepath" @@ -51,12 +52,12 @@ func DownloadDatasetArtifact( // Attempt full download via the dataset API. 
cred, credErr := client.GetDatasetCredential(ctx, ref.Name, ref.Version, apiVersion) if credErr != nil { - return "", nil + return "", fmt.Errorf("getting dataset credential for %q: %w", ref.Name, credErr) } downloadURL := cred.ResolvedDownloadURI() if downloadURL == "" { - return "", nil + return "", fmt.Errorf("dataset %q returned empty download URI", ref.Name) } destDir := DatasetArtifactPath(agentProject, ref) @@ -68,29 +69,44 @@ func DownloadDatasetArtifact( if isContainerSAS(downloadURL) { blobs, err := client.ListContainerBlobs(ctx, downloadURL) if err != nil { - return "", nil + return "", fmt.Errorf("listing container blobs for dataset %q: %w", ref.Name, err) } if len(blobs) == 0 { - return "", nil + return "", fmt.Errorf("dataset %q container has no blobs", ref.Name) } + var errs []error for _, blobName := range blobs { + ext := strings.ToLower(filepath.Ext(blobName)) + if ext != ".jsonl" && ext != ".csv" { + continue + } data, dlErr := client.DownloadBlob(ctx, downloadURL, blobName) if dlErr != nil { + errs = append(errs, fmt.Errorf("downloading blob %q: %w", blobName, dlErr)) + continue + } + dest, pathErr := opteval.SafePath(destDir, blobName) + if pathErr != nil { + errs = append(errs, pathErr) continue } - dest := filepath.Join(destDir, filepath.FromSlash(blobName)) if err := os.MkdirAll(filepath.Dir(dest), 0750); err != nil { + errs = append(errs, fmt.Errorf("creating dir for %q: %w", blobName, err)) continue } if err := os.WriteFile(dest, data, 0600); err != nil { + errs = append(errs, fmt.Errorf("writing %q: %w", blobName, err)) continue } } + if len(errs) > 0 { + return "", errors.Join(errs...) + } } else { // Single blob download. data, dlErr := client.DownloadDataset(ctx, downloadURL) if dlErr != nil { - return "", nil + return "", fmt.Errorf("downloading dataset %q: %w", ref.Name, dlErr) } // Infer filename from URL. 
filename := filenameFromURL(downloadURL) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go index eff24a84597..e8e7b70e3cb 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go @@ -216,7 +216,7 @@ func TestToAgentTargetAdaptableEvalGroupRequest_WithEvaluators(t *testing.T) { assert.Equal(t, "azure_ai_evaluator", req.TestingCriteria[0].Type) assert.Equal(t, "builtin.quality", req.TestingCriteria[0].EvaluatorName) assert.Equal(t, "gpt-4o", req.TestingCriteria[0].InitializationParameters["model"]) - assert.Equal(t, "{{item.messages}}", req.TestingCriteria[0].DataMapping["messages"]) + assert.Equal(t, "{{item.query}}", req.TestingCriteria[0].DataMapping["query"]) assert.Equal(t, "custom-1", req.TestingCriteria[1].EvaluatorName) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go index 3c8f97d601d..df2e6f8c8a1 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go @@ -91,6 +91,7 @@ func NewEvaluatorGenerationJobRequest( return &EvaluatorGenerationJobRequest{ Name: name, EvaluatorName: name, + Category: "quality", Model: evalModel, Sources: sources, } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go index 388a4257b59..a917e700140 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go @@ -273,7 +273,7 @@ type CreateOpenAIEvalRunRequest struct { } // 
EvalRunDataSourceType defines the type for an eval run data source. -type EvalRunDataSourceType = string +type EvalRunDataSourceType string const ( // EvalRunDataSourceTypeAgentTarget is the data source type for agent target completions. @@ -281,7 +281,7 @@ const ( ) // EvalRunDataContentType defines the source type for eval run data content. -type EvalRunDataContentType = string +type EvalRunDataContentType string const ( EvalRunDataContentTypeFileContent EvalRunDataContentType = "file_content" diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations.go index 5e67ac9b54b..8c661cadd13 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations.go @@ -45,7 +45,7 @@ func NewEvalClient(endpoint string, cred azcore.TokenCredential) *EvalClient { clientOptions := &policy.ClientOptions{ Logging: policy.LogOptions{ AllowedHeaders: []string{"X-Ms-Correlation-Request-Id", "X-Request-Id"}, - IncludeBody: true, + IncludeBody: false, }, PerCallPolicies: []policy.Policy{ runtime.NewBearerTokenPolicy(cred, []string{"https://ai.azure.com/.default"}, nil), @@ -67,6 +67,15 @@ func NewEvalClient(endpoint string, cred azcore.TokenCredential) *EvalClient { } } +// NewEvalClientFromPipeline creates an EvalClient with a pre-built pipeline. +// This is intended for tests that need to bypass auth policies. +func NewEvalClientFromPipeline(endpoint string, pipeline runtime.Pipeline) *EvalClient { + return &EvalClient{ + endpoint: endpoint, + pipeline: pipeline, + } +} + // CreateDataGenerationJob starts a dataset generation job for eval onboarding. 
func (c *EvalClient) CreateDataGenerationJob( ctx context.Context, @@ -223,14 +232,13 @@ func (c *EvalClient) doRequest( return nil, fmt.Errorf("failed to create request: %w", err) } - log.Printf("[eval_api] %s %s", method, u.String()) + log.Printf("[eval_api] %s %s", method, u.Redacted()) if body != nil { payload, err := json.Marshal(body) if err != nil { return nil, fmt.Errorf("failed to marshal request: %w", err) } - log.Printf("[eval_api] request body: %s", string(payload)) if err := req.SetBody(streaming.NopCloser(bytes.NewReader(payload)), "application/json"); err != nil { return nil, fmt.Errorf("failed to set request body: %w", err) } @@ -248,7 +256,6 @@ func (c *EvalClient) doRequest( } log.Printf("[eval_api] response status: %d", resp.StatusCode) - log.Printf("[eval_api] response body: %s", string(respBody)) if !runtime.HasStatusCode(resp, http.StatusOK, http.StatusCreated, http.StatusAccepted) { // Restore the body so runtime.NewResponseError can read it. diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go index 14215ad407a..08d845303cb 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go @@ -12,6 +12,7 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -35,20 +36,16 @@ func newTestClient(t *testing.T, handler http.Handler) (*EvalClient, *httptest.S t.Helper() server := httptest.NewServer(handler) t.Cleanup(server.Close) - client := NewEvalClient(server.URL, &fakeCredential{}) + pipeline := runtime.NewPipeline( + "test", + "v0.0.0", + runtime.PipelineOptions{}, + &policy.ClientOptions{}, + ) + client := 
NewEvalClientFromPipeline(server.URL, pipeline) return client, server } -// jsonHandler returns an http.HandlerFunc that responds with the given body and status. -func jsonHandler(status int, body map[string]any) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(status) - data, _ := json.Marshal(body) - _, _ = w.Write(data) - } -} - // --------------------------------------------------------------------------- // NewEvalClient // --------------------------------------------------------------------------- diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go index a7124208253..0a1b2c8541a 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go @@ -5,10 +5,13 @@ package eval_api import ( "context" + "errors" "fmt" "log" "strings" "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" ) // --------------------------------------------------------------------------- @@ -164,6 +167,10 @@ func (p *Poller) Poll(ctx context.Context) (*GenerationJob, error) { job, err := p.GetJob(ctx, p.OperationID, p.APIVersion) if err != nil { + if isTransientError(err) { + log.Printf("[poller] transient error polling %s, will retry: %v", p.OperationID, err) + continue + } return nil, err } @@ -187,3 +194,12 @@ func (p *Poller) Poll(ctx context.Context) (*GenerationJob, error) { Attempts: p.Options.MaxAttempts, } } + +// isTransientError checks whether an error represents a transient HTTP failure +// (429 Too Many Requests or 5xx Server Error) that is safe to retry. 
+func isTransientError(err error) bool { + if respErr, ok := errors.AsType[*azcore.ResponseError](err); ok { + return respErr.StatusCode == 429 || respErr.StatusCode >= 500 + } + return false +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls.go index 765c3f551c7..8b1ccd0fd5d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls.go @@ -64,9 +64,8 @@ func (p *PortalPrefix) DatasetURL(datasetName, version string) string { // OptimizationURL returns the portal URL for an optimization job. func (p *PortalPrefix) OptimizationURL(agentName, operationID string) string { - optimizePrefix := strings.Replace(p.prefix, "https://ai.azure.com", "https://eastus2euap.ai.azure.com", 1) - return fmt.Sprintf("%s/build/agents/%s/optimization/%s?flight=enable_faos_read_ui", - optimizePrefix, agentName, operationID) + return fmt.Sprintf("%s/build/agents/%s/optimization/%s", + p.prefix, agentName, operationID) } // encodeSubscriptionForURL encodes a subscription ID GUID as base64 without padding. diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/state.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/state.go index d6aa897acd1..9d89c3401d9 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/state.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/state.go @@ -9,6 +9,7 @@ package opteval import ( "context" + "errors" "fmt" "github.com/azure/azure-dev/cli/azd/pkg/azdext" @@ -86,13 +87,18 @@ func SaveEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName str } // ClearEvalState removes eval state keys from the azd environment. 
-func ClearEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string) { +func ClearEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string) error { + var errs []error for _, key := range []string{ evalKeyInitStatus, evalKeyDatasetGenOpID, evalKeyDatasetGenStatus, evalKeyEvalGenOpID, evalKeyEvalGenStatus, evalKeyEvalID, } { - _, _ = azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ + _, err := azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ EnvName: envName, Key: key, Value: "", }) + if err != nil { + errs = append(errs, err) + } } + return errors.Join(errs...) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go index d32bd103702..251b4137e08 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go @@ -7,12 +7,26 @@ import ( "fmt" "os" "path/filepath" + "strings" "azureaiagent/internal/pkg/agents/agent_yaml" "go.yaml.in/yaml/v3" ) +// SafePath validates that joining baseDir with an untrusted relative path +// does not escape baseDir (zip-slip prevention). Returns the cleaned +// absolute path or an error. +func SafePath(baseDir, untrusted string) (string, error) { + p := filepath.Join(baseDir, filepath.FromSlash(untrusted)) + p = filepath.Clean(p) + if !strings.HasPrefix(p, filepath.Clean(baseDir)+string(filepath.Separator)) && + p != filepath.Clean(baseDir) { + return "", fmt.Errorf("path %q escapes base directory", untrusted) + } + return p, nil +} + // Config is the shared YAML configuration for eval and optimize commands. // // Contains fields common to both commands. 
Optimize-specific fields @@ -397,9 +411,6 @@ func (o *Options) UnmarshalYAML(value *yaml.Node) error { if o.MaxIterations <= 0 { o.MaxIterations = 4 } - // if o.Budget <= 0 { - // o.Budget = 100 - // } return nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go index 435e73a4500..ce4a95ac328 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go @@ -37,7 +37,7 @@ func NewOptimizeClient(endpoint string, cred azcore.TokenCredential) *OptimizeCl clientOptions := &policy.ClientOptions{ Logging: policy.LogOptions{ AllowedHeaders: []string{"X-Ms-Correlation-Request-Id", "X-Request-Id"}, - IncludeBody: true, + IncludeBody: false, }, PerCallPolicies: []policy.Policy{ runtime.NewBearerTokenPolicy(cred, []string{"https://ai.azure.com/.default"}, nil), @@ -117,7 +117,7 @@ func (c *OptimizeClient) GetOptimizeStatus( ctx context.Context, operationID string, ) (*OptimizeJobStatus, error) { - url := fmt.Sprintf("%s/optimize/%s?api-version=%s", c.endpoint, operationID, APIVersion) + url := fmt.Sprintf("%s/optimize/%s?api-version=%s", c.endpoint, netURL.PathEscape(operationID), APIVersion) req, err := runtime.NewRequest(ctx, http.MethodGet, url) if err != nil { @@ -155,7 +155,7 @@ func (c *OptimizeClient) ListOptimizeJobs( ) (*OptimizeListResponse, error) { url := fmt.Sprintf("%s/optimize?api-version=%s&limit=%d", c.endpoint, APIVersion, limit) if status != "" { - url += "&status=" + status + url += "&status=" + netURL.QueryEscape(status) } req, err := runtime.NewRequest(ctx, http.MethodGet, url) @@ -191,7 +191,7 @@ func (c *OptimizeClient) CancelOptimize( ctx context.Context, operationID string, ) (*OptimizeCancelResponse, error) { - url := fmt.Sprintf("%s/optimize/%s/cancel?api-version=%s", c.endpoint, operationID, APIVersion) + url := 
fmt.Sprintf("%s/optimize/%s/cancel?api-version=%s", c.endpoint, netURL.PathEscape(operationID), APIVersion) req, err := runtime.NewRequest(ctx, http.MethodPost, url) if err != nil { @@ -229,7 +229,7 @@ func (c *OptimizeClient) ReportDeployment( ) error { url := fmt.Sprintf( "%s/optimize/candidates/%s:promote?api-version=%s", - c.endpoint, report.CandidateID, APIVersion, + c.endpoint, netURL.PathEscape(report.CandidateID), APIVersion, ) payload, err := json.Marshal(report) @@ -267,7 +267,7 @@ func (c *OptimizeClient) GetCandidateConfig( ctx context.Context, candidateID string, ) (any, error) { - url := fmt.Sprintf("%s/optimize/candidates/%s/config?api-version=%s", c.endpoint, candidateID, APIVersion) + url := fmt.Sprintf("%s/optimize/candidates/%s/config?api-version=%s", c.endpoint, netURL.PathEscape(candidateID), APIVersion) req, err := runtime.NewRequest(ctx, http.MethodGet, url) if err != nil { @@ -302,7 +302,7 @@ func (c *OptimizeClient) GetCandidate( ctx context.Context, candidateID string, ) (*CandidateManifest, error) { - url := fmt.Sprintf("%s/optimize/candidates/%s?api-version=%s", c.endpoint, candidateID, APIVersion) + url := fmt.Sprintf("%s/optimize/candidates/%s?api-version=%s", c.endpoint, netURL.PathEscape(candidateID), APIVersion) req, err := runtime.NewRequest(ctx, http.MethodGet, url) if err != nil { @@ -339,7 +339,7 @@ func (c *OptimizeClient) GetCandidateFile( filePath string, ) (string, error) { url := fmt.Sprintf("%s/optimize/candidates/%s/files?api-version=%s&path=%s", - c.endpoint, candidateID, APIVersion, netURL.QueryEscape(filePath)) + c.endpoint, netURL.PathEscape(candidateID), APIVersion, netURL.QueryEscape(filePath)) req, err := runtime.NewRequest(ctx, http.MethodGet, url) if err != nil { diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go index 0cd30c78d0b..97b21056521 100644 --- 
a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go @@ -114,7 +114,7 @@ type OptimizeOptions struct { MinImprovement float64 `json:"minImprovement,omitempty"` ImprovementThreshold float64 `json:"improvementThreshold,omitempty"` PassThreshold float64 `json:"passThreshold,omitempty"` - EvalModel string `json:"evalModel"` + EvalModel string `json:"evalModel,omitempty"` // Send as both "strategies" (current server) and "targetAttributes" (future). Strategies []string `json:"strategies,omitempty"` TargetAttributes []string `json:"targetAttributes,omitempty"` diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go index c633e0815b9..dedbfaa0ac7 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go @@ -7,8 +7,12 @@ package optimize_api import ( "context" + "errors" "fmt" + "log" "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" ) // Poller polls an optimization job until it reaches a terminal state. @@ -16,19 +20,29 @@ type Poller struct { Client *OptimizeClient OperationID string Interval time.Duration + MaxAttempts int // 0 means no limit OnProgress func(*OptimizeJobStatus) } // PollUntilDone polls GetOptimizeStatus at the configured interval until the -// job reaches a terminal state (completed, failed, cancelled) or the context -// is cancelled. +// job reaches a terminal state (completed, failed, cancelled), the context +// is cancelled, or MaxAttempts is reached. 
func (p *Poller) PollUntilDone(ctx context.Context) (*OptimizeJobStatus, error) { - ticker := time.NewTicker(p.Interval) + interval := p.Interval + if interval <= 0 { + interval = 5 * time.Second + } + ticker := time.NewTicker(interval) defer ticker.Stop() + attempts := 0 for { status, err := p.Client.GetOptimizeStatus(ctx, p.OperationID) if err != nil { + if isTransientError(err) { + log.Printf("[poller] transient error polling %s, will retry: %v", p.OperationID, err) + goto wait + } return nil, fmt.Errorf("failed to get optimization status: %w", err) } @@ -40,6 +54,12 @@ func (p *Poller) PollUntilDone(ctx context.Context) (*OptimizeJobStatus, error) return status, nil } + wait: + attempts++ + if p.MaxAttempts > 0 && attempts >= p.MaxAttempts { + return nil, fmt.Errorf("polling timed out after %d attempts", attempts) + } + select { case <-ctx.Done(): return nil, ctx.Err() @@ -48,3 +68,12 @@ func (p *Poller) PollUntilDone(ctx context.Context) (*OptimizeJobStatus, error) } } } + +// isTransientError checks whether an error represents a transient HTTP failure +// (429 Too Many Requests or 5xx Server Error) that is safe to retry. 
+func isTransientError(err error) bool { + if respErr, ok := errors.AsType[*azcore.ResponseError](err); ok { + return respErr.StatusCode == 429 || respErr.StatusCode >= 500 + } + return false +} From 928e96cf14b6f738b802d0bd008c6a47c63f6076 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 21 May 2026 17:43:05 -0700 Subject: [PATCH 28/33] Address more comments --- .../azure.ai.agents/internal/cmd/eval.go | 15 +- .../internal/cmd/eval_helpers.go | 24 ++-- .../internal/cmd/eval_helpers_test.go | 22 +-- .../azure.ai.agents/internal/cmd/eval_init.go | 30 ++-- .../internal/cmd/eval_init_jobs.go | 54 ++++---- .../internal/cmd/eval_init_test.go | 43 +++--- .../azure.ai.agents/internal/cmd/eval_list.go | 11 +- .../azure.ai.agents/internal/cmd/eval_run.go | 46 +++++-- .../internal/cmd/eval_run_test.go | 8 +- .../azure.ai.agents/internal/cmd/eval_show.go | 6 +- .../azure.ai.agents/internal/cmd/eval_test.go | 35 ++--- .../azure.ai.agents/internal/cmd/listen.go | 2 +- .../azure.ai.agents/internal/cmd/optimize.go | 20 +-- .../internal/cmd/optimize_apply.go | 74 ++++++---- .../internal/cmd/optimize_apply_test.go | 80 +++++++---- .../internal/cmd/optimize_config.go | 76 ++++++---- .../internal/cmd/optimize_config_test.go | 130 +++++++++++++++--- .../internal/cmd/optimize_deploy.go | 6 +- .../internal/cmd/optimize_helpers.go | 108 ++++++++------- .../internal/cmd/optimize_prompts.go | 45 +++--- .../internal/exterrors/codes.go | 12 ++ .../internal/pkg/agents/dataset_api/models.go | 5 +- .../pkg/agents/dataset_api/operations.go | 2 +- .../internal/pkg/agents/eval_api/artifacts.go | 34 +++-- .../pkg/agents/eval_api/artifacts_test.go | 59 ++++++++ .../pkg/agents/eval_api/eval_config.go | 26 +++- .../pkg/agents/eval_api/eval_config_test.go | 110 ++++++++++----- .../pkg/agents/eval_api/generation.go | 4 +- .../pkg/agents/eval_api/generation_test.go | 12 +- .../internal/pkg/agents/eval_api/poller.go | 14 +- .../pkg/agents/eval_api/portal_urls_test.go | 66 +++++++++ 
.../pkg/agents/{opteval => opt_eval}/state.go | 9 +- .../pkg/agents/{opteval => opt_eval}/yaml.go | 29 ++-- .../agents/{opteval => opt_eval}/yaml_test.go | 5 +- .../pkg/agents/optimize_api/client.go | 14 +- .../pkg/agents/optimize_api/models.go | 4 +- .../pkg/agents/optimize_api/models_test.go | 2 +- .../pkg/agents/optimize_api/poller.go | 32 +++-- .../pkg/agents/optimize_api/poller_test.go | 53 +++++++ .../internal/pkg/agents/transient.go | 25 ++++ .../internal/pkg/agents/transient_test.go | 34 +++++ 41 files changed, 957 insertions(+), 429 deletions(-) create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts_test.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls_test.go rename cli/azd/extensions/azure.ai.agents/internal/pkg/agents/{opteval => opt_eval}/state.go (93%) rename cli/azd/extensions/azure.ai.agents/internal/pkg/agents/{opteval => opt_eval}/yaml.go (95%) rename cli/azd/extensions/azure.ai.agents/internal/pkg/agents/{opteval => opt_eval}/yaml_test.go (98%) create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/transient.go create mode 100644 cli/azd/extensions/azure.ai.agents/internal/pkg/agents/transient_test.go diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go index 0a331ac0e26..3cfc8addf66 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go @@ -24,7 +24,7 @@ import ( "azureaiagent/internal/pkg/agents/agent_yaml" "azureaiagent/internal/pkg/agents/dataset_api" "azureaiagent/internal/pkg/agents/eval_api" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/azure/azure-dev/cli/azd/pkg/output" @@ -38,12 +38,13 @@ const ( defaultEvalConfigName = "eval.yaml" defaultEvalName = "smoke-core" defaultEvalSamples = 
15 + defaultEvalModel = "gpt-4o" ) // Type aliases to avoid repeating full package paths throughout the eval code. type evalConfig = eval_api.EvalConfig -type evalAgentRef = opteval.AgentRef -type evalDatasetRef = opteval.DatasetRef +type evalAgentRef = opt_eval.AgentRef +type evalDatasetRef = opt_eval.DatasetRef // evalResolvedContext holds the fully-resolved context for an eval operation, // including the azd client, API clients, project paths, and agent metadata. @@ -83,8 +84,12 @@ func newEvalCommand(extCtx *azdext.ExtensionContext) *cobra.Command { Short: "Create and run quick evals for an agent.", Long: `Create and run quick evals for an agent. -These commands are designed for quick agent eval onboarding under azd ai agent. -Use eval init to generate an eval config, then eval run to execute it.`, +Subcommands: + init Generate an eval config and dataset from a hosted agent + run Execute an evaluation run from eval.yaml + update Update an existing eval configuration + list List evaluations for the current project + show Show details of an evaluation run`, } cmd.AddCommand(newEvalInitCommand(extCtx)) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go index e01b6c8b7bc..f11fcfd3c95 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go @@ -18,7 +18,7 @@ import ( "path/filepath" "azureaiagent/internal/pkg/agents/eval_api" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/fatih/color" @@ -88,7 +88,7 @@ func relativeDisplay(absPath, projectDir string) string { // reconcileConfigAgentName reconciles the agent name in a config with the // environment-resolved name. Environment takes precedence. Returns true if // the config was changed. Used by both eval run and optimize. 
-func reconcileConfigAgentName(agent *opteval.AgentRef, envName, configSource string) bool { +func reconcileConfigAgentName(agent *opt_eval.AgentRef, envName, configSource string) bool { if envName == "" || agent.Name == "" || agent.Name == envName { if envName != "" && agent.Name == "" { agent.Name = envName @@ -114,20 +114,20 @@ func reconcileConfigAgentName(agent *opteval.AgentRef, envName, configSource str // skill_dir, and tools_file. Eval init uses only instruction fields; // optimize also uses skill_dir and tools_file. func resolveAgentConfig( - existingConfig *opteval.Config, + existingConfig *opt_eval.Config, projectDir string, -) *opteval.AgentConfig { +) *opt_eval.AgentConfig { // Step 1: existing config has a config pointer — resolve from it. if existingConfig != nil && existingConfig.Agent.ConfigFile != "" { - ref := opteval.AgentRef{ConfigFile: existingConfig.Agent.ConfigFile} + ref := opt_eval.AgentRef{ConfigFile: existingConfig.Agent.ConfigFile} return ref.ResolveConfig(projectDir) } // Step 2: try the default baseline path. if projectDir != "" { - relPath := opteval.BaselineConfigRelPath() + relPath := opt_eval.BaselineConfigRelPath() if fileExists(filepath.Join(projectDir, relPath)) { - ref := opteval.AgentRef{ConfigFile: relPath} + ref := opt_eval.AgentRef{ConfigFile: relPath} return ref.ResolveConfig(projectDir) } } @@ -145,7 +145,7 @@ func writeBaselineIfNeeded( if projectDir == "" || instruction == "" { return "" } - defaultConfigFile := opteval.BaselineConfigRelPath() + defaultConfigFile := opt_eval.BaselineConfigRelPath() absConfigFile := filepath.Join(projectDir, defaultConfigFile) // Don't overwrite an existing baseline. if fileExists(absConfigFile) { @@ -174,7 +174,7 @@ type baselineParams struct { // When skillDir is empty, it auto-detects a "skills" or "skill" directory. // Used by both eval init and optimize. 
func writeBaselineConfig(agentProject string, p baselineParams) error { - baseDir := filepath.Join(agentProject, opteval.AgentConfigsDir, opteval.BaselineDir) + baseDir := filepath.Join(agentProject, opt_eval.AgentConfigsDir, opt_eval.BaselineDir) if err := os.MkdirAll(baseDir, 0750); err != nil { return fmt.Errorf("creating baseline directory: %w", err) } @@ -189,11 +189,11 @@ func writeBaselineConfig(agentProject string, p baselineParams) error { } if p.Instruction != "" { - instructionPath := filepath.Join(baseDir, opteval.InstructionFile) + instructionPath := filepath.Join(baseDir, opt_eval.InstructionFile) if err := os.WriteFile(instructionPath, []byte(p.Instruction), 0600); err != nil { return fmt.Errorf("writing baseline instructions: %w", err) } - meta.InstructionFile = opteval.InstructionFile + meta.InstructionFile = opt_eval.InstructionFile } // Resolve skill_dir: use explicit path, or auto-detect from project. @@ -228,7 +228,7 @@ func writeBaselineConfig(agentProject string, p baselineParams) error { return fmt.Errorf("serializing baseline metadata: %w", err) } - metaPath := filepath.Join(baseDir, opteval.MetadataFile) + metaPath := filepath.Join(baseDir, opt_eval.MetadataFile) if err := os.WriteFile(metaPath, data, 0600); err != nil { return fmt.Errorf("writing baseline metadata: %w", err) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers_test.go index 4785d793d3b..63d9f743f4c 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers_test.go @@ -8,7 +8,7 @@ import ( "path/filepath" "testing" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -45,7 +45,7 @@ func TestReconcileConfigAgentName(t *testing.T) { t.Parallel() t.Run("no change when names match", 
func(t *testing.T) { t.Parallel() - agent := &opteval.AgentRef{Name: "my-agent"} + agent := &opt_eval.AgentRef{Name: "my-agent"} changed := reconcileConfigAgentName(agent, "my-agent", "config.yaml") assert.False(t, changed) assert.Equal(t, "my-agent", agent.Name) @@ -53,7 +53,7 @@ func TestReconcileConfigAgentName(t *testing.T) { t.Run("sets name when agent name is empty", func(t *testing.T) { t.Parallel() - agent := &opteval.AgentRef{} + agent := &opt_eval.AgentRef{} changed := reconcileConfigAgentName(agent, "env-agent", "config.yaml") assert.False(t, changed) assert.Equal(t, "env-agent", agent.Name) @@ -61,7 +61,7 @@ func TestReconcileConfigAgentName(t *testing.T) { t.Run("overrides when names differ", func(t *testing.T) { t.Parallel() - agent := &opteval.AgentRef{Name: "config-agent"} + agent := &opt_eval.AgentRef{Name: "config-agent"} changed := reconcileConfigAgentName(agent, "env-agent", "config.yaml") assert.True(t, changed) assert.Equal(t, "env-agent", agent.Name) @@ -69,7 +69,7 @@ func TestReconcileConfigAgentName(t *testing.T) { t.Run("no change when envName is empty", func(t *testing.T) { t.Parallel() - agent := &opteval.AgentRef{Name: "my-agent"} + agent := &opt_eval.AgentRef{Name: "my-agent"} changed := reconcileConfigAgentName(agent, "", "config.yaml") assert.False(t, changed) assert.Equal(t, "my-agent", agent.Name) @@ -136,10 +136,10 @@ func TestWriteBaselineConfig(t *testing.T) { }) require.NoError(t, err) - metaPath := filepath.Join(dir, opteval.AgentConfigsDir, opteval.BaselineDir, opteval.MetadataFile) + metaPath := filepath.Join(dir, opt_eval.AgentConfigsDir, opt_eval.BaselineDir, opt_eval.MetadataFile) assert.FileExists(t, metaPath) - instrPath := filepath.Join(dir, opteval.AgentConfigsDir, opteval.BaselineDir, opteval.InstructionFile) + instrPath := filepath.Join(dir, opt_eval.AgentConfigsDir, opt_eval.BaselineDir, opt_eval.InstructionFile) assert.FileExists(t, instrPath) content, err := os.ReadFile(instrPath) //nolint:gosec // test file path 
require.NoError(t, err) @@ -154,10 +154,10 @@ func TestWriteBaselineConfig(t *testing.T) { }) require.NoError(t, err) - metaPath := filepath.Join(dir, opteval.AgentConfigsDir, opteval.BaselineDir, opteval.MetadataFile) + metaPath := filepath.Join(dir, opt_eval.AgentConfigsDir, opt_eval.BaselineDir, opt_eval.MetadataFile) assert.FileExists(t, metaPath) - instrPath := filepath.Join(dir, opteval.AgentConfigsDir, opteval.BaselineDir, opteval.InstructionFile) + instrPath := filepath.Join(dir, opt_eval.AgentConfigsDir, opt_eval.BaselineDir, opt_eval.InstructionFile) assert.NoFileExists(t, instrPath) }) @@ -171,7 +171,7 @@ func TestWriteBaselineConfig(t *testing.T) { }) require.NoError(t, err) - metaPath := filepath.Join(dir, opteval.AgentConfigsDir, opteval.BaselineDir, opteval.MetadataFile) + metaPath := filepath.Join(dir, opt_eval.AgentConfigsDir, opt_eval.BaselineDir, opt_eval.MetadataFile) data, err := os.ReadFile(metaPath) //nolint:gosec // test file path require.NoError(t, err) assert.Contains(t, string(data), "skill_dir") @@ -194,7 +194,7 @@ func TestWriteBaselineIfNeeded(t *testing.T) { t.Parallel() dir := t.TempDir() // Create existing baseline. 
- absPath := filepath.Join(dir, opteval.BaselineConfigRelPath()) + absPath := filepath.Join(dir, opt_eval.BaselineConfigRelPath()) require.NoError(t, os.MkdirAll(filepath.Dir(absPath), 0750)) require.NoError(t, os.WriteFile(absPath, []byte("existing"), 0600)) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go index b9b87076651..126e6fced5e 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init.go @@ -17,7 +17,7 @@ import ( "path/filepath" "azureaiagent/internal/pkg/agents/eval_api" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/fatih/color" @@ -82,12 +82,12 @@ the agent project root. Use --no-wait to write pending operation IDs and return. cmd.Flags().StringVar(&flags.agent, "agent", "", "Target agent name") cmd.Flags().StringVarP(&flags.projectEndpoint, "project-endpoint", "p", "", "Microsoft Foundry project endpoint URL") cmd.Flags().StringVarP(&flags.instruction, "gen-instruction", "g", "", "Agent instruction used for dataset and evaluator generation") - cmd.Flags().StringVarP(&flags.instructionFile, "gen-instruction-file", "G", "", "Path to a file containing the agent instruction") + cmd.Flags().StringVarP(&flags.instructionFile, "gen-instruction-file", "", "", "Path to a file containing the agent instruction") cmd.Flags().StringVar(&flags.evalModel, "eval-model", "", "Model used for evaluation and generation") cmd.Flags().StringVar(&flags.dataset, "dataset", "", "Existing local file or registered dataset name to use for evaluation (instead of generating a new dataset)") cmd.Flags().IntVar(&flags.maxSamples, "max-samples", defaultEvalSamples, "Number of samples to generate (15-1000)") cmd.Flags().StringArrayVar(&flags.evaluators, "evaluator", nil, "Built-in or custom evaluator name") - 
cmd.Flags().StringVarP(&flags.output, "out-file", "O", defaultEvalConfigName, "Eval config path") + cmd.Flags().StringVar(&flags.output, "out-file", defaultEvalConfigName, "Eval config path") cmd.Flags().IntVar(&flags.traceDays, "trace-days", 0, "Include agent traces from the last N days for evaluator generation (0 = no traces)") cmd.Flags().BoolVar(&flags.resetDefaults, "reset-defaults", false, "Overwrite an existing eval config") @@ -139,7 +139,7 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error // Resolve agent config: eval.yaml config → default baseline → nothing. if flags.instruction == "" && flags.instructionFile == "" && resolved.hasProject { - var existing *opteval.Config + var existing *opt_eval.Config if hasExisting && !flags.resetDefaults { existing = &existingCfg.Config } @@ -152,7 +152,7 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error // If --reset-defaults is set, clear existing state so the user can start fresh. 
if flags.resetDefaults && resolved.envName != "" { - if err := opteval.ClearEvalState(ctx, resolved.azdClient, resolved.envName); err != nil { + if err := opt_eval.ClearEvalState(ctx, resolved.azdClient, resolved.envName); err != nil { log.Printf("warning: clearing eval state: %v", err) } } @@ -199,7 +199,9 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error if flags.instruction == "" && flags.instructionFile == "" && flags.configFile == "" && (flags.dataset == "" || len(flags.evaluators) == 0) { - return fmt.Errorf("--gen-instruction is required when generating eval assets for a hosted agent") + return fmt.Errorf( + "one of --gen-instruction, --gen-instruction-file, --config, or both --dataset and --evaluators is required" + + " when generating eval assets for a hosted agent") } if flags.maxSamples < 15 || flags.maxSamples > 1000 { return fmt.Errorf("--max-samples must be between 15 and 1000") @@ -207,7 +209,7 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error // Build config and submit generation jobs. 
evalCfg := newEvalConfig(flags, resolved) - var extraEvals opteval.EvaluatorList + var extraEvals opt_eval.EvaluatorList if !isRegenerate && len(flags.evaluators) > 0 { extraEvals = evaluatorsFromFlags(flags.evaluators) } @@ -219,7 +221,7 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error if flags.noWait { if state.DatasetGenOpID != "" || state.EvalGenOpID != "" { - state.InitStatus = opteval.InitStatusPending + state.InitStatus = opt_eval.InitStatusPending } return writePendingEvalInit(ctx, resolved, configPath, evalCfg, state) } @@ -232,8 +234,8 @@ func runEvalInit(ctx context.Context, flags *evalInitFlags, noPrompt bool) error return err } - state.InitStatus = opteval.InitStatusCompleted - if err := opteval.ClearEvalState(ctx, resolved.azdClient, resolved.envName); err != nil { + state.InitStatus = opt_eval.InitStatusCompleted + if err := opt_eval.ClearEvalState(ctx, resolved.azdClient, resolved.envName); err != nil { log.Printf("warning: clearing eval state: %v", err) } return writeAndPrintEvalResult(ctx, resolved, evalCfg, pollRes, configPath, isRegenerate) @@ -286,8 +288,8 @@ func submitEvalJobs( evalCfg *evalConfig, existingCfg *evalConfig, isRegenerate bool, -) (*opteval.EvalState, error) { - state := &opteval.EvalState{} +) (*opt_eval.EvalState, error) { + state := &opt_eval.EvalState{} var needDatasetGen, needEvalGen bool if isRegenerate { @@ -348,7 +350,9 @@ func writeAndPrintEvalResult( } if resolved.hasProject { - eval_api.WriteEvalReviewArtifacts(resolved.agentProject, evalCfg) + if err := eval_api.WriteEvalReviewArtifacts(resolved.agentProject, evalCfg); err != nil { + log.Printf("warning: writing eval review artifacts: %v", err) + } } if isRegenerate { fmt.Println(color.GreenString("\nEval suite regenerated")) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go index 6f54495d111..a2d2b6a4245 100644 --- 
a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -18,7 +18,7 @@ import ( "sync" "azureaiagent/internal/pkg/agents/eval_api" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/fatih/color" ) @@ -61,11 +61,11 @@ func newEvalConfig(flags *evalInitFlags, resolved *evalResolvedContext) *evalCon agent.Instruction.File = flags.instructionFile } return &evalConfig{ - Config: opteval.Config{ + Config: opt_eval.Config{ Name: resolveEvalName(flags), Agent: agent, }, - Options: &opteval.Options{ + Options: &opt_eval.Options{ EvalModel: flags.evalModel, }, MaxSamples: flags.maxSamples, @@ -142,10 +142,10 @@ func evaluatorFromJob(job *eval_api.GenerationJob) (string, string) { return job.ResolvedNameVersion() } -func evaluatorsFromFlags(values []string) opteval.EvaluatorList { - refs := make(opteval.EvaluatorList, len(values)) +func evaluatorsFromFlags(values []string) opt_eval.EvaluatorList { + refs := make(opt_eval.EvaluatorList, len(values)) for i, v := range values { - refs[i] = opteval.EvaluatorRef{Name: v} + refs[i] = opt_eval.EvaluatorRef{Name: v} } return refs } @@ -160,7 +160,7 @@ func resumeEvalInit( resolved *evalResolvedContext, configPath string, evalCfg *evalConfig, - state *opteval.EvalState, + state *opt_eval.EvalState, ) error { if _, err := pollAndFinalizeJobs(ctx, resolved, evalCfg, state, nil); err != nil { if _, ok := errors.AsType[*initTimeoutError](err); ok { @@ -168,12 +168,14 @@ func resumeEvalInit( } return err } - state.InitStatus = opteval.InitStatusCompleted - if err := opteval.ClearEvalState(ctx, resolved.azdClient, resolved.envName); err != nil { + state.InitStatus = opt_eval.InitStatusCompleted + if err := opt_eval.ClearEvalState(ctx, resolved.azdClient, resolved.envName); err != nil { log.Printf("warning: clearing eval state: %v", err) } if resolved.hasProject { - 
eval_api.WriteEvalReviewArtifacts(resolved.agentProject, evalCfg) + if err := eval_api.WriteEvalReviewArtifacts(resolved.agentProject, evalCfg); err != nil { + log.Printf("warning: writing eval review artifacts: %v", err) + } } return eval_api.WriteEvalConfig(configPath, evalCfg) } @@ -193,8 +195,8 @@ func pollAndFinalizeJobs( ctx context.Context, resolved *evalResolvedContext, evalCfg *evalConfig, - state *opteval.EvalState, - extraEvals opteval.EvaluatorList, + state *opt_eval.EvalState, + extraEvals opt_eval.EvaluatorList, ) (*pollResults, error) { results := &pollResults{} // Each goroutine writes to distinct fields of evalCfg and state, so no @@ -223,9 +225,7 @@ func pollAndFinalizeJobs( progress.Start() if hasDataset { - wg.Add(1) - go func() { - defer wg.Done() + wg.Go(func() { var completed *eval_api.GenerationJob if needPollDataset { var err error @@ -277,13 +277,11 @@ func pollAndFinalizeJobs( dsRef.LocalURI = localURI } } - }() + }) } if hasEval { - wg.Add(1) - go func() { - defer wg.Done() + wg.Go(func() { var completed *eval_api.GenerationJob if needPollEval { var err error @@ -319,7 +317,7 @@ func pollAndFinalizeJobs( // Evaluator goroutine owns: state.EvalGenStatus, evalCfg.Evaluators. 
evalName, evalVersion := evaluatorFromJob(completed) state.EvalGenStatus = completed.NormalizedStatus() - evalRef := opteval.EvaluatorRef{ + evalRef := opt_eval.EvaluatorRef{ Name: evalName, Version: evalVersion, LocalURI: eval_api.EvaluatorLocalURI(evalName), @@ -329,9 +327,11 @@ func pollAndFinalizeJobs( results.EvaluatorResult = eval_api.ParseEvaluatorResult(completed.Result) if resolved.hasProject { - eval_api.SaveEvaluatorResult(resolved.agentProject, evalName, completed.Result) + if err := eval_api.SaveEvaluatorResult(resolved.agentProject, evalName, completed.Result); err != nil { + log.Printf("warning: saving evaluator result for %q: %v", evalName, err) + } } - }() + }) } wg.Wait() @@ -384,9 +384,9 @@ func writePendingEvalInit( resolved *evalResolvedContext, configPath string, evalCfg *evalConfig, - state *opteval.EvalState, + state *opt_eval.EvalState, ) error { - if err := opteval.SaveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { + if err := opt_eval.SaveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { return err } if err := eval_api.WriteEvalConfig(configPath, evalCfg); err != nil { @@ -412,10 +412,10 @@ func writeTimedOutEvalInit( resolved *evalResolvedContext, configPath string, evalCfg *evalConfig, - state *opteval.EvalState, + state *opt_eval.EvalState, ) error { - state.InitStatus = opteval.InitStatusPending - if err := opteval.SaveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { + state.InitStatus = opt_eval.InitStatusPending + if err := opt_eval.SaveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { return err } if err := eval_api.WriteEvalConfig(configPath, evalCfg); err != nil { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go index d47f9393818..f923023617e 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go +++ 
b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_test.go @@ -13,7 +13,7 @@ import ( "azureaiagent/internal/pkg/agents/agent_yaml" "azureaiagent/internal/pkg/agents/eval_api" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/stretchr/testify/assert" @@ -61,12 +61,11 @@ func TestNewEvalInitCommand_NoArgs(t *testing.T) { assert.Error(t, cmd.Args(cmd, []string{"extra"})) } -func TestNewEvalInitCommand_ShortOutFile(t *testing.T) { +func TestNewEvalInitCommand_NoShortOutFile(t *testing.T) { t.Parallel() cmd := newEvalInitCommand(&azdext.ExtensionContext{}) - f := cmd.Flags().ShorthandLookup("O") - require.NotNil(t, f, "flag -O shorthand should exist") - assert.Equal(t, "out-file", f.Name) + f := cmd.Flags().ShorthandLookup("o") + assert.Nil(t, f, "flag -o shorthand must not exist (conflicts with azd global --output)") } // --------------------------------------------------------------------------- @@ -403,16 +402,16 @@ func TestBuildOpenAIEvalRequest(t *testing.T) { t.Parallel() cfg := &evalConfig{ - Config: opteval.Config{ + Config: opt_eval.Config{ Name: "smoke-core", Agent: evalAgentRef{ Name: "agent-1", Version: "v1", }, DatasetReference: &evalDatasetRef{Name: "ds", Version: "v1"}, - Evaluators: opteval.EvaluatorList{{Name: "builtin.quality"}}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.quality"}}, }, - Options: &opteval.Options{EvalModel: "gpt-4o"}, + Options: &opt_eval.Options{EvalModel: "gpt-4o"}, } req := buildOpenAIEvalRequest(cfg) @@ -434,7 +433,7 @@ func TestBuildOpenAIEvalRequest_WithDatasetFile(t *testing.T) { t.Parallel() cfg := &evalConfig{ - Config: opteval.Config{ + Config: opt_eval.Config{ Name: "test-eval", Agent: evalAgentRef{Name: "agent-1"}, DatasetFile: "tasks.jsonl", @@ -489,13 +488,13 @@ func TestTryLoadExistingEvalConfig_Found(t *testing.T) { dir := t.TempDir() cfgPath := filepath.Join(dir, "eval.yaml") cfg := &evalConfig{ - 
Config: opteval.Config{ + Config: opt_eval.Config{ Name: "smoke-core", Agent: evalAgentRef{ Name: "my-agent", }, DatasetFile: "data.jsonl", - Evaluators: opteval.EvaluatorList{{Name: "quality"}}, + Evaluators: opt_eval.EvaluatorList{{Name: "quality"}}, }, } require.NoError(t, eval_api.WriteEvalConfig(cfgPath, cfg)) @@ -504,7 +503,7 @@ func TestTryLoadExistingEvalConfig_Found(t *testing.T) { require.True(t, ok) assert.Equal(t, "smoke-core", loaded.Name) assert.Equal(t, "my-agent", loaded.Agent.Name) - assert.Equal(t, opteval.EvaluatorList{{Name: "quality"}}, loaded.Evaluators) + assert.Equal(t, opt_eval.EvaluatorList{{Name: "quality"}}, loaded.Evaluators) } func TestTryLoadExistingEvalConfig_NotFound(t *testing.T) { @@ -553,26 +552,26 @@ func TestSplitEvaluators(t *testing.T) { t.Parallel() tests := []struct { name string - input opteval.EvaluatorList - expectedGenerated opteval.EvaluatorList - expectedBuiltin opteval.EvaluatorList + input opt_eval.EvaluatorList + expectedGenerated opt_eval.EvaluatorList + expectedBuiltin opt_eval.EvaluatorList }{ { "mixed list", - opteval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "my-quality"}, {Name: "builtin.safety"}}, - opteval.EvaluatorList{{Name: "my-quality"}}, - opteval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "builtin.safety"}}, + opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "my-quality"}, {Name: "builtin.safety"}}, + opt_eval.EvaluatorList{{Name: "my-quality"}}, + opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "builtin.safety"}}, }, { "all builtin", - opteval.EvaluatorList{{Name: "builtin.quality"}, {Name: "builtin.safety"}}, + opt_eval.EvaluatorList{{Name: "builtin.quality"}, {Name: "builtin.safety"}}, nil, - opteval.EvaluatorList{{Name: "builtin.quality"}, {Name: "builtin.safety"}}, + opt_eval.EvaluatorList{{Name: "builtin.quality"}, {Name: "builtin.safety"}}, }, { "all generated", - opteval.EvaluatorList{{Name: "smoke-core"}, {Name: "custom-1"}}, - 
opteval.EvaluatorList{{Name: "smoke-core"}, {Name: "custom-1"}}, + opt_eval.EvaluatorList{{Name: "smoke-core"}, {Name: "custom-1"}}, + opt_eval.EvaluatorList{{Name: "smoke-core"}, {Name: "custom-1"}}, nil, }, { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go index 4bff60cb9e2..51dc7fe9a54 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_list.go @@ -14,7 +14,7 @@ import ( "text/tabwriter" "azureaiagent/internal/pkg/agents/eval_api" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/spf13/cobra" @@ -58,7 +58,7 @@ func runEvalList(ctx context.Context, flags *evalListFlags) error { // Load the active eval ID from the azd environment. var activeEvalID string if resolved.envName != "" { - state := opteval.LoadEvalState(ctx, resolved.azdClient, resolved.envName) + state := opt_eval.LoadEvalState(ctx, resolved.azdClient, resolved.envName) activeEvalID = state.EvalID } @@ -69,13 +69,18 @@ func runEvalList(ctx context.Context, flags *evalListFlags) error { items := resp.Data - // Fetch run summaries in parallel for each eval. + // Fetch run summaries in parallel for each eval, bounded by a semaphore + // to avoid overwhelming the service with concurrent requests. 
+ const maxConcurrent = 5 + sem := make(chan struct{}, maxConcurrent) summaries := make([]evalRunSummary, len(items)) var wg sync.WaitGroup for i, item := range items { wg.Add(1) go func(idx int, evalID string) { defer wg.Done() + sem <- struct{}{} + defer func() { <-sem }() runs, err := resolved.evalClient.ListOpenAIEvalRuns(ctx, evalID, 10, DefaultAgentAPIVersion) if err != nil || runs == nil { return diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go index d8892430ed1..28361af71de 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go @@ -14,8 +14,10 @@ import ( "strings" "time" + "azureaiagent/internal/exterrors" + "azureaiagent/internal/pkg/agents" "azureaiagent/internal/pkg/agents/eval_api" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/fatih/color" @@ -83,9 +85,9 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { } } - state := opteval.LoadEvalState(ctx, resolved.azdClient, resolved.envName) + state := opt_eval.LoadEvalState(ctx, resolved.azdClient, resolved.envName) - if state.InitStatus == opteval.InitStatusPending { + if state.InitStatus == opt_eval.InitStatusPending { if err := resumeEvalInit(ctx, resolved, configPath, evalCfg, state); err != nil { return err } @@ -117,7 +119,7 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { evalID = evalCfg.Name } state.EvalID = evalID - if err := opteval.SaveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { + if err := opt_eval.SaveEvalState(ctx, resolved.azdClient, resolved.envName, state); err != nil { return err } } @@ -225,6 +227,13 @@ func resolveRunName( return defaultName } +// Default polling constants for eval run monitoring. 
+const ( + defaultEvalPollInterval = 5 * time.Second + defaultEvalMaxAttempts = 360 // ~30 minutes at 5s intervals + maxConsecutiveTransientErr = 5 +) + // pollEvalRun polls an eval run until it reaches a terminal status. // Terminal statuses: "completed", "failed", "canceled". func pollEvalRun( @@ -232,29 +241,32 @@ func pollEvalRun( client *eval_api.EvalClient, evalID, runID string, ) (*eval_api.OpenAIEvalRun, error) { - const ( - interval = 5 * time.Second - maxAttempts = 360 // ~30 minutes - ) - progress := newEvalProgress() progress.Start() defer progress.Stop() progress.setRunning("Eval run", runID) - for range maxAttempts { + consecutiveTransient := 0 + for range defaultEvalMaxAttempts { select { case <-ctx.Done(): return nil, ctx.Err() - case <-time.After(interval): + case <-time.After(defaultEvalPollInterval): } run, err := client.GetOpenAIEvalRun(ctx, evalID, runID, DefaultAgentAPIVersion) if err != nil { + if agents.IsTransientError(err) { + consecutiveTransient++ + if consecutiveTransient <= maxConsecutiveTransientErr { + continue + } + } progress.setFailed("Eval run") return nil, fmt.Errorf("failed to poll eval run: %w", err) } + consecutiveTransient = 0 switch run.Status { case "completed": @@ -266,22 +278,26 @@ func pollEvalRun( if run.Error != nil { errMsg = fmt.Sprintf("eval run failed: %v", run.Error) } - return nil, fmt.Errorf("%s", errMsg) + return nil, exterrors.Dependency( + exterrors.CodeEvalRunFailed, errMsg, + "check eval run details with 'azd ai agent eval show'") case "canceled", "cancelled": progress.setFailed("Eval run") - return nil, fmt.Errorf("eval run was canceled") + return nil, exterrors.Cancelled("eval run was canceled") } } progress.setTimedOut("Eval run") - return nil, fmt.Errorf("eval run %s did not complete within %d attempts", runID, maxAttempts) + return nil, fmt.Errorf( + "eval run %s did not complete within %d attempts", + runID, defaultEvalMaxAttempts) } // buildDatasetFileID constructs an azureai:// URI for a remote 
dataset reference. // Format: azureai://accounts//projects//data//versions/ // The account and project are extracted from the project endpoint URL // (https://.services.ai.azure.com/api/projects/). -func buildDatasetFileID(projectEndpoint string, ref *opteval.DatasetRef) string { +func buildDatasetFileID(projectEndpoint string, ref *opt_eval.DatasetRef) string { account, project := parseProjectEndpoint(projectEndpoint) version := ref.Version if version == "" { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go index c608e7989c0..c3960bef7e0 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run_test.go @@ -8,7 +8,7 @@ import ( "path/filepath" "testing" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -124,19 +124,19 @@ func TestBuildDatasetFileID(t *testing.T) { tests := []struct { name string endpoint string - ref *opteval.DatasetRef + ref *opt_eval.DatasetRef expected string }{ { "with version", "https://foundryljm7.services.ai.azure.com/api/projects/projectljm7", - &opteval.DatasetRef{Name: "bugbash-mt-sim-scenarios", Version: "1"}, + &opt_eval.DatasetRef{Name: "bugbash-mt-sim-scenarios", Version: "1"}, "azureai://accounts/foundryljm7/projects/projectljm7/data/bugbash-mt-sim-scenarios/versions/1", }, { "default version", "https://myaccount.services.ai.azure.com/api/projects/myproject", - &opteval.DatasetRef{Name: "my-dataset"}, + &opt_eval.DatasetRef{Name: "my-dataset"}, "azureai://accounts/myaccount/projects/myproject/data/my-dataset/versions/1", }, } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go index f765bbbb1f7..22e77f4ad88 100644 --- 
a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_show.go @@ -13,7 +13,7 @@ import ( "text/tabwriter" "azureaiagent/internal/pkg/agents/eval_api" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/azure/azure-dev/cli/azd/pkg/azdext" "github.com/fatih/color" @@ -63,7 +63,7 @@ func runEvalShow(ctx context.Context, evalID string, flags *evalShowFlags) error // Fall back to the eval ID stored in the azd environment. if evalID == "" && resolved.envName != "" { - state := opteval.LoadEvalState(ctx, resolved.azdClient, resolved.envName) + state := opt_eval.LoadEvalState(ctx, resolved.azdClient, resolved.envName) evalID = state.EvalID } if evalID == "" { @@ -132,7 +132,7 @@ func printEvalSummary(evalObj *eval_api.OpenAIEval, runs []eval_api.OpenAIEvalRu if err := w.Flush(); err != nil { return err } - fmt.Printf("\n(showing %d of %d runs — use --limit to change)\n", min(limit, len(runs)), len(runs)) + fmt.Printf("\n(showing %d runs — use --limit to change)\n", min(limit, len(runs))) return nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go index 62de63b1140..6d4b2115ebf 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go @@ -16,7 +16,7 @@ import ( "azureaiagent/internal/pkg/agents/agent_yaml" "azureaiagent/internal/pkg/agents/dataset_api" "azureaiagent/internal/pkg/agents/eval_api" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" @@ -207,9 +207,10 @@ func TestWriteEvalReviewArtifacts(t *testing.T) { cfg := &evalConfig{} cfg.DatasetReference = &evalDatasetRef{Name: "test-data", Version: "v1"} - cfg.Evaluators = 
opteval.EvaluatorList{{Name: "quality"}} + cfg.Evaluators = opt_eval.EvaluatorList{{Name: "quality"}} - eval_api.WriteEvalReviewArtifacts(dir, cfg) + err := eval_api.WriteEvalReviewArtifacts(dir, cfg) + require.NoError(t, err) evPath := filepath.Join(dir, "evaluators", "quality", "quality.yaml") assert.FileExists(t, evPath) @@ -221,7 +222,8 @@ func TestWriteEvalReviewArtifacts_NilDataset(t *testing.T) { cfg := &evalConfig{} // No dataset reference — should not panic. - eval_api.WriteEvalReviewArtifacts(dir, cfg) + err := eval_api.WriteEvalReviewArtifacts(dir, cfg) + require.NoError(t, err) } // --------------------------------------------------------------------------- @@ -233,7 +235,7 @@ func TestSaveEvaluatorResult(t *testing.T) { dir := t.TempDir() result := json.RawMessage(`{"name":"smoke-core","definition":{"type":"rubric","dimensions":[{"id":"quality","weight":10}]}}`) - eval_api.SaveEvaluatorResult(dir, "smoke-core", result) + require.NoError(t, eval_api.SaveEvaluatorResult(dir, "smoke-core", result)) path := filepath.Join(dir, "evaluators", "smoke-core", "rubric_dimensions.json") assert.FileExists(t, path) @@ -250,7 +252,7 @@ func TestSaveEvaluatorResult_WithVersion(t *testing.T) { dir := t.TempDir() result := json.RawMessage(`{"name":"custom","definition":{"type":"rubric","dimensions":[{"id":"d1","weight":5}]}}`) - eval_api.SaveEvaluatorResult(dir, "custom", result) + require.NoError(t, eval_api.SaveEvaluatorResult(dir, "custom", result)) path := filepath.Join(dir, "evaluators", "custom", "rubric_dimensions.json") assert.FileExists(t, path) @@ -260,7 +262,7 @@ func TestSaveEvaluatorResult_NilResult(t *testing.T) { t.Parallel() dir := t.TempDir() - eval_api.SaveEvaluatorResult(dir, "test", nil) + require.NoError(t, eval_api.SaveEvaluatorResult(dir, "test", nil)) path := filepath.Join(dir, "evaluators", "test", "rubric_dimensions.json") assert.NoFileExists(t, path) } @@ -269,7 +271,7 @@ func TestSaveEvaluatorResult_EmptyName(t *testing.T) { t.Parallel() dir 
:= t.TempDir() - eval_api.SaveEvaluatorResult(dir, "", json.RawMessage(`{"name":"x"}`)) + require.NoError(t, eval_api.SaveEvaluatorResult(dir, "", json.RawMessage(`{"name":"x"}`))) // Should not create any file. matches, _ := filepath.Glob(filepath.Join(dir, "evaluators", "*.json")) assert.Empty(t, matches) @@ -280,11 +282,12 @@ func TestWriteEvalReviewArtifacts_SkipsWhenResultExists(t *testing.T) { dir := t.TempDir() // Pre-save a result file. - eval_api.SaveEvaluatorResult(dir, "quality", json.RawMessage(`{"name":"quality","definition":{"type":"rubric","dimensions":[{"id":"q","weight":1}]}}`)) + require.NoError(t, eval_api.SaveEvaluatorResult(dir, "quality", json.RawMessage(`{"name":"quality","definition":{"type":"rubric","dimensions":[{"id":"q","weight":1}]}}`))) cfg := &evalConfig{} - cfg.Evaluators = opteval.EvaluatorList{{Name: "quality"}} - eval_api.WriteEvalReviewArtifacts(dir, cfg) + cfg.Evaluators = opt_eval.EvaluatorList{{Name: "quality"}} + err := eval_api.WriteEvalReviewArtifacts(dir, cfg) + require.NoError(t, err) // Should NOT create a .yaml stub since .json result already exists. yamlPath := filepath.Join(dir, "evaluators", "quality", "quality.yaml") @@ -336,7 +339,7 @@ func TestDatasetArtifactPath(t *testing.T) { result := eval_api.DatasetArtifactPath("/project", ref) assert.Equal(t, filepath.Join("/project", "datasets", "golden"), result) - // No version — same path + // No version — same path, version not included. 
refNoVer := &evalDatasetRef{Name: "golden", Version: ""} resultNoVer := eval_api.DatasetArtifactPath("/project", refNoVer) assert.Equal(t, filepath.Join("/project", "datasets", "golden"), resultNoVer) @@ -408,18 +411,18 @@ func TestEvalConfigRoundTrip(t *testing.T) { path := filepath.Join(dir, "eval.yaml") original := &evalConfig{ - Config: opteval.Config{ + Config: opt_eval.Config{ Name: "smoke-core", Agent: evalAgentRef{ Name: "my-agent", Kind: agent_yaml.AgentKindHosted, Version: "v1", - Instruction: opteval.InstructionRef{Value: "Test this agent"}, + Instruction: opt_eval.InstructionRef{Value: "Test this agent"}, }, DatasetReference: &evalDatasetRef{Name: "ds", Version: "v1"}, - Evaluators: opteval.EvaluatorList{{Name: "builtin.task_adherence"}}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}}, }, - Options: &opteval.Options{ + Options: &opt_eval.Options{ EvalModel: "gpt-4o", }, MaxSamples: 50, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/listen.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/listen.go index 5f018f4aad5..f6a375fbfa1 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/listen.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/listen.go @@ -248,7 +248,7 @@ func postdeployHandler(ctx context.Context, azdClient *azdext.AzdClient, args *a return fmt.Errorf("agent identity RBAC setup failed: %w", err) } - // Report optimization candidate deployments to FAOS. + // Report optimization candidate deployments to the optimization service. // If a service has AGENT_{KEY}_OPTIMIZATION_CANDIDATE_ID in the azd environment, // the agent was deployed from an optimization candidate. We notify the // optimization service so it can track which candidates have been deployed. 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go index 55b90c2d960..8e5d685653a 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -21,7 +21,7 @@ import ( "strings" "time" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "azureaiagent/internal/pkg/agents/optimize_api" "github.com/azure/azure-dev/cli/azd/pkg/azdext" @@ -136,8 +136,9 @@ Use --config for a custom YAML spec, or just provide the agent name to use sensi cmd.Flags().StringVarP(&flags.configFile, "config", "c", "", "Path to YAML config file (optional — uses defaults if omitted)") cmd.Flags().StringVarP(&flags.agent, "agent", "a", "", "Agent name (auto-detected from azd project if omitted)") - cmd.Flags().StringVarP(&flags.evalModel, "eval-model", "m", "gpt-4.1-mini", "Model for evaluation") - cmd.Flags().StringArrayVarP(&flags.targetAttributes, "target", "s", nil, "Target attribute for optimization: instruction, skill (repeatable)") + cmd.Flags().StringVarP(&flags.evalModel, "eval-model", "m", defaultEvalModel, "Model for evaluation") + cmd.Flags().StringArrayVarP(&flags.targetAttributes, "target", "t", nil, + "Target attribute for optimization: instruction, skill (repeatable)") cmd.Flags().BoolVar(&flags.noWait, "no-wait", false, "Submit job and return immediately without waiting for completion") cmd.Flags().IntVar(&flags.pollInterval, "poll-interval", 5, "Polling interval in seconds") flags.optimizeConnectionFlags.register(cmd) @@ -300,10 +301,9 @@ func (a *OptimizeAction) applyOverrides( } // Resolve relative tools_file against agent project directory. - // TODO: re-enable when tools optimization is supported in the service. 
- // if cfg.ToolsFile != "" && hasProject && !filepath.IsAbs(cfg.ToolsFile) { - // cfg.ToolsFile = filepath.Join(agentProject, cfg.ToolsFile) - // } + if cfg.ToolsFile != "" && hasProject && !filepath.IsAbs(cfg.ToolsFile) { + cfg.ToolsFile = filepath.Join(agentProject, cfg.ToolsFile) + } // Resolve agent instruction using a well-defined lifecycle: // 1. Config dir pointer (agent.config in eval.yaml) — resolves from metadata.yaml @@ -333,9 +333,9 @@ func (a *OptimizeAction) applyOverrides( // mergeAgentBaseline resolves the baseline agent config and merges missing // fields (instruction, model, skills, tools) into the OptimizeConfig. func mergeAgentBaseline(cfg *OptimizeConfig, agentProject string) { - var existing *opteval.Config + var existing *opt_eval.Config if cfg.Agent.ConfigFile != "" { - existing = &opteval.Config{Agent: cfg.Agent} + existing = &opt_eval.Config{Agent: cfg.Agent} } agentCfg := resolveAgentConfig(existing, agentProject) if agentCfg == nil { @@ -395,7 +395,7 @@ func (a *OptimizeAction) submitJob( }); err != nil { fmt.Fprintf(out, " warning: failed to save baseline config: %s\n", err) } else { - baselineMetaPath := opteval.BaselineConfigRelPath() + baselineMetaPath := opt_eval.BaselineConfigRelPath() fmt.Fprintf(out, " Baseline saved to %s\n", baselineMetaPath) if cfg.Agent.ConfigFile == "" { cfg.Agent.ConfigFile = baselineMetaPath diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go index 80478cac6e5..60823566d99 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go @@ -20,7 +20,7 @@ import ( "path/filepath" "strings" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "azureaiagent/internal/pkg/agents/optimize_api" "github.com/azure/azure-dev/cli/azd/pkg/azdext" @@ -30,7 +30,7 @@ import ( ) // agentConfigsDir aliases 
the shared constant for local use. -const agentConfigsDir = opteval.AgentConfigsDir +const agentConfigsDir = opt_eval.AgentConfigsDir // optimizeApplyFlags holds CLI flags for the optimize apply command. type optimizeApplyFlags struct { @@ -51,7 +51,7 @@ candidate and write them into your local azd project under .agent_configs/. After applying, run 'azd deploy' to deploy the optimized agent version.`, Example: ` # Apply candidate config locally, then deploy - azd ai agent optimize apply --candidate cand_abc123 + azd ai agent optimize apply --candidate candidate_abc123 azd deploy`, Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, _ []string) error { @@ -147,7 +147,7 @@ func (a *OptimizeApplyAction) apply( if err := writeAgentConfigFromCandidate(candidateDir, candidateConfig); err != nil { return fmt.Errorf("failed to write candidate config: %w", err) } - fmt.Fprintf(out, " → %s\n", filepath.Join(candidateDir, opteval.MetadataFile)) + fmt.Fprintf(out, " → %s\n", filepath.Join(candidateDir, opt_eval.MetadataFile)) // Step 3: Write OPTIMIZATION_LOCAL_DIR and OPTIMIZATION_CANDIDATE_ID into agent.yaml // so the deploy pipeline knows which local optimization config to use. @@ -188,7 +188,7 @@ func (a *OptimizeApplyAction) apply( printPromptDiff(out, serviceDir, a.flags.candidate, candidateConfig) // Point the user to the config folders for other differences (skills, tools, etc.). - baselinePath := filepath.Join(serviceDir, agentConfigsDir, opteval.BaselineDir) + baselinePath := filepath.Join(serviceDir, agentConfigsDir, opt_eval.BaselineDir) candidatePath := filepath.Join(serviceDir, agentConfigsDir, a.flags.candidate) fmt.Fprintf(out, "\n For other changes (skills, tools, etc.), compare the files in:\n") fmt.Fprintf(out, " Baseline: %s\n", color.CyanString(baselinePath)) @@ -216,8 +216,8 @@ type agentConfigMetadata struct { // /.agent_configs/baseline/metadata.yaml and resolves // file pointers to absolute paths. 
func loadBaselineConfig(agentProject string) (*agentConfigMetadata, error) { - baseDir := filepath.Join(agentProject, agentConfigsDir, opteval.BaselineDir) - metaPath := filepath.Join(baseDir, opteval.MetadataFile) + baseDir := filepath.Join(agentProject, agentConfigsDir, opt_eval.BaselineDir) + metaPath := filepath.Join(baseDir, opt_eval.MetadataFile) data, err := os.ReadFile(metaPath) //nolint:gosec // path derived from project directory if err != nil { return nil, err @@ -259,15 +259,30 @@ func (m *agentConfigMetadata) resolveSkillDir(configDir string) string { return filepath.Join(configDir, m.SkillDir) } +// resolveToolsFile returns the absolute path to the tools file, +// resolved relative to configDir. Returns empty if not set. +func (m *agentConfigMetadata) resolveToolsFile(configDir string) string { + if m.ToolsFile == "" { + return "" + } + if filepath.IsAbs(m.ToolsFile) { + return m.ToolsFile + } + return filepath.Join(configDir, m.ToolsFile) +} + // writeAgentConfigFromCandidate writes metadata.yaml, instructions.md, skill // files, and tool definitions for an optimization candidate into the given // directory. No config.json is written — all content is decomposed into // individual files with pointers in metadata.yaml. -func writeAgentConfigFromCandidate(candidateDir string, candidateConfig any) error { +func writeAgentConfigFromCandidate(candidateDir string, rawConfig json.RawMessage) error { meta := agentConfigMetadata{} - // Extract fields from the candidate config map. - m, _ := candidateConfig.(map[string]any) + // Unmarshal the raw JSON into a generic map for field extraction. 
+ var m map[string]any + if err := json.Unmarshal(rawConfig, &m); err != nil { + return fmt.Errorf("parsing candidate config JSON: %w", err) + } if m != nil { if v, exists := m["name"]; exists { if s, ok := v.(string); ok { @@ -287,13 +302,13 @@ func writeAgentConfigFromCandidate(candidateDir string, candidateConfig any) err } // Write instructions.md from the candidate's system prompt. - instructions := extractInstructions(candidateConfig) + instructions := extractInstructions(m) if instructions != "" { - instructionPath := filepath.Join(candidateDir, opteval.InstructionFile) + instructionPath := filepath.Join(candidateDir, opt_eval.InstructionFile) if err := os.WriteFile(instructionPath, []byte(instructions), 0600); err != nil { return fmt.Errorf("writing candidate instructions: %w", err) } - meta.InstructionFile = opteval.InstructionFile + meta.InstructionFile = opt_eval.InstructionFile } // Write inline skills from the candidate config as individual files. @@ -304,9 +319,9 @@ func writeAgentConfigFromCandidate(candidateDir string, candidateConfig any) err } // Set skill_dir pointer if the skills/ dir exists (from inline or downloaded skills). - skillDir := filepath.Join(candidateDir, opteval.SkillsDir) + skillDir := filepath.Join(candidateDir, opt_eval.SkillsDir) if info, err := os.Stat(skillDir); err == nil && info.IsDir() { - meta.SkillDir = opteval.SkillsDir + meta.SkillDir = opt_eval.SkillsDir } // Write tool_definitions as a JSON file. 
@@ -314,8 +329,8 @@ func writeAgentConfigFromCandidate(candidateDir string, candidateConfig any) err if err := writeToolDefinitions(candidateDir, m); err != nil { return fmt.Errorf("writing candidate tool definitions: %w", err) } - if _, err := os.Stat(filepath.Join(candidateDir, opteval.ToolsFile)); err == nil { - meta.ToolsFile = opteval.ToolsFile + if _, err := os.Stat(filepath.Join(candidateDir, opt_eval.ToolsFile)); err == nil { + meta.ToolsFile = opt_eval.ToolsFile } } @@ -324,7 +339,7 @@ func writeAgentConfigFromCandidate(candidateDir string, candidateConfig any) err if err != nil { return fmt.Errorf("serializing candidate metadata: %w", err) } - metaPath := filepath.Join(candidateDir, opteval.MetadataFile) + metaPath := filepath.Join(candidateDir, opt_eval.MetadataFile) if err := os.WriteFile(metaPath, data, 0600); err != nil { return fmt.Errorf("writing candidate metadata: %w", err) } @@ -358,7 +373,7 @@ func writeInlineSkills(candidateDir string, config map[string]any) error { body, _ := sm["body"].(string) description, _ := sm["description"].(string) - skillSubDir := filepath.Join(candidateDir, opteval.SkillsDir, name) + skillSubDir := filepath.Join(candidateDir, opt_eval.SkillsDir, name) if err := os.MkdirAll(skillSubDir, 0750); err != nil { return fmt.Errorf("creating skill directory %s: %w", name, err) } @@ -399,7 +414,7 @@ func writeToolDefinitions(candidateDir string, config map[string]any) error { return fmt.Errorf("serializing tool definitions: %w", err) } - return os.WriteFile(filepath.Join(candidateDir, opteval.ToolsFile), data, 0600) + return os.WriteFile(filepath.Join(candidateDir, opt_eval.ToolsFile), data, 0600) } // downloadSkillFilesToDir fetches the candidate manifest, downloads all skill @@ -438,7 +453,7 @@ func downloadSkillFilesToDir( continue } - outPath, pathErr := opteval.SafePath(destDir, f.Path) + outPath, pathErr := opt_eval.SafePath(destDir, f.Path) if pathErr != nil { fmt.Fprintf(out, " warning: skipping file %s: path escapes 
destination directory\n", f.Path) continue @@ -471,7 +486,7 @@ func cleanOtherCandidates(optimizeDir, currentCandidate string, out io.Writer) { continue } name := entry.Name() - if name == opteval.BaselineDir || name == currentCandidate { + if name == opt_eval.BaselineDir || name == currentCandidate { continue } dir := filepath.Join(optimizeDir, name) @@ -485,9 +500,8 @@ func cleanOtherCandidates(optimizeDir, currentCandidate string, out io.Writer) { // extractInstructions retrieves the system prompt string from a candidate config // returned by the optimization service. -func extractInstructions(config any) string { - m, ok := config.(map[string]any) - if !ok { +func extractInstructions(m map[string]any) string { + if m == nil { return "" } if v, exists := m["systemPrompt"]; exists { @@ -508,13 +522,17 @@ const maxDiffPreviewLines = 4 // printPromptDiff displays an abbreviated instruction diff (baseline → optimized) // with a short preview of each. -func printPromptDiff(out io.Writer, serviceDir, candidateID string, candidateConfig any) { - optimized := extractInstructions(candidateConfig) +func printPromptDiff(out io.Writer, serviceDir, candidateID string, candidateConfig json.RawMessage) { + var m map[string]any + if err := json.Unmarshal(candidateConfig, &m); err != nil { + return + } + optimized := extractInstructions(m) if optimized == "" { return } - baseDir := filepath.Join(serviceDir, agentConfigsDir, opteval.BaselineDir) + baseDir := filepath.Join(serviceDir, agentConfigsDir, opt_eval.BaselineDir) baseline, err := loadBaselineConfig(serviceDir) if err != nil { return diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go index c645bc2d9a5..fc1ef13a373 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go @@ -5,12 +5,13 @@ package cmd import ( "bytes" 
+ "encoding/json" "fmt" "os" "path/filepath" "testing" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "azureaiagent/internal/pkg/agents/optimize_api" "github.com/azure/azure-dev/cli/azd/pkg/azdext" @@ -114,22 +115,22 @@ func TestPrintPromptDiff(t *testing.T) { dir := t.TempDir() // Set up baseline with metadata that points to an instruction file. - baselineDir := filepath.Join(dir, agentConfigsDir, opteval.BaselineDir) + baselineDir := filepath.Join(dir, agentConfigsDir, opt_eval.BaselineDir) require.NoError(t, os.MkdirAll(baselineDir, 0750)) require.NoError(t, os.WriteFile( - filepath.Join(baselineDir, opteval.InstructionFile), + filepath.Join(baselineDir, opt_eval.InstructionFile), []byte("You are a baseline assistant.\nLine two."), 0600, )) require.NoError(t, os.WriteFile( - filepath.Join(baselineDir, opteval.MetadataFile), + filepath.Join(baselineDir, opt_eval.MetadataFile), []byte("instruction_file: instructions.md\nmodel: gpt-4o\n"), 0600, )) - candidateConfig := map[string]any{ + candidateConfig := mustMarshal(t, map[string]any{ "systemPrompt": "You are an optimized assistant.\nNew line two.\nNew line three.", - } + }) var buf bytes.Buffer printPromptDiff(&buf, dir, "cand1", candidateConfig) @@ -145,7 +146,7 @@ func TestPrintPromptDiff(t *testing.T) { t.Run("no output when candidate has no instructions", func(t *testing.T) { t.Parallel() dir := t.TempDir() - candidateConfig := map[string]any{"model": "gpt-4o"} + candidateConfig := mustMarshal(t, map[string]any{"model": "gpt-4o"}) var buf bytes.Buffer printPromptDiff(&buf, dir, "cand1", candidateConfig) @@ -155,7 +156,7 @@ func TestPrintPromptDiff(t *testing.T) { t.Run("no output when baseline config missing", func(t *testing.T) { t.Parallel() dir := t.TempDir() - candidateConfig := map[string]any{"systemPrompt": "optimized"} + candidateConfig := mustMarshal(t, map[string]any{"systemPrompt": "optimized"}) var buf bytes.Buffer printPromptDiff(&buf, dir, "cand1", 
candidateConfig) @@ -167,15 +168,15 @@ func TestPrintPromptDiff(t *testing.T) { dir := t.TempDir() // Write metadata without instruction_file. - baselineDir := filepath.Join(dir, agentConfigsDir, opteval.BaselineDir) + baselineDir := filepath.Join(dir, agentConfigsDir, opt_eval.BaselineDir) require.NoError(t, os.MkdirAll(baselineDir, 0750)) require.NoError(t, os.WriteFile( - filepath.Join(baselineDir, opteval.MetadataFile), + filepath.Join(baselineDir, opt_eval.MetadataFile), []byte("model: gpt-4o\n"), 0600, )) - candidateConfig := map[string]any{"systemPrompt": "optimized"} + candidateConfig := mustMarshal(t, map[string]any{"systemPrompt": "optimized"}) var buf bytes.Buffer printPromptDiff(&buf, dir, "cand1", candidateConfig) @@ -183,13 +184,20 @@ func TestPrintPromptDiff(t *testing.T) { }) } +func mustMarshal(t *testing.T, v any) json.RawMessage { + t.Helper() + data, err := json.Marshal(v) + require.NoError(t, err) + return data +} + // ---- extractInstructions ---- func TestExtractInstructions(t *testing.T) { t.Parallel() tests := []struct { name string - config any + config map[string]any want string }{ { @@ -211,7 +219,6 @@ func TestExtractInstructions(t *testing.T) { "From systemPrompt", }, {"nil config", nil, ""}, - {"non-map config", "just a string", ""}, {"empty map", map[string]any{}, ""}, {"non-string value", map[string]any{"systemPrompt": 42}, ""}, } @@ -274,6 +281,29 @@ func TestAgentConfigMetadata_ResolveSkillDir(t *testing.T) { }) } +func TestAgentConfigMetadata_ResolveToolsFile(t *testing.T) { + t.Parallel() + t.Run("returns empty when not set", func(t *testing.T) { + t.Parallel() + meta := &agentConfigMetadata{} + assert.Empty(t, meta.resolveToolsFile("/some/dir")) + }) + + t.Run("resolves relative path", func(t *testing.T) { + t.Parallel() + meta := &agentConfigMetadata{ToolsFile: "tools.json"} + result := meta.resolveToolsFile("/project/config") + assert.Equal(t, filepath.Join("/project/config", "tools.json"), result) + }) + + t.Run("preserves 
absolute path", func(t *testing.T) { + t.Parallel() + abs := filepath.Join(os.TempDir(), "absolute-tools.json") + meta := &agentConfigMetadata{ToolsFile: abs} + assert.Equal(t, abs, meta.resolveToolsFile("/any/dir")) + }) +} + // ---- writeAgentConfigFromCandidate ---- func TestWriteAgentConfigFromCandidate(t *testing.T) { @@ -281,19 +311,19 @@ func TestWriteAgentConfigFromCandidate(t *testing.T) { t.Run("writes metadata and instructions", func(t *testing.T) { t.Parallel() dir := t.TempDir() - config := map[string]any{ + config := mustMarshal(t, map[string]any{ "name": "test-agent", "model": "gpt-4o", "systemPrompt": "Test prompt.", - } + }) err := writeAgentConfigFromCandidate(dir, config) require.NoError(t, err) - assert.FileExists(t, filepath.Join(dir, opteval.MetadataFile)) - assert.FileExists(t, filepath.Join(dir, opteval.InstructionFile)) + assert.FileExists(t, filepath.Join(dir, opt_eval.MetadataFile)) + assert.FileExists(t, filepath.Join(dir, opt_eval.InstructionFile)) - content, err := os.ReadFile(filepath.Join(dir, opteval.InstructionFile)) //nolint:gosec // test file path + content, err := os.ReadFile(filepath.Join(dir, opt_eval.InstructionFile)) //nolint:gosec // test file path require.NoError(t, err) assert.Equal(t, "Test prompt.", string(content)) }) @@ -301,7 +331,7 @@ func TestWriteAgentConfigFromCandidate(t *testing.T) { t.Run("writes inline skills", func(t *testing.T) { t.Parallel() dir := t.TempDir() - config := map[string]any{ + config := mustMarshal(t, map[string]any{ "systemPrompt": "prompt", "skills": []any{ map[string]any{ @@ -310,21 +340,21 @@ func TestWriteAgentConfigFromCandidate(t *testing.T) { "body": "Search content here.", }, }, - } + }) err := writeAgentConfigFromCandidate(dir, config) require.NoError(t, err) - skillFile := filepath.Join(dir, opteval.SkillsDir, "search", "SKILL.md") + skillFile := filepath.Join(dir, opt_eval.SkillsDir, "search", "SKILL.md") assert.FileExists(t, skillFile) }) t.Run("handles nil config gracefully", 
func(t *testing.T) { t.Parallel() dir := t.TempDir() - err := writeAgentConfigFromCandidate(dir, nil) + err := writeAgentConfigFromCandidate(dir, json.RawMessage(`{}`)) require.NoError(t, err) - assert.FileExists(t, filepath.Join(dir, opteval.MetadataFile)) + assert.FileExists(t, filepath.Join(dir, opt_eval.MetadataFile)) }) } @@ -335,7 +365,7 @@ func TestCleanOtherCandidates(t *testing.T) { dir := t.TempDir() // Create baseline, current candidate, and old candidate directories. - require.NoError(t, os.MkdirAll(filepath.Join(dir, opteval.BaselineDir), 0750)) + require.NoError(t, os.MkdirAll(filepath.Join(dir, opt_eval.BaselineDir), 0750)) require.NoError(t, os.MkdirAll(filepath.Join(dir, "cand_current"), 0750)) require.NoError(t, os.MkdirAll(filepath.Join(dir, "cand_old"), 0750)) @@ -343,7 +373,7 @@ func TestCleanOtherCandidates(t *testing.T) { cleanOtherCandidates(dir, "cand_current", &buf) // baseline and cand_current should remain; cand_old should be removed. - assert.DirExists(t, filepath.Join(dir, opteval.BaselineDir)) + assert.DirExists(t, filepath.Join(dir, opt_eval.BaselineDir)) assert.DirExists(t, filepath.Join(dir, "cand_current")) assert.NoDirExists(t, filepath.Join(dir, "cand_old")) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go index 0da075e3388..26be1ca628f 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -4,18 +4,19 @@ // optimize_config.go defines OptimizeConfig (the YAML config structure for // optimization jobs), provides loading/validation, and converts configs into // API requests. It also handles reading skills from disk and parsing YAML -// frontmatter in skill files. +// preamble in skill files. 
package cmd import ( "bufio" + "encoding/json" "fmt" "os" "path/filepath" "strings" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "azureaiagent/internal/pkg/agents/optimize_api" "go.yaml.in/yaml/v3" @@ -23,12 +24,12 @@ import ( // OptimizeConfig extends the shared Config with optimize-specific fields. type OptimizeConfig struct { - opteval.Config `yaml:",inline"` + opt_eval.Config `yaml:",inline"` // Optimize-specific YAML fields. - ValidationReference *opteval.DatasetRef `yaml:"validation_reference,omitempty"` + ValidationReference *opt_eval.DatasetRef `yaml:"validation_reference,omitempty"` Criteria []OptimizeConfigCriterion `yaml:"criteria,omitempty"` - Options *opteval.Options `yaml:"options"` + Options *opt_eval.Options `yaml:"options"` InlineDataset []optimize_api.DatasetTask `yaml:"-"` // populated by defaultOptimizeConfig, not from YAML // Runtime-only: resolved skill directory and tools file (not serialized to YAML). @@ -86,13 +87,13 @@ func (c *OptimizeConfig) Validate() error { // evaluation dataset. func defaultOptimizeConfig(agentName string) *OptimizeConfig { return &OptimizeConfig{ - Config: opteval.Config{ - Agent: opteval.AgentRef{Name: agentName}, - Evaluators: opteval.EvaluatorList{{Name: "builtin.task_adherence"}}, + Config: opt_eval.Config{ + Agent: opt_eval.AgentRef{Name: agentName}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}}, }, InlineDataset: defaultDataset, - Options: &opteval.Options{ - EvalModel: "gpt-4o", + Options: &opt_eval.Options{ + EvalModel: defaultEvalModel, Mode: "optimize", TargetAttributes: []string{"instruction", "skill"}, Budget: 5, @@ -219,21 +220,36 @@ func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.Optimi } // Load tool definitions if a tools file is specified. - // TODO: re-enable when tools optimization is supported in the service. 
- // if c.ToolsFile != "" { - // tools, err := loadToolDefinitions(c.ToolsFile) - // if err != nil { - // return nil, fmt.Errorf("loading tool definitions from %s: %w", c.ToolsFile, err) - // } - // req.Agent.ToolDefinitions = tools - // } + if c.ToolsFile != "" { + tools, err := loadToolDefinitions(c.ToolsFile) + if err != nil { + return nil, fmt.Errorf("loading tool definitions from %s: %w", c.ToolsFile, err) + } + req.Agent.ToolDefinitions = tools + } return req, nil } +// loadToolDefinitions reads an OpenAI-format tools JSON file and returns +// ToolDefinition entries for the optimize API request. +func loadToolDefinitions(path string) ([]optimize_api.ToolDefinition, error) { + data, err := os.ReadFile(path) //nolint:gosec // path derived from project tools file + if err != nil { + return nil, fmt.Errorf("reading tools file: %w", err) + } + + var tools []optimize_api.ToolDefinition + if err := json.Unmarshal(data, &tools); err != nil { + return nil, fmt.Errorf("parsing tools file: %w", err) + } + + return tools, nil +} + // loadSkillsFromDir reads skill files from a directory and returns SkillDefinitions. -// For markdown files (.md), YAML frontmatter is parsed to extract name and description; -// the content after the frontmatter becomes the skill body. +// For markdown files (.md), YAML preamble is parsed to extract name and description; +// the content after the preamble becomes the skill body. // For other files, the filename (without extension) is used as the name and the full // content as the body. // Subdirectories are recursed into — each file within is also loaded as a skill. @@ -268,15 +284,15 @@ func loadSkillsFromDir(dir string) ([]optimize_api.SkillDefinition, error) { return skills, nil } -// skillFrontmatter represents the YAML frontmatter in a skill markdown file. -type skillFrontmatter struct { +// skillPreamble represents the YAML preamble in a skill markdown file. 
+type skillPreamble struct { Name string `yaml:"name"` Description string `yaml:"description"` } // parseSkillFile parses a skill file. For .md files it attempts to extract -// YAML frontmatter (delimited by "---") for name and description; the body -// is the content after the frontmatter. For other files, the filename (sans +// YAML preamble (delimited by "---") for name and description; the body +// is the content after the preamble. For other files, the filename (sans // extension) is the name and the full content is the body. func parseSkillFile(filename, content string) optimize_api.SkillDefinition { ext := filepath.Ext(filename) @@ -289,15 +305,15 @@ func parseSkillFile(filename, content string) optimize_api.SkillDefinition { } } - // Try to parse YAML frontmatter from markdown. - fm, body := splitFrontmatter(content) + // Try to parse YAML preamble from markdown. + fm, body := splitPreamble(content) skill := optimize_api.SkillDefinition{ Name: baseName, Body: body, } if fm != "" { - var meta skillFrontmatter + var meta skillPreamble if err := yaml.Unmarshal([]byte(fm), &meta); err == nil { if meta.Name != "" { skill.Name = meta.Name @@ -309,10 +325,10 @@ func parseSkillFile(filename, content string) optimize_api.SkillDefinition { return skill } -// splitFrontmatter splits YAML frontmatter (between "---" delimiters) from -// the rest of the content. Returns (frontmatter, body). If no frontmatter is +// splitPreamble splits YAML preamble (between "---" delimiters) from +// the rest of the content. Returns (preamble, body). If no preamble is // found, returns ("", original content). 
-func splitFrontmatter(content string) (string, string) { +func splitPreamble(content string) (string, string) { const delimiter = "---" scanner := bufio.NewScanner(strings.NewReader(content)) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go index e8f99f214d6..2a4131e0c68 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go @@ -8,7 +8,7 @@ import ( "path/filepath" "testing" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "azureaiagent/internal/pkg/agents/optimize_api" "github.com/stretchr/testify/assert" @@ -113,10 +113,10 @@ func TestValidate_MissingAgentName(t *testing.T) { t.Parallel() cfg := &OptimizeConfig{ - Config: opteval.Config{ - DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "1"}, + Config: opt_eval.Config{ + DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "1"}, }, - Options: &opteval.Options{EvalModel: "gpt-4o-mini"}, + Options: &opt_eval.Options{EvalModel: "gpt-4o-mini"}, } err := cfg.Validate() @@ -128,9 +128,9 @@ func TestValidate_MissingEvalModel(t *testing.T) { t.Parallel() cfg := &OptimizeConfig{ - Config: opteval.Config{ - Agent: opteval.AgentRef{Name: "agent"}, - DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "1"}, + Config: opt_eval.Config{ + Agent: opt_eval.AgentRef{Name: "agent"}, + DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "1"}, }, } @@ -143,12 +143,12 @@ func TestValidate_BothDatasetFileAndReference(t *testing.T) { t.Parallel() cfg := &OptimizeConfig{ - Config: opteval.Config{ - Agent: opteval.AgentRef{Name: "agent"}, + Config: opt_eval.Config{ + Agent: opt_eval.AgentRef{Name: "agent"}, DatasetFile: "tasks.jsonl", - DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "1"}, + DatasetReference: &opt_eval.DatasetRef{Name: 
"ds", Version: "1"}, }, - Options: &opteval.Options{EvalModel: "gpt-4o-mini"}, + Options: &opt_eval.Options{EvalModel: "gpt-4o-mini"}, } err := cfg.Validate() @@ -160,8 +160,8 @@ func TestValidate_NeitherDatasetFileNorReference(t *testing.T) { t.Parallel() cfg := &OptimizeConfig{ - Config: opteval.Config{Agent: opteval.AgentRef{Name: "agent"}}, - Options: &opteval.Options{EvalModel: "gpt-4o-mini"}, + Config: opt_eval.Config{Agent: opt_eval.AgentRef{Name: "agent"}}, + Options: &opt_eval.Options{EvalModel: "gpt-4o-mini"}, } err := cfg.Validate() @@ -299,7 +299,7 @@ options: // parseSkillFile / loadSkillsFromDir // --------------------------------------------------------------------------- -func TestParseSkillFile_MarkdownWithFrontmatter(t *testing.T) { +func TestParseSkillFile_MarkdownWithPreamble(t *testing.T) { t.Parallel() content := `--- name: policy-reviewer @@ -318,7 +318,7 @@ Review travel requests and provide a friendly assessment. assert.NotContains(t, skill.Body, "---") } -func TestParseSkillFile_MarkdownWithoutFrontmatter(t *testing.T) { +func TestParseSkillFile_MarkdownWithoutPreamble(t *testing.T) { t.Parallel() content := "# Simple Skill\n\nDo something useful.\n" skill := parseSkillFile("simple.md", content) @@ -336,7 +336,7 @@ func TestParseSkillFile_NonMarkdown(t *testing.T) { assert.Equal(t, content, skill.Body) } -func TestParseSkillFile_FrontmatterNameOnly(t *testing.T) { +func TestParseSkillFile_PreambleNameOnly(t *testing.T) { t.Parallel() content := "---\nname: custom-name\n---\nBody content here.\n" skill := parseSkillFile("ignored-filename.md", content) @@ -378,3 +378,101 @@ func TestLoadSkillsFromDir_WithMarkdownSkills(t *testing.T) { assert.Empty(t, txtSkill.Description) assert.Equal(t, txt, txtSkill.Body) } + +// --------------------------------------------------------------------------- +// loadToolDefinitions +// --------------------------------------------------------------------------- + +func TestLoadToolDefinitions_Valid(t 
*testing.T) { + t.Parallel() + dir := t.TempDir() + + content := `[ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City name"} + }, + "required": ["location"] + } + } + }, + { + "type": "function", + "function": { + "name": "search", + "description": "Search the web" + } + } +]` + path := writeTestFile(t, dir, "tools.json", content) + + tools, err := loadToolDefinitions(path) + require.NoError(t, err) + require.Len(t, tools, 2) + + assert.Equal(t, "function", tools[0].Type) + assert.Equal(t, "get_weather", tools[0].Function.Name) + assert.Equal(t, "Get current weather for a location", tools[0].Function.Description) + assert.NotNil(t, tools[0].Function.Parameters) + + assert.Equal(t, "function", tools[1].Type) + assert.Equal(t, "search", tools[1].Function.Name) +} + +func TestLoadToolDefinitions_FileNotFound(t *testing.T) { + t.Parallel() + _, err := loadToolDefinitions(filepath.Join(t.TempDir(), "nonexistent.json")) + require.Error(t, err) + assert.Contains(t, err.Error(), "reading tools file") +} + +func TestLoadToolDefinitions_InvalidJSON(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := writeTestFile(t, dir, "tools.json", "not json") + + _, err := loadToolDefinitions(path) + require.Error(t, err) + assert.Contains(t, err.Error(), "parsing tools file") +} + +func TestLoadToolDefinitions_EmptyArray(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := writeTestFile(t, dir, "tools.json", "[]") + + tools, err := loadToolDefinitions(path) + require.NoError(t, err) + assert.Empty(t, tools) +} + +func TestToRequest_WithToolsFile(t *testing.T) { + t.Parallel() + dir := t.TempDir() + + toolsContent := `[{"type":"function","function":{"name":"calculator","description":"Do math"}}]` + toolsPath := writeTestFile(t, dir, "tools.json", toolsContent) + + cfg := &OptimizeConfig{ + 
Config: opt_eval.Config{ + Agent: opt_eval.AgentRef{Name: "test-agent"}, + DatasetFile: writeTestFile(t, dir, "dataset.jsonl", `{"prompt":"test"}`), + }, + Options: &opt_eval.Options{ + EvalModel: "gpt-4o", + }, + ToolsFile: toolsPath, + } + + req, err := cfg.ToRequest("https://example.com") + require.NoError(t, err) + require.Len(t, req.Agent.ToolDefinitions, 1) + assert.Equal(t, "calculator", req.Agent.ToolDefinitions[0].Function.Name) + assert.Equal(t, "Do math", req.Agent.ToolDefinitions[0].Function.Description) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go index 0d243442761..5b6d23e1d19 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go @@ -45,10 +45,10 @@ func newOptimizeDeployCommand() *cobra.Command { This creates a new agent version with the optimized configuration applied. Use 'optimize apply' instead if you want to localize the config into your azd project first.`, Example: ` # Deploy candidate directly - azd ai agent optimize deploy --candidate cand_abc123 --agent my-agent + azd ai agent optimize deploy --candidate candidate_abc123 --agent my-agent # Deploy with explicit endpoint - azd ai agent optimize deploy --candidate cand_abc123 --agent my-agent --project-endpoint https://...`, + azd ai agent optimize deploy --candidate candidate_abc123 --agent my-agent --project-endpoint https://...`, Args: cobra.MaximumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { ctx := azdext.WithAccessToken(cmd.Context()) @@ -258,6 +258,8 @@ func resolveProjectEndpointForDeploy(ctx context.Context, connFlags *optimizeCon // isReservedEnvVarError checks if a version creation error is due to // the platform rejecting reserved AGENT_* or FOUNDRY_* environment variables. 
+// TODO: Use azcore.ResponseError.StatusCode + stable API error code when available, +// instead of brittle substring matching on server error wording. func isReservedEnvVarError(err error) bool { if err == nil { return false diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go index c46717079d4..d54f206918f 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go @@ -131,7 +131,7 @@ func printOptimizePortalLink(ctx context.Context, out io.Writer, agentName, oper }) } -// reportOptimizationDeployments reports optimization candidate deployments to FAOS. +// reportOptimizationDeployments reports optimization candidate deployments to the optimization service. // For each hosted agent service, if AGENT_{KEY}_OPTIMIZATION_CANDIDATE_ID is set in // the azd environment, it calls the promote API and then clears the env var. // This is best-effort — failures are logged but do not block the deploy. 
@@ -142,59 +142,71 @@ func reportOptimizationDeployments( envName, projectEndpoint string, newClient func(endpoint string) *optimize_api.OptimizeClient, ) { - defer func() { - if r := recover(); r != nil { - log.Printf("postdeploy: optimization deployment reporting panicked: %v", r) - } - }() - log.Printf("postdeploy: reporting optimization deployments for %d hosted agents", len(hostedAgents)) for _, svc := range hostedAgents { - serviceKey := toServiceKey(svc.Name) - candidateKey := fmt.Sprintf("AGENT_%s_OPTIMIZATION_CANDIDATE_ID", serviceKey) - - candidateResp, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ - EnvName: envName, - Key: candidateKey, - }) - if err != nil || candidateResp.Value == "" { - log.Printf("postdeploy: no optimization candidate for %s, skipping", svc.Name) - continue - } + func() { + defer func() { + if r := recover(); r != nil { + log.Printf("postdeploy: optimization reporting panicked for %s: %v", svc.Name, r) + } + }() + reportSvcOptimizationDeployment(ctx, azdClient, svc, envName, projectEndpoint, newClient) + }() + } +} - versionKey := fmt.Sprintf("AGENT_%s_VERSION", serviceKey) - versionResp, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ - EnvName: envName, - Key: versionKey, - }) - if err != nil || versionResp.Value == "" { - log.Printf("postdeploy: no version for %s, skipping", svc.Name) - continue - } +// reportSvcOptimizationDeployment reports a single service's optimization candidate. 
+func reportSvcOptimizationDeployment( + ctx context.Context, + azdClient *azdext.AzdClient, + svc *azdext.ServiceConfig, + envName, projectEndpoint string, + newClient func(endpoint string) *optimize_api.OptimizeClient, +) { + serviceKey := toServiceKey(svc.Name) + candidateKey := fmt.Sprintf("AGENT_%s_OPTIMIZATION_CANDIDATE_ID", serviceKey) - log.Printf("postdeploy: promoting candidate %s for %s (version %s)", - candidateResp.Value, svc.Name, versionResp.Value) - - optClient := newClient(projectEndpoint) - if err := optClient.ReportDeployment(ctx, &optimize_api.DeploymentReport{ - CandidateID: candidateResp.Value, - AgentName: svc.Name, - AgentVersion: versionResp.Value, - }); err != nil { - log.Printf("postdeploy: failed to report optimization deployment for %s: %v", svc.Name, err) - continue - } + candidateResp, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envName, + Key: candidateKey, + }) + if err != nil || candidateResp.Value == "" { + log.Printf("postdeploy: no optimization candidate for %s, skipping", svc.Name) + return + } + + versionKey := fmt.Sprintf("AGENT_%s_VERSION", serviceKey) + versionResp, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envName, + Key: versionKey, + }) + if err != nil || versionResp.Value == "" { + log.Printf("postdeploy: no version for %s, skipping", svc.Name) + return + } - log.Printf("postdeploy: successfully promoted candidate %s for %s", candidateResp.Value, svc.Name) + log.Printf("postdeploy: promoting candidate %s for %s (version %s)", + candidateResp.Value, svc.Name, versionResp.Value) - // Clear the candidate ID after successful reporting. 
- if _, err := azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ - EnvName: envName, - Key: candidateKey, - Value: "", - }); err != nil { - log.Printf("postdeploy: failed to clear %s: %v", candidateKey, err) - } + optClient := newClient(projectEndpoint) + if err := optClient.ReportDeployment(ctx, &optimize_api.DeploymentReport{ + CandidateID: candidateResp.Value, + AgentName: svc.Name, + AgentVersion: versionResp.Value, + }); err != nil { + log.Printf("postdeploy: failed to report optimization deployment for %s: %v", svc.Name, err) + return + } + + log.Printf("postdeploy: successfully promoted candidate %s for %s", candidateResp.Value, svc.Name) + + // Clear the candidate ID after successful reporting. + if _, err := azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ + EnvName: envName, + Key: candidateKey, + Value: "", + }); err != nil { + log.Printf("postdeploy: failed to clear %s: %v", candidateKey, err) } } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go index 5dae37c0a8c..e94794847b9 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go @@ -14,7 +14,7 @@ import ( "path/filepath" "strings" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/azure/azure-dev/cli/azd/pkg/azdext" ) @@ -275,27 +275,26 @@ func promptOptimizeConfigConfirmation(ctx context.Context, cfg *OptimizeConfig, cfg.SkillDir = "" } - // TODO: re-enable tools file prompt when tools optimization is supported. - // // Tools file. 
- // toolsDefault := relativeDisplay(cfg.ToolsFile, agentProject) - // resp, err = prompt.Prompt(ctx, &azdext.PromptRequest{ - // Options: &azdext.PromptOptions{ - // Message: "Tools file (enter to skip)", - // DefaultValue: toolsDefault, - // IgnoreHintKeys: true, - // }, - // }) - // if err != nil { - // return fmt.Errorf("prompting for tools file: %w", err) - // } - // if value := strings.TrimSpace(resp.Value); value != "" { - // if !filepath.IsAbs(value) && agentProject != "" { - // value = filepath.Join(agentProject, value) - // } - // cfg.ToolsFile = value - // } else { - // cfg.ToolsFile = "" - // } + // Tools file. + toolsDefault := relativeDisplay(cfg.ToolsFile, agentProject) + resp, err = prompt.Prompt(ctx, &azdext.PromptRequest{ + Options: &azdext.PromptOptions{ + Message: "Tools file (enter to skip)", + DefaultValue: toolsDefault, + IgnoreHintKeys: true, + }, + }) + if err != nil { + return fmt.Errorf("prompting for tools file: %w", err) + } + if value := strings.TrimSpace(resp.Value); value != "" { + if !filepath.IsAbs(value) && agentProject != "" { + value = filepath.Join(agentProject, value) + } + cfg.ToolsFile = value + } else { + cfg.ToolsFile = "" + } return nil } @@ -350,7 +349,7 @@ func resolveOptimizeTargetModels( if len(models) > 0 { if cfg.Options.TargetConfig == nil { - cfg.Options.TargetConfig = &opteval.TargetConfig{} + cfg.Options.TargetConfig = &opt_eval.TargetConfig{} } cfg.Options.TargetConfig.Model = models } diff --git a/cli/azd/extensions/azure.ai.agents/internal/exterrors/codes.go b/cli/azd/extensions/azure.ai.agents/internal/exterrors/codes.go index 4722c9e22e8..2bcc78683d6 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/exterrors/codes.go +++ b/cli/azd/extensions/azure.ai.agents/internal/exterrors/codes.go @@ -163,3 +163,15 @@ const ( OpCreateToolboxVersion = "create_toolbox_version" OpGetToolbox = "get_toolbox" ) + +// Error codes for eval and optimize operations. 
+const ( + CodeEvalRunFailed = "eval_run_failed" + CodeEvalRunCancelled = "eval_run_cancelled" + CodeEvalRunTimeout = "eval_run_timeout" + CodeEvalConfigInvalid = "eval_config_invalid" + CodeOptimizeJobFailed = "optimize_job_failed" + CodeOptimizeJobTimeout = "optimize_job_timeout" + CodeInvalidTargetAttr = "invalid_target_attribute" + CodeReservedEnvVar = "reserved_env_var" +) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go index 6015b559c2b..48967aade35 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go @@ -20,6 +20,9 @@ type CreateDatasetRequest struct { } // Dataset is the response for dataset operations. +// Note: The GET /datasets API returns snake_case field names (data_uri, blob_uri, +// content_uri), while the POST /finalize API accepts camelCase (dataUri). +// Both conventions are correct for their respective endpoints. type Dataset struct { Name string `json:"name"` Version string `json:"version"` @@ -136,7 +139,7 @@ type FinalizeDatasetRequest struct { // // Rules: // 1. Empty → "1.0" -// 2. Parseable as a decimal number → increment by 1, format as "N.0" +// 2. Parsable as a decimal number → increment by 1, format as "N.0" // 3. Ends with trailing digits → increment the trailing numeric part // 4. 
Otherwise → append ".1" func NextVersion(current string) string { diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go index bec24bd62df..a1d54bf47a5 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations.go @@ -256,7 +256,7 @@ func (c *DatasetClient) ListContainerBlobs(ctx context.Context, containerSASUri } q := u.Query() - q.Set("restype", "container") + q.Set("restype", "container") // cspell:ignore restype — Azure Storage API query parameter q.Set("comp", "list") u.RawQuery = q.Encode() diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go index b3d09bcac6f..f0d76baab3b 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go @@ -14,7 +14,7 @@ import ( "time" "azureaiagent/internal/pkg/agents/dataset_api" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" ) // Artifact directory names relative to the agent project root. 
@@ -42,7 +42,7 @@ func DownloadDatasetArtifact( ctx context.Context, client *dataset_api.DatasetClient, agentProject string, - ref *opteval.DatasetRef, + ref *opt_eval.DatasetRef, apiVersion string, ) (string, error) { if ref == nil || ref.Name == "" { @@ -85,7 +85,7 @@ func DownloadDatasetArtifact( errs = append(errs, fmt.Errorf("downloading blob %q: %w", blobName, dlErr)) continue } - dest, pathErr := opteval.SafePath(destDir, blobName) + dest, pathErr := opt_eval.SafePath(destDir, blobName) if pathErr != nil { errs = append(errs, pathErr) continue @@ -153,7 +153,7 @@ func filenameFromURL(rawURL string) string { } // DatasetArtifactPath returns the local filesystem path for a downloaded dataset directory. -func DatasetArtifactPath(agentProject string, ref *opteval.DatasetRef) string { +func DatasetArtifactPath(agentProject string, ref *opt_eval.DatasetRef) string { if ref == nil || ref.Name == "" { return "" } @@ -180,28 +180,31 @@ func EvaluatorLocalURI(name string) string { // SaveEvaluatorResult extracts the rubric dimensions from the evaluator result // and saves them as the local artifact. Only dimensions are persisted so that // users can edit weights/descriptions and upload a new evaluator version. -func SaveEvaluatorResult(agentProject, evaluatorName string, result json.RawMessage) { +func SaveEvaluatorResult(agentProject, evaluatorName string, result json.RawMessage) error { if evaluatorName == "" || len(result) == 0 { - return + return nil } dir := evaluatorDir(agentProject, evaluatorName) if err := os.MkdirAll(dir, 0750); err != nil { - return + return fmt.Errorf("creating evaluator dir %q: %w", dir, err) } // Parse the evaluator result to extract the rubric dimensions. 
parsed := ParseEvaluatorResult(result) if parsed == nil || len(parsed.Definition.Dimensions) == 0 { - return + return nil } formatted, err := json.MarshalIndent(parsed.Definition.Dimensions, "", " ") if err != nil { - return + return fmt.Errorf("marshalling evaluator dimensions: %w", err) } path := filepath.Join(dir, EvaluatorContractFile) - _ = os.WriteFile(path, formatted, 0600) + if err := os.WriteFile(path, formatted, 0600); err != nil { + return fmt.Errorf("writing evaluator artifact %q: %w", path, err) + } + return nil } // PrintEvaluatorDimensions prints a compact table of rubric dimensions. @@ -217,16 +220,18 @@ func PrintEvaluatorDimensions(parsed *EvaluatorResult) { // WriteEvalReviewArtifacts writes human-readable review artifacts for evaluators. // It writes a stub YAML file for each evaluator unless a result JSON already exists. -func WriteEvalReviewArtifacts(agentProject string, cfg *EvalConfig) { +func WriteEvalReviewArtifacts(agentProject string, cfg *EvalConfig) error { if cfg == nil { - return + return nil } + var errs []error for _, evaluator := range cfg.Evaluators { if evaluator.Name == "" || IsBuiltinEvaluator(evaluator.Name) { continue } dir := evaluatorDir(agentProject, evaluator.Name) if err := os.MkdirAll(dir, 0750); err != nil { + errs = append(errs, fmt.Errorf("creating dir for evaluator %q: %w", evaluator.Name, err)) continue } // Skip if a result JSON already exists. @@ -236,8 +241,11 @@ func WriteEvalReviewArtifacts(agentProject string, cfg *EvalConfig) { } yamlPath := filepath.Join(dir, evaluator.Name+".yaml") stub := fmt.Sprintf("# Evaluator stub: %s\nname: %s\n", evaluator.Name, evaluator.Name) - _ = os.WriteFile(yamlPath, []byte(stub), 0600) + if err := os.WriteFile(yamlPath, []byte(stub), 0600); err != nil { + errs = append(errs, fmt.Errorf("writing evaluator stub %q: %w", yamlPath, err)) + } } + return errors.Join(errs...) } // WriteJSONFile writes a value as indented JSON to the specified path. 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts_test.go new file mode 100644 index 00000000000..073497ef367 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts_test.go @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package eval_api + +import ( + "path/filepath" + "testing" + + "azureaiagent/internal/pkg/agents/opt_eval" + + "github.com/stretchr/testify/assert" +) + +func TestDatasetArtifactPath_Basic(t *testing.T) { + t.Parallel() + ref := &opt_eval.DatasetRef{Name: "test-ds", Version: "v2"} + got := DatasetArtifactPath("/project", ref) + assert.Equal(t, filepath.Join("/project", "datasets", "test-ds"), got) +} + +func TestDatasetArtifactPath_NilRef(t *testing.T) { + t.Parallel() + got := DatasetArtifactPath("/project", nil) + assert.Empty(t, got) +} + +func TestDatasetArtifactPath_EmptyName(t *testing.T) { + t.Parallel() + ref := &opt_eval.DatasetRef{Name: ""} + got := DatasetArtifactPath("/project", ref) + assert.Empty(t, got) +} + +func TestDatasetLocalURI(t *testing.T) { + t.Parallel() + got := DatasetLocalURI("my-dataset") + assert.Equal(t, filepath.Join("datasets", "my-dataset"), got) +} + +func TestEvaluatorLocalURI(t *testing.T) { + t.Parallel() + got := EvaluatorLocalURI("coherence") + assert.Equal(t, filepath.Join("evaluators", "coherence", "rubric_dimensions.json"), got) +} + +func TestIsContainerSAS(t *testing.T) { + t.Parallel() + assert.True(t, isContainerSAS("https://blob.core.windows.net/container?sr=c&sig=abc")) + assert.False(t, isContainerSAS("https://blob.core.windows.net/container?sr=b&sig=abc")) + assert.False(t, isContainerSAS("https://blob.core.windows.net/container")) +} + +func TestFilenameFromURL(t *testing.T) { + t.Parallel() + assert.Equal(t, "data.jsonl", 
filenameFromURL("https://blob.core.windows.net/c/data.jsonl?sig=abc")) + assert.Equal(t, "data.jsonl", filenameFromURL("https://blob.core.windows.net/c/prefix/data.jsonl?sig=abc")) + assert.Equal(t, "data.jsonl", filenameFromURL("https://blob.core.windows.net/c/noext")) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go index d6e4e62b7d4..0fb499f1a9a 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go @@ -9,17 +9,18 @@ import ( "path/filepath" "strings" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/exterrors" + "azureaiagent/internal/pkg/agents/opt_eval" "go.yaml.in/yaml/v3" ) // EvalConfig extends the shared Config with eval-specific fields and helpers. type EvalConfig struct { - opteval.Config `yaml:",inline"` + opt_eval.Config `yaml:",inline"` // Options holds run-time options (eval_model, etc.). - Options *opteval.Options `yaml:"options,omitempty"` + Options *opt_eval.Options `yaml:"options,omitempty"` // MaxSamples is the maximum number of data samples to generate. MaxSamples int `yaml:"max_samples,omitempty"` @@ -63,8 +64,25 @@ func WriteEvalConfig(path string, cfg *EvalConfig) error { // Validate checks required fields for the eval command. 
func (c *EvalConfig) Validate() error { + if c.Name == "" { + return exterrors.Validation( + exterrors.CodeEvalConfigInvalid, + "name is required in the eval config", + "add a 'name:' field to your eval.yaml") + } + if c.Agent.Name == "" { - return fmt.Errorf("agent.name is required") + return exterrors.Validation( + exterrors.CodeEvalConfigInvalid, + "agent.name is required", + "add 'agent.name' to your eval.yaml or use --agent") + } + + if len(c.Evaluators) == 0 { + return exterrors.Validation( + exterrors.CodeEvalConfigInvalid, + "at least one evaluator is required", + "add an 'evaluators:' section to your eval.yaml or use --evaluator") } hasFile := c.DatasetFile != "" diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go index e8e7b70e3cb..d571069ab43 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go @@ -9,7 +9,7 @@ import ( "testing" "azureaiagent/internal/pkg/agents/agent_yaml" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -19,13 +19,30 @@ import ( // Validate // --------------------------------------------------------------------------- +func TestValidate_RequiresName(t *testing.T) { + t.Parallel() + + cfg := &EvalConfig{ + Config: opt_eval.Config{ + Agent: opt_eval.AgentRef{Name: "agent-1"}, + DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, + }, + } + err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "name is required") +} + func TestValidate_RequiresAgentName(t *testing.T) { t.Parallel() cfg := &EvalConfig{ - Config: opteval.Config{ - Agent: opteval.AgentRef{}, - 
DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "v1"}, + Config: opt_eval.Config{ + Name: "my-eval", + Agent: opt_eval.AgentRef{}, + DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, }, } err := cfg.Validate() @@ -33,12 +50,29 @@ func TestValidate_RequiresAgentName(t *testing.T) { assert.Contains(t, err.Error(), "agent.name is required") } +func TestValidate_RequiresEvaluators(t *testing.T) { + t.Parallel() + + cfg := &EvalConfig{ + Config: opt_eval.Config{ + Name: "my-eval", + Agent: opt_eval.AgentRef{Name: "agent-1"}, + DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, + }, + } + err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "at least one evaluator") +} + func TestValidate_RequiresDataset(t *testing.T) { t.Parallel() cfg := &EvalConfig{ - Config: opteval.Config{ - Agent: opteval.AgentRef{Name: "agent-1"}, + Config: opt_eval.Config{ + Name: "my-eval", + Agent: opt_eval.AgentRef{Name: "agent-1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, }, } err := cfg.Validate() @@ -50,10 +84,12 @@ func TestValidate_MutuallyExclusiveDataset(t *testing.T) { t.Parallel() cfg := &EvalConfig{ - Config: opteval.Config{ - Agent: opteval.AgentRef{Name: "agent-1"}, + Config: opt_eval.Config{ + Name: "my-eval", + Agent: opt_eval.AgentRef{Name: "agent-1"}, DatasetFile: "tasks.jsonl", - DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "v1"}, + DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, }, } err := cfg.Validate() @@ -65,9 +101,11 @@ func TestValidate_ValidWithDatasetFile(t *testing.T) { t.Parallel() cfg := &EvalConfig{ - Config: opteval.Config{ - Agent: opteval.AgentRef{Name: "agent-1"}, + Config: opt_eval.Config{ + Name: "my-eval", + Agent: opt_eval.AgentRef{Name: "agent-1"}, DatasetFile: "tasks.jsonl", + Evaluators: opt_eval.EvaluatorList{{Name: 
"coherence"}}, }, } assert.NoError(t, cfg.Validate()) @@ -77,9 +115,11 @@ func TestValidate_ValidWithDatasetReference(t *testing.T) { t.Parallel() cfg := &EvalConfig{ - Config: opteval.Config{ - Agent: opteval.AgentRef{Name: "agent-1"}, - DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "v1"}, + Config: opt_eval.Config{ + Name: "my-eval", + Agent: opt_eval.AgentRef{Name: "agent-1"}, + DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, }, } assert.NoError(t, cfg.Validate()) @@ -95,19 +135,19 @@ func TestEvalConfig_RoundTrip_FullFields(t *testing.T) { path := filepath.Join(dir, "eval.yaml") original := &EvalConfig{ - Config: opteval.Config{ + Config: opt_eval.Config{ Name: "full-test", - Agent: opteval.AgentRef{ + Agent: opt_eval.AgentRef{ Name: "booking-agent", Kind: "hosted", Version: "v3", Model: "gpt-4.1", - Instruction: opteval.InstructionRef{Value: "This agent handles restaurant reservations"}, + Instruction: opt_eval.InstructionRef{Value: "This agent handles restaurant reservations"}, }, - DatasetReference: &opteval.DatasetRef{Name: "golden-data", Version: "v2"}, - Evaluators: opteval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "custom-quality"}}, + DatasetReference: &opt_eval.DatasetRef{Name: "golden-data", Version: "v2"}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "custom-quality"}}, }, - Options: &opteval.Options{ + Options: &opt_eval.Options{ EvalModel: "gpt-4o", }, MaxSamples: 75, @@ -139,8 +179,8 @@ func TestEvalConfig_RoundTrip_MinimalFields(t *testing.T) { path := filepath.Join(dir, "eval.yaml") original := &EvalConfig{ - Config: opteval.Config{ - Agent: opteval.AgentRef{Name: "simple-agent"}, + Config: opt_eval.Config{ + Agent: opt_eval.AgentRef{Name: "simple-agent"}, DatasetFile: "data.jsonl", }, } @@ -179,8 +219,8 @@ func TestWriteEvalConfig_CreatesDirectory(t *testing.T) { path := filepath.Join(dir, "subdir", "nested", 
"eval.yaml") cfg := &EvalConfig{ - Config: opteval.Config{ - Agent: opteval.AgentRef{Name: "agent-1"}, + Config: opt_eval.Config{ + Agent: opt_eval.AgentRef{Name: "agent-1"}, }, } @@ -196,13 +236,13 @@ func TestToAgentTargetAdaptableEvalGroupRequest_WithEvaluators(t *testing.T) { t.Parallel() cfg := &EvalConfig{ - Config: opteval.Config{ + Config: opt_eval.Config{ Name: "test-eval", - Agent: opteval.AgentRef{Name: "agent-1", Version: "v1"}, - Evaluators: opteval.EvaluatorList{{Name: "builtin.quality"}, {Name: "custom-1"}}, + Agent: opt_eval.AgentRef{Name: "agent-1", Version: "v1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.quality"}, {Name: "custom-1"}}, DatasetFile: "tasks.jsonl", }, - Options: &opteval.Options{EvalModel: "gpt-4o"}, + Options: &opt_eval.Options{EvalModel: "gpt-4o"}, } req := cfg.ToAgentTargetAdaptableEvalGroupRequest() @@ -224,10 +264,10 @@ func TestToAgentTargetAdaptableEvalGroupRequest_WithDatasetReference(t *testing. t.Parallel() cfg := &EvalConfig{ - Config: opteval.Config{ + Config: opt_eval.Config{ Name: "ref-eval", - Agent: opteval.AgentRef{Name: "agent-1"}, - DatasetReference: &opteval.DatasetRef{Name: "ds", Version: "v1"}, + Agent: opt_eval.AgentRef{Name: "agent-1"}, + DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, }, } @@ -241,9 +281,9 @@ func TestToAgentTargetAdaptableEvalGroupRequest_NoEvaluators(t *testing.T) { t.Parallel() cfg := &EvalConfig{ - Config: opteval.Config{ + Config: opt_eval.Config{ Name: "test-eval", - Agent: opteval.AgentRef{Name: "agent-1"}, + Agent: opt_eval.AgentRef{Name: "agent-1"}, DatasetFile: "tasks.jsonl", }, } @@ -256,9 +296,9 @@ func TestToAgentTargetAdaptableEvalGroupRequest_MetadataFields(t *testing.T) { t.Parallel() cfg := &EvalConfig{ - Config: opteval.Config{ + Config: opt_eval.Config{ Name: "meta-test", - Agent: opteval.AgentRef{Name: "my-agent", Version: "v5"}, + Agent: opt_eval.AgentRef{Name: "my-agent", Version: "v5"}, DatasetFile: "tasks.jsonl", }, } diff --git 
a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go index df2e6f8c8a1..47d9410083d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go @@ -8,7 +8,7 @@ import ( "strings" "time" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" ) // --------------------------------------------------------------------------- @@ -109,7 +109,7 @@ func IsBuiltinEvaluator(name string) bool { // SplitEvaluators partitions evaluators into generated (non-builtin) and // built-in lists. -func SplitEvaluators(evaluators opteval.EvaluatorList) (generated, builtin opteval.EvaluatorList) { +func SplitEvaluators(evaluators opt_eval.EvaluatorList) (generated, builtin opt_eval.EvaluatorList) { for _, e := range evaluators { if IsBuiltinEvaluator(e.Name) { builtin = append(builtin, e) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go index 2a08673d335..f4d95245408 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go @@ -6,7 +6,7 @@ package eval_api import ( "testing" - "azureaiagent/internal/pkg/agents/opteval" + "azureaiagent/internal/pkg/agents/opt_eval" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -94,20 +94,20 @@ func TestSplitEvaluators(t *testing.T) { t.Run("mixed", func(t *testing.T) { t.Parallel() - gen, bi := SplitEvaluators(opteval.EvaluatorList{ + gen, bi := SplitEvaluators(opt_eval.EvaluatorList{ {Name: "builtin.task_adherence"}, {Name: "my-quality"}, {Name: "builtin.safety"}, }) - assert.Equal(t, opteval.EvaluatorList{{Name: 
"my-quality"}}, gen) - assert.Equal(t, opteval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "builtin.safety"}}, bi) + assert.Equal(t, opt_eval.EvaluatorList{{Name: "my-quality"}}, gen) + assert.Equal(t, opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "builtin.safety"}}, bi) }) t.Run("all builtin", func(t *testing.T) { t.Parallel() - gen, bi := SplitEvaluators(opteval.EvaluatorList{ + gen, bi := SplitEvaluators(opt_eval.EvaluatorList{ {Name: "builtin.quality"}, {Name: "builtin.safety"}, }) assert.Nil(t, gen) - assert.Equal(t, opteval.EvaluatorList{{Name: "builtin.quality"}, {Name: "builtin.safety"}}, bi) + assert.Equal(t, opt_eval.EvaluatorList{{Name: "builtin.quality"}, {Name: "builtin.safety"}}, bi) }) t.Run("nil", func(t *testing.T) { diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go index 0a1b2c8541a..71e9df09788 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/poller.go @@ -5,13 +5,12 @@ package eval_api import ( "context" - "errors" "fmt" "log" "strings" "time" - "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "azureaiagent/internal/pkg/agents" ) // --------------------------------------------------------------------------- @@ -167,7 +166,7 @@ func (p *Poller) Poll(ctx context.Context) (*GenerationJob, error) { job, err := p.GetJob(ctx, p.OperationID, p.APIVersion) if err != nil { - if isTransientError(err) { + if agents.IsTransientError(err) { log.Printf("[poller] transient error polling %s, will retry: %v", p.OperationID, err) continue } @@ -194,12 +193,3 @@ func (p *Poller) Poll(ctx context.Context) (*GenerationJob, error) { Attempts: p.Options.MaxAttempts, } } - -// isTransientError checks whether an error represents a transient HTTP failure -// (429 Too Many Requests or 5xx Server Error) that is safe to retry. 
-func isTransientError(err error) bool { - if respErr, ok := errors.AsType[*azcore.ResponseError](err); ok { - return respErr.StatusCode == 429 || respErr.StatusCode >= 500 - } - return false -} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls_test.go new file mode 100644 index 00000000000..264418af439 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/portal_urls_test.go @@ -0,0 +1,66 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package eval_api + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewPortalPrefix_Valid(t *testing.T) { + t.Parallel() + resID := "/subscriptions/00000000-0000-0000-0000-000000000001/resourceGroups/rg1/providers/Microsoft.CognitiveServices/accounts/acct1/projects/proj1" + prefix, err := NewPortalPrefix(resID) + require.NoError(t, err) + assert.Contains(t, prefix.prefix, "ai.azure.com/nextgen/r/") + assert.Contains(t, prefix.prefix, "rg1") + assert.Contains(t, prefix.prefix, "acct1") + assert.Contains(t, prefix.prefix, "proj1") +} + +func TestNewPortalPrefix_InvalidResourceID(t *testing.T) { + t.Parallel() + _, err := NewPortalPrefix("not-a-resource-id") + assert.Error(t, err) + assert.Contains(t, err.Error(), "failed to parse") +} + +func TestNewPortalPrefix_MissingParent(t *testing.T) { + t.Parallel() + // Resource ID without a parent (not a nested resource). 
+ resID := "/subscriptions/00000000-0000-0000-0000-000000000001/resourceGroups/rg1/providers/Microsoft.CognitiveServices/accounts/acct1" + _, err := NewPortalPrefix(resID) + assert.Error(t, err) + assert.Contains(t, err.Error(), "Foundry project") +} + +func TestPortalPrefix_EvalRunURL(t *testing.T) { + t.Parallel() + p := &PortalPrefix{prefix: "https://ai.azure.com/nextgen/r/sub,rg,,acct,proj"} + url := p.EvalRunURL("eval-123", "run-456") + assert.Equal(t, "https://ai.azure.com/nextgen/r/sub,rg,,acct,proj/build/evaluations/eval-123/run/run-456", url) +} + +func TestPortalPrefix_EvaluatorURL(t *testing.T) { + t.Parallel() + p := &PortalPrefix{prefix: "https://ai.azure.com/nextgen/r/sub,rg,,acct,proj"} + url := p.EvaluatorURL("coherence", "v1") + assert.Equal(t, "https://ai.azure.com/nextgen/r/sub,rg,,acct,proj/build/evaluations/catalog/coherence/v1", url) +} + +func TestPortalPrefix_DatasetURL(t *testing.T) { + t.Parallel() + p := &PortalPrefix{prefix: "https://ai.azure.com/nextgen/r/sub,rg,,acct,proj"} + url := p.DatasetURL("my-dataset", "v2") + assert.Equal(t, "https://ai.azure.com/nextgen/r/sub,rg,,acct,proj/build/data/datasets/my-dataset/v2", url) +} + +func TestPortalPrefix_OptimizationURL(t *testing.T) { + t.Parallel() + p := &PortalPrefix{prefix: "https://ai.azure.com/nextgen/r/sub,rg,,acct,proj"} + url := p.OptimizationURL("my-agent", "op-789") + assert.Equal(t, "https://ai.azure.com/nextgen/r/sub,rg,,acct,proj/build/agents/my-agent/optimization/op-789", url) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/state.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/state.go similarity index 93% rename from cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/state.go rename to cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/state.go index 9d89c3401d9..078d807fd6f 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/state.go +++ 
b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/state.go @@ -5,12 +5,13 @@ // environment across CLI invocations. This covers eval job tracking and any // other cross-invocation state needed by eval, optimize, or related commands. -package opteval +package opt_eval import ( "context" "errors" "fmt" + "log" "github.com/azure/azure-dev/cli/azd/pkg/azdext" ) @@ -43,13 +44,15 @@ const ( ) // LoadEvalState reads eval runtime state from the azd environment. -// Returns an empty state if no values are set. +// Individual key-read errors are logged but do not prevent loading +// the remaining keys; a partial state is still useful for resume logic. func LoadEvalState(ctx context.Context, azdClient *azdext.AzdClient, envName string) *EvalState { get := func(key string) string { v, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ EnvName: envName, Key: key, }) - if err != nil || v.Value == "" { + if err != nil { + log.Printf("LoadEvalState: failed to read %s: %v", key, err) return "" } return v.Value diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go similarity index 95% rename from cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go rename to cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go index 251b4137e08..a3b09fea7a4 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-package opteval +package opt_eval import ( "fmt" @@ -20,8 +20,12 @@ import ( func SafePath(baseDir, untrusted string) (string, error) { p := filepath.Join(baseDir, filepath.FromSlash(untrusted)) p = filepath.Clean(p) - if !strings.HasPrefix(p, filepath.Clean(baseDir)+string(filepath.Separator)) && - p != filepath.Clean(baseDir) { + + rel, err := filepath.Rel(baseDir, p) + if err != nil { + return "", fmt.Errorf("path %q escapes base directory", untrusted) + } + if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) { return "", fmt.Errorf("path %q escapes base directory", untrusted) } return p, nil @@ -227,12 +231,13 @@ func (c *AgentConfig) ResolvedInstruction() string { // Optimize-specific fields (skill_dir, tools_file) are stored in // OptimizeConfig, not here, so eval.yaml stays target-agnostic. type AgentRef struct { - Name string `yaml:"name"` - Kind agent_yaml.AgentKind `yaml:"kind,omitempty"` - Version string `yaml:"version,omitempty"` - ConfigFile string `yaml:"config,omitempty"` - Model string `yaml:"model,omitempty"` - Instruction InstructionRef `yaml:"instruction,omitempty"` + Name string `yaml:"name"` + Kind agent_yaml.AgentKind `yaml:"kind,omitempty"` + Version string `yaml:"version,omitempty"` + ConfigFile string `yaml:"config,omitempty"` + Model string `yaml:"model,omitempty"` + // Not expected to be shown in yaml + Instruction InstructionRef `yaml:"instruction,omitempty"` } // ResolveConfig loads the metadata.yaml pointed to by ConfigFile and returns @@ -378,7 +383,7 @@ type Options struct { TargetAttributes []string `yaml:"target_attributes,omitempty"` TargetConfig *TargetConfig `yaml:"target_config,omitempty"` Budget int `yaml:"budget,omitempty"` - MaxIterations int `yaml:"max_iterations,omitempty"` + MaxIterations *int `yaml:"max_iterations,omitempty"` MinImprovement float64 `yaml:"min_improvement,omitempty"` ImprovementThreshold float64 `yaml:"improvement_threshold,omitempty"` PassThreshold float64 
`yaml:"pass_threshold,omitempty"` @@ -408,8 +413,8 @@ func (o *Options) UnmarshalYAML(value *yaml.Node) error { } } - if o.MaxIterations <= 0 { - o.MaxIterations = 4 + if o.MaxIterations == nil { + o.MaxIterations = new(4) } return nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml_test.go similarity index 98% rename from cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go rename to cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml_test.go index a73009a0a66..4e5f5c3aaa5 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opteval/yaml_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml_test.go @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -package opteval +package opt_eval import ( "path/filepath" @@ -209,7 +209,8 @@ reflection_model: gpt-4o assert.Equal(t, "full", opts.Mode) assert.Equal(t, []string{"prompt", "tool"}, opts.TargetAttributes) assert.Equal(t, 500, opts.Budget) - assert.Equal(t, 10, opts.MaxIterations) + require.NotNil(t, opts.MaxIterations) + assert.Equal(t, 10, *opts.MaxIterations) assert.InDelta(t, 0.05, opts.MinImprovement, 0.001) assert.InDelta(t, 0.1, opts.ImprovementThreshold, 0.001) assert.InDelta(t, 0.8, opts.PassThreshold, 0.001) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go index ce4a95ac328..0350168e538 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go @@ -222,7 +222,7 @@ func (c *OptimizeClient) CancelOptimize( } // ReportDeployment notifies the optimization service that a candidate has been -// deployed. 
This allows FAOS to track which candidates have been deployed. +// deployed. This allows the optimization service to track which candidates have been deployed. func (c *OptimizeClient) ReportDeployment( ctx context.Context, report *DeploymentReport, @@ -266,7 +266,7 @@ func (c *OptimizeClient) ReportDeployment( func (c *OptimizeClient) GetCandidateConfig( ctx context.Context, candidateID string, -) (any, error) { +) (json.RawMessage, error) { url := fmt.Sprintf("%s/optimize/candidates/%s/config?api-version=%s", c.endpoint, netURL.PathEscape(candidateID), APIVersion) req, err := runtime.NewRequest(ctx, http.MethodGet, url) @@ -289,14 +289,14 @@ func (c *OptimizeClient) GetCandidateConfig( return nil, fmt.Errorf("failed to read response body: %w", err) } - var config any - if err := json.Unmarshal(body, &config); err != nil { - return nil, fmt.Errorf("failed to parse candidate config: %w", err) + // Validate that the body is valid JSON. + if !json.Valid(body) { + return nil, fmt.Errorf("candidate config is not valid JSON") } - return config, nil + return json.RawMessage(body), nil } -// GetCandidate fetches the candidate manifest (metadata + file list) from FAOS. +// GetCandidate fetches the candidate manifest (metadata + file list) from the optimization service. // GET /optimize/candidates/{id} func (c *OptimizeClient) GetCandidate( ctx context.Context, diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go index 97b21056521..212b949dfea 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go @@ -110,7 +110,7 @@ type TargetConfig struct { // OptimizeOptions controls the optimization run. 
type OptimizeOptions struct { Budget int `json:"budget,omitempty"` - MaxIterations int `json:"maxIterations,omitempty"` + MaxIterations *int `json:"maxIterations,omitempty"` MinImprovement float64 `json:"minImprovement,omitempty"` ImprovementThreshold float64 `json:"improvementThreshold,omitempty"` PassThreshold float64 `json:"passThreshold,omitempty"` @@ -226,7 +226,7 @@ type OptimizeCancelResponse struct { // --- Deployment report --- -// DeploymentReport is sent to FAOS after a candidate is promoted, +// DeploymentReport is sent to the optimization service after a candidate is promoted, // creating the candidate→deployment mapping. type DeploymentReport struct { CandidateID string `json:"-"` // used in URL path, not serialized diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go index 546ed43748d..a4f3b8ebda8 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go @@ -46,7 +46,7 @@ func TestOptimizeRequest_RoundTrip(t *testing.T) { }, Options: OptimizeOptions{ Budget: 100, - MaxIterations: 5, + MaxIterations: new(5), MinImprovement: 0.01, ImprovementThreshold: 0.05, PassThreshold: 0.8, diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go index dedbfaa0ac7..7e4acec518d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go @@ -7,12 +7,11 @@ package optimize_api import ( "context" - "errors" "fmt" "log" "time" - "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "azureaiagent/internal/pkg/agents" ) // Poller polls an optimization job until it reaches a terminal state. 
@@ -26,8 +25,12 @@ type Poller struct { // PollUntilDone polls GetOptimizeStatus at the configured interval until the // job reaches a terminal state (completed, failed, cancelled), the context -// is cancelled, or MaxAttempts is reached. +// is cancelled, or MaxAttempts is reached. Transient errors (5xx, 429, +// connection reset) are retried up to maxConsecutiveTransient times before +// the poller gives up. func (p *Poller) PollUntilDone(ctx context.Context) (*OptimizeJobStatus, error) { + const maxConsecutiveTransient = 5 + interval := p.Interval if interval <= 0 { interval = 5 * time.Second @@ -36,16 +39,26 @@ func (p *Poller) PollUntilDone(ctx context.Context) (*OptimizeJobStatus, error) defer ticker.Stop() attempts := 0 + consecutiveTransient := 0 for { status, err := p.Client.GetOptimizeStatus(ctx, p.OperationID) if err != nil { - if isTransientError(err) { - log.Printf("[poller] transient error polling %s, will retry: %v", p.OperationID, err) + if agents.IsTransientError(err) { + consecutiveTransient++ + if consecutiveTransient > maxConsecutiveTransient { + return nil, fmt.Errorf( + "polling aborted after %d consecutive transient errors, last: %w", + consecutiveTransient, err) + } + log.Printf("[poller] transient error polling %s (%d/%d), will retry: %v", + p.OperationID, consecutiveTransient, maxConsecutiveTransient, err) goto wait } return nil, fmt.Errorf("failed to get optimization status: %w", err) } + consecutiveTransient = 0 // reset on success + if p.OnProgress != nil { p.OnProgress(status) } @@ -68,12 +81,3 @@ func (p *Poller) PollUntilDone(ctx context.Context) (*OptimizeJobStatus, error) } } } - -// isTransientError checks whether an error represents a transient HTTP failure -// (429 Too Many Requests or 5xx Server Error) that is safe to retry. 
-func isTransientError(err error) bool { - if respErr, ok := errors.AsType[*azcore.ResponseError](err); ok { - return respErr.StatusCode == 429 || respErr.StatusCode >= 500 - } - return false -} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go index 35dce98a62b..2d34db0058d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go @@ -159,3 +159,56 @@ func TestPoller_OnProgressCalled(t *testing.T) { assert.GreaterOrEqual(t, len(statuses), 2) assert.Equal(t, StatusCompleted, statuses[len(statuses)-1]) } + +func TestPoller_TransientRetryThenSuccess(t *testing.T) { + t.Parallel() + + var callCount int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + n := atomic.AddInt32(&callCount, 1) + if n <= 3 { + // First 3 calls return 500 (transient). + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error": "server error"}`)) + return + } + _ = json.NewEncoder(w).Encode(OptimizeJobStatus{ + OperationID: "op-retry", + Status: StatusCompleted, + }) + })) + defer server.Close() + + poller := &Poller{ + Client: newPollerTestClient(server.URL), + OperationID: "op-retry", + Interval: 10 * time.Millisecond, + } + + result, err := poller.PollUntilDone(t.Context()) + require.NoError(t, err) + assert.Equal(t, StatusCompleted, result.Status) + assert.GreaterOrEqual(t, atomic.LoadInt32(&callCount), int32(4)) +} + +func TestPoller_TransientRetryExhausted(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + // Always return 500. 
+ w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error": "server error"}`)) + })) + defer server.Close() + + poller := &Poller{ + Client: newPollerTestClient(server.URL), + OperationID: "op-exhaust", + Interval: 10 * time.Millisecond, + MaxAttempts: 20, // low cap to keep test fast + } + + _, err := poller.PollUntilDone(t.Context()) + require.Error(t, err) + assert.Contains(t, err.Error(), "consecutive transient errors") +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/transient.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/transient.go new file mode 100644 index 00000000000..46000b4fe4c --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/transient.go @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package agents + +import ( + "errors" + "strings" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" +) + +// IsTransientError checks whether an error represents a transient HTTP failure +// (429 Too Many Requests, 5xx Server Error, or connection-level errors) that +// is safe to retry. +func IsTransientError(err error) bool { + if respErr, ok := errors.AsType[*azcore.ResponseError](err); ok { + return respErr.StatusCode == 429 || respErr.StatusCode >= 500 + } + // Connection resets and similar I/O errors are also transient. + msg := err.Error() + return strings.Contains(msg, "connection reset") || + strings.Contains(msg, "connection refused") || + strings.Contains(msg, "EOF") +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/transient_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/transient_test.go new file mode 100644 index 00000000000..742a5deda1e --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/transient_test.go @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package agents + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIsTransientError(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + err error + transient bool + }{ + {"connection reset", fmt.Errorf("read tcp: connection reset by peer"), true}, + {"connection refused", fmt.Errorf("dial tcp: connection refused"), true}, + {"unexpected EOF", fmt.Errorf("unexpected EOF"), true}, + {"not found", fmt.Errorf("not found"), false}, + {"auth error", fmt.Errorf("authorization denied"), false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.transient, IsTransientError(tt.err)) + }) + } +} From 251f1d55cd6f59e0f0d08a518797d7ff3d38bcb3 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 21 May 2026 18:16:18 -0700 Subject: [PATCH 29/33] fix more --- .../azure.ai.agents/internal/cmd/eval_test.go | 8 +++---- .../internal/cmd/optimize_helpers.go | 5 +---- .../internal/cmd/optimize_helpers_test.go | 21 ++++--------------- .../internal/cmd/optimize_list.go | 2 +- .../internal/pkg/agents/dataset_api/models.go | 3 ++- .../internal/pkg/agents/eval_api/artifacts.go | 13 ++++-------- .../pkg/agents/eval_api/eval_config_test.go | 10 ++++----- .../internal/pkg/agents/opt_eval/yaml.go | 4 ++-- .../pkg/agents/optimize_api/poller.go | 7 +++++++ .../pkg/agents/optimize_api/poller_test.go | 16 ++++++++++++++ 10 files changed, 44 insertions(+), 45 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go index 6d4b2115ebf..0d9b59d447b 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go @@ -414,10 +414,9 @@ func TestEvalConfigRoundTrip(t *testing.T) { Config: opt_eval.Config{ Name: "smoke-core", Agent: evalAgentRef{ - Name: "my-agent", - Kind: agent_yaml.AgentKindHosted, - Version: "v1", - 
Instruction: opt_eval.InstructionRef{Value: "Test this agent"}, + Name: "my-agent", + Kind: agent_yaml.AgentKindHosted, + Version: "v1", }, DatasetReference: &evalDatasetRef{Name: "ds", Version: "v1"}, Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}}, @@ -439,7 +438,6 @@ func TestEvalConfigRoundTrip(t *testing.T) { assert.Equal(t, original.Agent.Kind, loaded.Agent.Kind) assert.Equal(t, original.Agent.Version, loaded.Agent.Version) assert.Equal(t, "gpt-4o", loaded.Options.EvalModel) - assert.Equal(t, original.Agent.Instruction.Value, loaded.Agent.Instruction.Value) assert.Equal(t, original.MaxSamples, loaded.MaxSamples) require.NotNil(t, loaded.DatasetReference) assert.Equal(t, "ds", loaded.DatasetReference.Name) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go index d54f206918f..cfa0de52a21 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go @@ -35,14 +35,11 @@ func (f *optimizeConnectionFlags) register(cmd *cobra.Command) { } // resolve returns the project endpoint for optimize API calls. -// Priority: --endpoint flag → AZURE_AI_OPTIMIZE_ENDPOINT → --project-endpoint → azd environment → AZURE_AI_PROJECT_ENDPOINT env var. +// Priority: --endpoint flag → --project-endpoint → azd environment → AZURE_AI_PROJECT_ENDPOINT env var. 
func (f *optimizeConnectionFlags) resolve(ctx context.Context) (string, error) { if f.endpoint != "" { return strings.TrimRight(f.endpoint, "/"), nil } - if ep := os.Getenv("AZURE_AI_OPTIMIZE_ENDPOINT"); ep != "" { - return strings.TrimRight(ep, "/"), nil - } // Explicit --project-endpoint flag if f.projectEndpoint != "" { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go index 889f50979d9..2bca62ec0a7 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go @@ -23,26 +23,13 @@ import ( ) func TestOptimizeConnectionFlags_Resolve_AllEmpty(t *testing.T) { - t.Setenv("AZURE_AI_OPTIMIZE_ENDPOINT", "") - f := &optimizeConnectionFlags{} _, err := f.resolve(context.Background()) assert.Error(t, err) assert.Contains(t, err.Error(), "endpoint") } -func TestOptimizeConnectionFlags_Resolve_FromEnv(t *testing.T) { - t.Setenv("AZURE_AI_OPTIMIZE_ENDPOINT", "https://example.com") - - f := &optimizeConnectionFlags{} - endpoint, err := f.resolve(context.Background()) - assert.NoError(t, err) - assert.Equal(t, "https://example.com", endpoint) -} - -func TestOptimizeConnectionFlags_Resolve_FlagsOverrideEnv(t *testing.T) { - t.Setenv("AZURE_AI_OPTIMIZE_ENDPOINT", "https://from-env.com") - +func TestOptimizeConnectionFlags_Resolve_FlagEndpoint(t *testing.T) { f := &optimizeConnectionFlags{ endpoint: "https://from-flag.com", } @@ -52,9 +39,9 @@ func TestOptimizeConnectionFlags_Resolve_FlagsOverrideEnv(t *testing.T) { } func TestOptimizeConnectionFlags_Resolve_TrimsTrailingSlash(t *testing.T) { - t.Setenv("AZURE_AI_OPTIMIZE_ENDPOINT", "https://example.com/") - - f := &optimizeConnectionFlags{} + f := &optimizeConnectionFlags{ + endpoint: "https://example.com/", + } endpoint, err := f.resolve(context.Background()) assert.NoError(t, err) assert.Equal(t, 
"https://example.com", endpoint) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go index fa09c0d9de1..2b79ffe1274 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go @@ -85,7 +85,7 @@ func runOptimizeList(cmd *cobra.Command, flags *optimizeListFlags) error { fmt.Fprintln(out, " No optimization jobs found.") if flags.status != "" { fmt.Fprintf(out, "\n Try removing the --status filter or run a new job with:\n") - fmt.Fprintf(out, " azd ai agent optimize run --config spec.yaml\n") + fmt.Fprintf(out, " azd ai agent optimize --config spec.yaml\n") } return nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go index 48967aade35..cee6bbcd886 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/models.go @@ -5,6 +5,7 @@ package dataset_api import ( "fmt" + "math" "os" "path/filepath" "strconv" @@ -150,7 +151,7 @@ func NextVersion(current string) string { // Try parsing as a decimal number (e.g. "1", "1.0", "2.0"). if f, err := strconv.ParseFloat(current, 64); err == nil { - return strconv.FormatFloat(f+1, 'f', 1, 64) + return strconv.FormatFloat(math.Floor(f)+1, 'f', 1, 64) } // Find trailing digits and increment them. 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go index f0d76baab3b..994c015c39c 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go @@ -10,6 +10,7 @@ import ( "fmt" "os" "path/filepath" + "slices" "strings" "time" @@ -121,18 +122,12 @@ func DownloadDatasetArtifact( // isContainerSAS checks if a SAS URI is container-scoped (sr=c in query). func isContainerSAS(rawURL string) bool { - idx := strings.IndexByte(rawURL, '?') - if idx == -1 { + _, query, ok := strings.Cut(rawURL, "?") + if !ok { return false } - query := rawURL[idx+1:] // Look for sr=c parameter. - for _, param := range strings.Split(query, "&") { - if param == "sr=c" { - return true - } - } - return false + return slices.Contains(strings.Split(query, "&"), "sr=c") } // filenameFromURL extracts the filename from a blob URL path. 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go index d571069ab43..c0acd91bf35 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go @@ -138,11 +138,10 @@ func TestEvalConfig_RoundTrip_FullFields(t *testing.T) { Config: opt_eval.Config{ Name: "full-test", Agent: opt_eval.AgentRef{ - Name: "booking-agent", - Kind: "hosted", - Version: "v3", - Model: "gpt-4.1", - Instruction: opt_eval.InstructionRef{Value: "This agent handles restaurant reservations"}, + Name: "booking-agent", + Kind: "hosted", + Version: "v3", + Model: "gpt-4.1", }, DatasetReference: &opt_eval.DatasetRef{Name: "golden-data", Version: "v2"}, Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "custom-quality"}}, @@ -169,7 +168,6 @@ func TestEvalConfig_RoundTrip_FullFields(t *testing.T) { assert.Equal(t, "builtin.task_adherence", loaded.Evaluators[0].Name) assert.Equal(t, "custom-quality", loaded.Evaluators[1].Name) assert.Equal(t, "gpt-4o", loaded.Options.EvalModel) - assert.Equal(t, "This agent handles restaurant reservations", loaded.Agent.Instruction.Value) assert.Equal(t, 75, loaded.MaxSamples) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go index a3b09fea7a4..cfa20f709fd 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go @@ -236,8 +236,8 @@ type AgentRef struct { Version string `yaml:"version,omitempty"` ConfigFile string `yaml:"config,omitempty"` Model string `yaml:"model,omitempty"` - // Not expected to be shown in yaml - Instruction InstructionRef `yaml:"instruction,omitempty"` + // 
Not serialized to YAML — populated at runtime from config or flags. + Instruction InstructionRef `yaml:"-"` } // ResolveConfig loads the metadata.yaml pointed to by ConfigFile and returns diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go index 7e4acec518d..d380f42e1d5 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller.go @@ -29,6 +29,13 @@ type Poller struct { // connection reset) are retried up to maxConsecutiveTransient times before // the poller gives up. func (p *Poller) PollUntilDone(ctx context.Context) (*OptimizeJobStatus, error) { + if p.Client == nil { + return nil, fmt.Errorf("poller Client is nil") + } + if p.OperationID == "" { + return nil, fmt.Errorf("poller OperationID is empty") + } + const maxConsecutiveTransient = 5 interval := p.Interval diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go index 2d34db0058d..fe623db11d7 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go @@ -212,3 +212,19 @@ func TestPoller_TransientRetryExhausted(t *testing.T) { require.Error(t, err) assert.Contains(t, err.Error(), "consecutive transient errors") } + +func TestPoller_NilClient(t *testing.T) { + t.Parallel() + poller := &Poller{OperationID: "op-1"} + _, err := poller.PollUntilDone(t.Context()) + require.Error(t, err) + assert.Contains(t, err.Error(), "Client is nil") +} + +func TestPoller_EmptyOperationID(t *testing.T) { + t.Parallel() + poller := &Poller{Client: &OptimizeClient{}} + _, err := poller.PollUntilDone(t.Context()) + require.Error(t, err) + 
assert.Contains(t, err.Error(), "OperationID is empty") +} From eca2c29c3da1afad884cf033a406f1cbc046341a Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 21 May 2026 19:48:59 -0700 Subject: [PATCH 30/33] to align the latest option contract --- .../internal/cmd/eval_init_jobs.go | 3 +- .../azure.ai.agents/internal/cmd/optimize.go | 11 +- .../internal/cmd/optimize_config.go | 22 ++-- .../internal/cmd/optimize_config_test.go | 5 - .../internal/cmd/optimize_prompts.go | 105 ++++++++++++++++++ .../internal/cmd/optimize_status.go | 6 +- .../internal/cmd/optimize_test.go | 2 - .../pkg/agents/dataset_api/operations_test.go | 35 ++++++ .../internal/pkg/agents/eval_api/artifacts.go | 5 + .../internal/pkg/agents/opt_eval/yaml.go | 24 ++-- .../internal/pkg/agents/opt_eval/yaml_test.go | 10 -- .../pkg/agents/optimize_api/models.go | 46 ++++---- .../pkg/agents/optimize_api/models_test.go | 41 +++---- 13 files changed, 212 insertions(+), 103 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go index a2d2b6a4245..abc9d32beb0 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_init_jobs.go @@ -270,8 +270,7 @@ func pollAndFinalizeJobs( ctx, resolved.datasetClient, resolved.agentProject, dsRef, DefaultAgentAPIVersion, ) if err != nil { - datasetPollErr = err - return + log.Printf("warning: downloading dataset artifact for %q: %v", dsRef.Name, err) } if localURI != "" { dsRef.LocalURI = localURI diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go index 8e5d685653a..2bdfdb4cd8c 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -327,6 +327,13 @@ func (a *OptimizeAction) applyOverrides( } } + // Resolve reflection_model: 
prompt user if not set. + if cfg.Options.ReflectionModel == "" && !a.noPrompt { + if err := resolveOptimizeReflectionModel(ctx, cfg); err != nil { + return err + } + } + return nil } @@ -444,8 +451,8 @@ func pollOptimizeJob( progress := fmt.Sprintf("\r %s %s", spin, status.Status) if status.Progress != nil { p := status.Progress - if p.CurrentStrategy != "" { - progress += fmt.Sprintf(" · strategy: %s", p.CurrentStrategy) + if p.CurrentTargetAttribute != "" { + progress += fmt.Sprintf(" · strategy: %s", p.CurrentTargetAttribute) } if p.CurrentIteration > 0 { progress += fmt.Sprintf(" · iteration %d", p.CurrentIteration) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go index 26be1ca628f..60c00f5626d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -94,9 +94,7 @@ func defaultOptimizeConfig(agentName string) *OptimizeConfig { InlineDataset: defaultDataset, Options: &opt_eval.Options{ EvalModel: defaultEvalModel, - Mode: "optimize", TargetAttributes: []string{"instruction", "skill"}, - Budget: 5, }, } } @@ -156,18 +154,14 @@ func (c *OptimizeConfig) ToRequest(projectEndpoint string) (*optimize_api.Optimi }, Evaluators: c.Evaluators.Names(), Options: optimize_api.OptimizeOptions{ - EvalModel: c.Options.EvalModel, - Budget: c.Options.Budget, - MaxIterations: c.Options.MaxIterations, - MinImprovement: c.Options.MinImprovement, - ImprovementThreshold: c.Options.ImprovementThreshold, - PassThreshold: c.Options.PassThreshold, - Strategies: c.Options.TargetAttributes, - TargetAttributes: c.Options.TargetAttributes, - KeepVersions: c.Options.KeepVersions, - TasksPerIteration: c.Options.TasksPerIteration, - ReflectionModel: c.Options.ReflectionModel, - Mode: c.Options.Mode, + EvalModel: c.Options.EvalModel, + MaxIterations: c.Options.MaxIterations, + Strategies: 
c.Options.TargetAttributes, + TargetAttributes: c.Options.TargetAttributes, + KeepVersions: c.Options.KeepVersions, + TasksPerIteration: c.Options.TasksPerIteration, + ReflectionModel: c.Options.ReflectionModel, + EvaluationLevel: c.Options.EvaluationLevel, }, } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go index 2a4131e0c68..33e1b6dfa79 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go @@ -68,7 +68,6 @@ options: assert.Equal(t, "4", req.Dataset[0].GroundTruth) assert.Nil(t, req.TrainDatasetReference) assert.Equal(t, "gpt-4o-mini", req.Options.EvalModel) - assert.Equal(t, 100, req.Options.Budget) assert.Equal(t, []string{"coherence", "relevance"}, req.Evaluators) assert.Len(t, req.Criteria, 1) assert.Equal(t, "accuracy", req.Criteria[0].Name) @@ -241,7 +240,6 @@ evaluators: options: eval_model: gpt-4o - mode: evaluate strategies: - instruction budget: 3 @@ -258,7 +256,6 @@ evaluators: - builtin.task_adherence options: eval_model: gpt-4o - mode: evaluate strategies: - instruction budget: 3 @@ -282,9 +279,7 @@ options: // Options require.NotNil(t, cfg.Options) assert.Equal(t, "gpt-4o", cfg.Options.EvalModel) - assert.Equal(t, "evaluate", cfg.Options.Mode) assert.Equal(t, []string{"instruction"}, cfg.Options.TargetAttributes) - assert.Equal(t, 3, cfg.Options.Budget) // Validate + ToRequest require.NoError(t, cfg.Validate()) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go index e94794847b9..e0601e0360b 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go @@ -357,6 +357,111 @@ func resolveOptimizeTargetModels( return nil } +// 
allowedReflectionModels is the set of model families permitted as reflection +// models by the server. Deployments whose ModelName does not match one of these +// prefixes are excluded from the selection list. +var allowedReflectionModels = []string{"gpt-5", "gpt-5.1", "gpt-5.3"} + +// isAllowedReflectionModel checks whether a model name matches an allowed +// reflection model (exact match or prefix followed by a separator). +func isAllowedReflectionModel(modelName string) bool { + for _, allowed := range allowedReflectionModels { + if strings.EqualFold(modelName, allowed) { + return true + } + } + return false +} + +// resolveOptimizeReflectionModel prompts the user to select a reflection model +// for optimization. Only deployments whose underlying model is in the allowed +// reflection model set are shown. If the eval model is allowed, skipping +// defaults to it; otherwise the user must pick from the filtered list. +func resolveOptimizeReflectionModel(ctx context.Context, cfg *OptimizeConfig) error { + azdClient, clientErr := azdext.NewAzdClient() + if clientErr != nil { + return nil + } + defer azdClient.Close() + + deployments := listDeploymentsFromEnv(ctx, azdClient) + if len(deployments) == 0 { + return nil + } + + allowedList := strings.Join(allowedReflectionModels, ", ") + + // Check if the eval model deployment uses an allowed model. + evalModelAllowed := false + for _, d := range deployments { + if d.Name == cfg.Options.EvalModel && isAllowedReflectionModel(d.ModelName) { + evalModelAllowed = true + break + } + } + + var choices []*azdext.SelectChoice + seen := make(map[string]bool) + + // Only offer "Skip" when the eval model is in the allowed set. + if evalModelAllowed { + choices = append(choices, &azdext.SelectChoice{ + Label: fmt.Sprintf("Skip (use eval model: %s)", cfg.Options.EvalModel), + Value: "", + }) + } + + // Only include deployments whose underlying model is in the allowed set. 
+ for _, d := range deployments { + if seen[d.Name] { + continue + } + if !isAllowedReflectionModel(d.ModelName) { + continue + } + label := d.Name + if d.ModelName != "" && d.ModelName != d.Name { + label = fmt.Sprintf("%s (%s)", d.Name, d.ModelName) + } + choices = append(choices, &azdext.SelectChoice{ + Label: label, + Value: d.Name, + }) + seen[d.Name] = true + } + + if len(choices) == 0 { + fmt.Printf("Warning: no deployed models match the allowed reflection model set: %s\n", allowedList) + return nil + } + + message := fmt.Sprintf("Select a reflection model (allowed: %s)", allowedList) + if !evalModelAllowed && cfg.Options.EvalModel != "" { + message = fmt.Sprintf( + "Eval model %q is not in the allowed reflection model set (%s). Select a reflection model", + cfg.Options.EvalModel, allowedList, + ) + } + + selectResp, selectErr := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ + Options: &azdext.SelectOptions{ + Message: message, + Choices: choices, + }, + }) + if selectErr != nil || selectResp.Value == nil { + return nil + } + + idx := int(*selectResp.Value) + if idx >= 0 && idx < len(choices) && choices[idx].Value != "" { + cfg.Options.ReflectionModel = choices[idx].Value + } + // Empty Value means Skip — leave ReflectionModel empty (server uses eval model). + + return nil +} + // buildOptimizeModelChoices fetches Foundry project deployments and returns // MultiSelectChoice items. The current deployed model is pre-selected. // Falls back to an empty list if deployments cannot be fetched. 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go index 9af0f57abfe..865ef11dab3 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go @@ -111,10 +111,10 @@ func printOptimizeJobSummary(out io.Writer, status *optimize_api.OptimizeJobStat if status.Agent != nil && status.Agent.AgentName != "" { fmt.Fprintf(out, " Agent: %s\n", status.Agent.AgentName) } - if status.AllStrategiesFailed { + if status.AllTargetAttributesFailed { fmt.Fprintf(out, " Strategy: %s\n", color.YellowString("failed (baseline only — no candidates generated)")) - } else if status.Progress != nil && status.Progress.CurrentStrategy != "" { - fmt.Fprintf(out, " Strategy: %s\n", status.Progress.CurrentStrategy) + } else if status.Progress != nil && status.Progress.CurrentTargetAttribute != "" { + fmt.Fprintf(out, " Strategy: %s\n", status.Progress.CurrentTargetAttribute) } if status.Best != nil { fmt.Fprintf(out, " Best: %.2f\n", status.Best.AvgScore) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go index abbbe91368c..d10cf036233 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go @@ -95,8 +95,6 @@ func TestDefaultOptimizeConfig(t *testing.T) { assert.NotEmpty(t, cfg.InlineDataset) require.NotNil(t, cfg.Options) assert.Equal(t, "gpt-4o", cfg.Options.EvalModel) - assert.Equal(t, "optimize", cfg.Options.Mode) - assert.Equal(t, 5, cfg.Options.Budget) assert.Contains(t, cfg.Options.TargetAttributes, "instruction") assert.Contains(t, cfg.Options.TargetAttributes, "skill") require.Len(t, cfg.Evaluators, 1) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations_test.go 
b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations_test.go index edb9b8ae4b8..64ec678fb29 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/dataset_api/operations_test.go @@ -116,6 +116,41 @@ func TestGetDataset_Success(t *testing.T) { assert.Equal(t, "https://storage.blob.core.windows.net/datasets/golden.jsonl", result.BlobURI) } +func TestDataset_UnmarshalServicePayload(t *testing.T) { + t.Parallel() + + // Recorded service GET /datasets//versions/ response (snake_case). + payload := `{ + "name": "eval-golden", + "version": "3", + "format": "jsonl", + "blob_uri": "https://store.blob.core.windows.net/ds/eval-golden.jsonl", + "data_uri": "https://store.blob.core.windows.net/ds/eval-golden-data.jsonl", + "content_uri": "https://store.blob.core.windows.net/ds/eval-golden-content.jsonl" + }` + + var ds Dataset + require.NoError(t, json.Unmarshal([]byte(payload), &ds)) + + assert.Equal(t, "eval-golden", ds.Name) + assert.Equal(t, "3", ds.Version) + assert.Equal(t, "jsonl", ds.Format) + assert.Equal(t, "https://store.blob.core.windows.net/ds/eval-golden.jsonl", ds.BlobURI) + assert.Equal(t, "https://store.blob.core.windows.net/ds/eval-golden-data.jsonl", ds.DataURI) + assert.Equal(t, "https://store.blob.core.windows.net/ds/eval-golden-content.jsonl", ds.ContentURI) + + // ResolvedBlobURI prefers blob_uri. + assert.Equal(t, ds.BlobURI, ds.ResolvedBlobURI()) + + // When blob_uri is empty, falls back to data_uri. + ds.BlobURI = "" + assert.Equal(t, ds.DataURI, ds.ResolvedBlobURI()) + + // When both blob_uri and data_uri are empty, falls back to content_uri. 
+ ds.DataURI = "" + assert.Equal(t, ds.ContentURI, ds.ResolvedBlobURI()) +} + // --------------------------------------------------------------------------- // GetDatasetCredential // --------------------------------------------------------------------------- diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go index 994c015c39c..2dd49447323 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/artifacts.go @@ -62,6 +62,11 @@ func DownloadDatasetArtifact( } destDir := DatasetArtifactPath(agentProject, ref) + + // Clear existing dataset directory to ensure a clean download. + if err := os.RemoveAll(destDir); err != nil { + return "", fmt.Errorf("removing existing dataset dir: %w", err) + } if err := os.MkdirAll(destDir, 0750); err != nil { return "", fmt.Errorf("creating dataset artifact dir: %w", err) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go index cfa20f709fd..58a98e1a8bd 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go @@ -378,18 +378,14 @@ type TargetConfig struct { // Options holds run-time options for eval and optimize. // Eval only uses EvalModel; optimize uses all fields. 
type Options struct { - EvalModel string `yaml:"eval_model,omitempty"` - Mode string `yaml:"mode,omitempty"` - TargetAttributes []string `yaml:"target_attributes,omitempty"` - TargetConfig *TargetConfig `yaml:"target_config,omitempty"` - Budget int `yaml:"budget,omitempty"` - MaxIterations *int `yaml:"max_iterations,omitempty"` - MinImprovement float64 `yaml:"min_improvement,omitempty"` - ImprovementThreshold float64 `yaml:"improvement_threshold,omitempty"` - PassThreshold float64 `yaml:"pass_threshold,omitempty"` - KeepVersions bool `yaml:"keep_versions,omitempty"` - TasksPerIteration int `yaml:"tasks_per_iteration,omitempty"` - ReflectionModel string `yaml:"reflection_model,omitempty"` + EvalModel string `yaml:"eval_model,omitempty"` + TargetAttributes []string `yaml:"target_attributes,omitempty"` + TargetConfig *TargetConfig `yaml:"target_config,omitempty"` + MaxIterations *int `yaml:"max_iterations,omitempty"` + KeepVersions bool `yaml:"keep_versions,omitempty"` + TasksPerIteration int `yaml:"tasks_per_iteration,omitempty"` + ReflectionModel string `yaml:"reflection_model,omitempty"` + EvaluationLevel string `yaml:"evaluation_level,omitempty"` } // UnmarshalYAML populates default target attributes when the field is absent in YAML. 
@@ -412,10 +408,6 @@ func (o *Options) UnmarshalYAML(value *yaml.Node) error { o.TargetAttributes = legacy.Strategies } } - - if o.MaxIterations == nil { - o.MaxIterations = new(4) - } return nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml_test.go index 4e5f5c3aaa5..cc252d212a3 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml_test.go @@ -189,15 +189,10 @@ func TestOptions_YAMLFields(t *testing.T) { input := ` eval_model: gpt-4.1 -mode: full target_attributes: - prompt - tool -budget: 500 max_iterations: 10 -min_improvement: 0.05 -improvement_threshold: 0.1 -pass_threshold: 0.8 keep_versions: true tasks_per_iteration: 20 reflection_model: gpt-4o @@ -206,14 +201,9 @@ reflection_model: gpt-4o require.NoError(t, yaml.Unmarshal([]byte(input), &opts)) assert.Equal(t, "gpt-4.1", opts.EvalModel) - assert.Equal(t, "full", opts.Mode) assert.Equal(t, []string{"prompt", "tool"}, opts.TargetAttributes) - assert.Equal(t, 500, opts.Budget) require.NotNil(t, opts.MaxIterations) assert.Equal(t, 10, *opts.MaxIterations) - assert.InDelta(t, 0.05, opts.MinImprovement, 0.001) - assert.InDelta(t, 0.1, opts.ImprovementThreshold, 0.001) - assert.InDelta(t, 0.8, opts.PassThreshold, 0.001) assert.True(t, opts.KeepVersions) assert.Equal(t, 20, opts.TasksPerIteration) assert.Equal(t, "gpt-4o", opts.ReflectionModel) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go index 212b949dfea..c2ac594e283 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go @@ -109,12 +109,8 @@ type TargetConfig struct { // OptimizeOptions controls 
the optimization run. type OptimizeOptions struct { - Budget int `json:"budget,omitempty"` - MaxIterations *int `json:"maxIterations,omitempty"` - MinImprovement float64 `json:"minImprovement,omitempty"` - ImprovementThreshold float64 `json:"improvementThreshold,omitempty"` - PassThreshold float64 `json:"passThreshold,omitempty"` - EvalModel string `json:"evalModel,omitempty"` + MaxIterations *int `json:"maxIterations,omitempty"` + EvalModel string `json:"evalModel,omitempty"` // Send as both "strategies" (current server) and "targetAttributes" (future). Strategies []string `json:"strategies,omitempty"` TargetAttributes []string `json:"targetAttributes,omitempty"` @@ -123,7 +119,7 @@ type OptimizeOptions struct { TasksPerIteration int `json:"tasksPerIteration,omitempty"` MaxReflectionTasks int `json:"maxReflectionTasks,omitempty"` ReflectionModel string `json:"reflectionModel,omitempty"` - Mode string `json:"mode,omitempty"` + EvaluationLevel string `json:"evaluationLevel,omitempty"` } // --- Response models --- @@ -136,28 +132,28 @@ type OptimizeResponse struct { // OptimizeJobStatus is the full status of an optimization job. 
type OptimizeJobStatus struct { - OperationID string `json:"operationId"` - Status string `json:"status"` - CreatedAt string `json:"createdAt"` - UpdatedAt string `json:"updatedAt"` - Agent *AgentDefinition `json:"agent,omitempty"` - Progress *JobProgress `json:"progress,omitempty"` - Error *JobError `json:"error,omitempty"` - Baseline *CandidateResult `json:"baseline,omitempty"` - Best *CandidateResult `json:"best,omitempty"` - Candidates []CandidateResult `json:"candidates,omitempty"` - AllStrategiesFailed bool `json:"allStrategiesFailed,omitempty"` - Warnings []string `json:"warnings,omitempty"` + OperationID string `json:"operationId"` + Status string `json:"status"` + CreatedAt string `json:"createdAt"` + UpdatedAt string `json:"updatedAt"` + Agent *AgentDefinition `json:"agent,omitempty"` + Progress *JobProgress `json:"progress,omitempty"` + Error *JobError `json:"error,omitempty"` + Baseline *CandidateResult `json:"baseline,omitempty"` + Best *CandidateResult `json:"best,omitempty"` + Candidates []CandidateResult `json:"candidates,omitempty"` + AllTargetAttributesFailed bool `json:"allTargetAttributesFailed,omitempty"` + Warnings []string `json:"warnings,omitempty"` } // JobProgress reports iteration-level progress. type JobProgress struct { - CurrentStrategy string `json:"currentStrategy"` - CurrentIteration int `json:"currentIteration"` - TasksCompleted int `json:"tasksCompleted"` - TasksTotal int `json:"tasksTotal"` - BestScore float64 `json:"bestScore"` - ElapsedSeconds float64 `json:"elapsedSeconds"` + CurrentTargetAttribute string `json:"currentTargetAttribute"` + CurrentIteration int `json:"currentIteration"` + TasksCompleted int `json:"tasksCompleted"` + TasksTotal int `json:"tasksTotal"` + BestScore float64 `json:"bestScore"` + ElapsedSeconds float64 `json:"elapsedSeconds"` } // JobError captures an error from a failed job. 
diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go index a4f3b8ebda8..9392943cc66 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go @@ -45,19 +45,14 @@ func TestOptimizeRequest_RoundTrip(t *testing.T) { {Name: "global-crit", Instruction: "be concise"}, }, Options: OptimizeOptions{ - Budget: 100, - MaxIterations: new(5), - MinImprovement: 0.01, - ImprovementThreshold: 0.05, - PassThreshold: 0.8, - EvalModel: "gpt-4o-mini", - Strategies: []string{"prompt_mutation"}, - TargetAttributes: []string{"prompt_mutation"}, - KeepVersions: true, - TasksPerIteration: 10, - MaxReflectionTasks: 3, - ReflectionModel: "gpt-4o", - Mode: "full", + MaxIterations: new(5), + EvalModel: "gpt-4o-mini", + Strategies: []string{"prompt_mutation"}, + TargetAttributes: []string{"prompt_mutation"}, + KeepVersions: true, + TasksPerIteration: 10, + MaxReflectionTasks: 3, + ReflectionModel: "gpt-4o", }, } @@ -69,8 +64,8 @@ func TestOptimizeRequest_RoundTrip(t *testing.T) { for _, field := range []string{ `"agent"`, `"foundryProjectUrl"`, `"agentName"`, `"agentVersion"`, `"dataset"`, `"trainDatasetReference"`, `"evaluators"`, `"criteria"`, - `"options"`, `"evalModel"`, `"maxIterations"`, `"minImprovement"`, - `"improvementThreshold"`, `"passThreshold"`, `"keepVersions"`, + `"options"`, `"evalModel"`, `"maxIterations"`, + `"keepVersions"`, `"tasksPerIteration"`, `"maxReflectionTasks"`, `"reflectionModel"`, `"strategies"`, `"targetAttributes"`, `"groundTruth"`, `"systemPrompt"`, `"skills"`, } { @@ -88,10 +83,8 @@ func TestOptimizeRequest_RoundTrip(t *testing.T) { assert.Equal(t, "4", got.Dataset[0].GroundTruth) assert.NotNil(t, got.TrainDatasetReference) assert.Equal(t, "train-ds", got.TrainDatasetReference.Name) - assert.Equal(t, 100, 
got.Options.Budget) assert.Equal(t, "gpt-4o-mini", got.Options.EvalModel) assert.True(t, got.Options.KeepVersions) - assert.Equal(t, "full", got.Options.Mode) } func TestOptimizeJobStatus_RoundTrip(t *testing.T) { @@ -107,12 +100,12 @@ func TestOptimizeJobStatus_RoundTrip(t *testing.T) { AgentName: "agent-1", }, Progress: &JobProgress{ - CurrentStrategy: "prompt_mutation", - CurrentIteration: 3, - TasksCompleted: 15, - TasksTotal: 20, - BestScore: 0.85, - ElapsedSeconds: 120.5, + CurrentTargetAttribute: "prompt_mutation", + CurrentIteration: 3, + TasksCompleted: 15, + TasksTotal: 20, + BestScore: 0.85, + ElapsedSeconds: 120.5, }, Baseline: &CandidateResult{ Name: "baseline", @@ -149,7 +142,7 @@ func TestOptimizeJobStatus_RoundTrip(t *testing.T) { s := string(data) for _, field := range []string{ `"operationId"`, `"status"`, `"createdAt"`, `"updatedAt"`, - `"progress"`, `"currentStrategy"`, `"currentIteration"`, + `"progress"`, `"currentTargetAttribute"`, `"currentIteration"`, `"tasksCompleted"`, `"tasksTotal"`, `"bestScore"`, `"elapsedSeconds"`, `"baseline"`, `"best"`, `"candidates"`, `"candidateId"`, `"avgScore"`, `"avgTokens"`, `"passRate"`, `"mutations"`, From 49ed57db9970cefe50f67ef3a6ca52d2710584c4 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 21 May 2026 21:36:11 -0700 Subject: [PATCH 31/33] more tune for tool optimization --- .../azure.ai.agents/internal/cmd/eval.go | 6 ++ .../internal/cmd/optimize_apply.go | 36 +++++-- .../internal/cmd/optimize_apply_test.go | 97 +++++++++++++++++++ .../internal/cmd/optimize_deploy.go | 4 +- .../internal/cmd/optimize_helpers.go | 23 ++++- .../internal/cmd/optimize_helpers_test.go | 47 +++++++++ .../internal/cmd/optimize_prompts.go | 56 ++++------- 7 files changed, 216 insertions(+), 53 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go index 3cfc8addf66..d53ae625948 100644 --- 
a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval.go @@ -164,6 +164,12 @@ func resolveEvalContext(ctx context.Context, options evalContextOptions) (*evalR fmt.Println(output.WithGrayFormat(" Resolving Foundry project endpoint...")) projectEndpoint := options.projectEndpoint projectEndpointSource := "--project-endpoint" + if projectEndpoint == "" { + if v := getEnvValue("FOUNDRY_PROJECT_ENDPOINT"); v != "" { + projectEndpoint = v + projectEndpointSource = "FOUNDRY_PROJECT_ENDPOINT" + } + } if projectEndpoint == "" { if v := getEnvValue("AZURE_AI_PROJECT_ENDPOINT"); v != "" { projectEndpoint = v diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go index 60823566d99..3afdc0aeeb4 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go @@ -324,10 +324,10 @@ func writeAgentConfigFromCandidate(candidateDir string, rawConfig json.RawMessag meta.SkillDir = opt_eval.SkillsDir } - // Write tool_definitions as a JSON file. + // Write the candidate config as tools.json (preserves original structure). if m != nil { - if err := writeToolDefinitions(candidateDir, m); err != nil { - return fmt.Errorf("writing candidate tool definitions: %w", err) + if err := writeToolsFile(candidateDir, m); err != nil { + return fmt.Errorf("writing candidate tools file: %w", err) } if _, err := os.Stat(filepath.Join(candidateDir, opt_eval.ToolsFile)); err == nil { meta.ToolsFile = opt_eval.ToolsFile @@ -401,17 +401,33 @@ func writeInlineSkills(candidateDir string, config map[string]any) error { return nil } -// writeToolDefinitions extracts the "tool_definitions" field from a candidate -// config and writes it as tools.json. 
-func writeToolDefinitions(candidateDir string, config map[string]any) error { - toolsRaw, exists := config["tool_definitions"] - if !exists { +// writeToolsFile writes the candidate config as tools.json, preserving its +// original structure (may be a list or an object). +func writeToolsFile(candidateDir string, config map[string]any) error { + toolDefs, hasDefs := config["toolDefinitions"] + toolDescs, hasDescs := config["toolDescriptions"] + if !hasDefs && !hasDescs { return nil } - data, err := json.MarshalIndent(toolsRaw, "", " ") + // Write whichever is present. If only one key exists, write its value + // directly (preserves array or object). If both exist, wrap in an object. + var payload any + switch { + case hasDefs && hasDescs: + payload = map[string]any{ + "toolDefinitions": toolDefs, + "toolDescriptions": toolDescs, + } + case hasDefs: + payload = toolDefs + default: + payload = toolDescs + } + + data, err := json.MarshalIndent(payload, "", " ") if err != nil { - return fmt.Errorf("serializing tool definitions: %w", err) + return fmt.Errorf("serializing tools file: %w", err) } return os.WriteFile(filepath.Join(candidateDir, opt_eval.ToolsFile), data, 0600) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go index fc1ef13a373..ed9e1488a97 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go @@ -421,3 +421,100 @@ func TestIsReservedEnvVarError(t *testing.T) { }) } } + +// ---- writeToolsFile ---- + +func TestWriteToolsFile_NoKeys(t *testing.T) { + t.Parallel() + dir := t.TempDir() + err := writeToolsFile(dir, map[string]any{"name": "agent"}) + require.NoError(t, err) + assert.NoFileExists(t, filepath.Join(dir, opt_eval.ToolsFile)) +} + +func TestWriteToolsFile_OnlyToolDefinitions(t *testing.T) { + t.Parallel() + dir := t.TempDir() + defs := 
[]any{ + map[string]any{"type": "function", "function": map[string]any{"name": "search"}}, + } + err := writeToolsFile(dir, map[string]any{"toolDefinitions": defs}) + require.NoError(t, err) + + data, err := os.ReadFile(filepath.Join(dir, opt_eval.ToolsFile)) //nolint:gosec // test file path + require.NoError(t, err) + + // Should be written as a raw list (no wrapper object). + var parsed []any + require.NoError(t, json.Unmarshal(data, &parsed)) + assert.Len(t, parsed, 1) +} + +func TestWriteToolsFile_OnlyToolDescriptions(t *testing.T) { + t.Parallel() + dir := t.TempDir() + descs := map[string]any{ + "lookup_policy": map[string]any{ + "description": "Look up policy", + "parameters": map[string]any{}, + }, + } + err := writeToolsFile(dir, map[string]any{"toolDescriptions": descs}) + require.NoError(t, err) + + data, err := os.ReadFile(filepath.Join(dir, opt_eval.ToolsFile)) //nolint:gosec // test file path + require.NoError(t, err) + + // Should be written as a raw object (no wrapper). + var parsed map[string]any + require.NoError(t, json.Unmarshal(data, &parsed)) + assert.Contains(t, parsed, "lookup_policy") +} + +func TestWriteToolsFile_BothKeys(t *testing.T) { + t.Parallel() + dir := t.TempDir() + defs := []any{ + map[string]any{"type": "function", "function": map[string]any{"name": "search"}}, + } + descs := map[string]any{ + "search": map[string]any{"description": "Search stuff", "parameters": map[string]any{}}, + } + err := writeToolsFile(dir, map[string]any{ + "toolDefinitions": defs, + "toolDescriptions": descs, + }) + require.NoError(t, err) + + data, err := os.ReadFile(filepath.Join(dir, opt_eval.ToolsFile)) //nolint:gosec // test file path + require.NoError(t, err) + + var parsed map[string]any + require.NoError(t, json.Unmarshal(data, &parsed)) + assert.Contains(t, parsed, "toolDefinitions") + assert.Contains(t, parsed, "toolDescriptions") +} + +func TestWriteAgentConfigFromCandidate_WithToolDescriptions(t *testing.T) { + t.Parallel() + dir := 
t.TempDir() + config := mustMarshal(t, map[string]any{ + "systemPrompt": "prompt", + "toolDescriptions": map[string]any{ + "check_budget": map[string]any{ + "description": "Check the budget", + "parameters": map[string]any{}, + }, + }, + }) + + err := writeAgentConfigFromCandidate(dir, config) + require.NoError(t, err) + + assert.FileExists(t, filepath.Join(dir, opt_eval.ToolsFile)) + + // Verify metadata references tools_file. + metaData, err := os.ReadFile(filepath.Join(dir, opt_eval.MetadataFile)) //nolint:gosec // test file path + require.NoError(t, err) + assert.Contains(t, string(metaData), "tools_file") +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go index 5b6d23e1d19..d757e001d91 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go @@ -247,8 +247,8 @@ func resolveProjectEndpointForDeploy(ctx context.Context, connFlags *optimizeCon projectEndpoint, err := resolveAgentEndpoint(ctx, "", "") if err != nil { - if ep := os.Getenv("AZURE_AI_PROJECT_ENDPOINT"); ep != "" { - return strings.TrimRight(ep, "/"), nil + if ep := projectEndpointFromEnv(); ep != "" { + return ep, nil } return "", fmt.Errorf("could not resolve project endpoint: %w\n\n"+ "Provide --project-endpoint (-p), or run 'azd ai agent init'", err) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go index cfa0de52a21..d29e3251fa9 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go @@ -35,7 +35,20 @@ func (f *optimizeConnectionFlags) register(cmd *cobra.Command) { } // resolve returns the project endpoint for optimize API calls. 
-// Priority: --endpoint flag → --project-endpoint → azd environment → AZURE_AI_PROJECT_ENDPOINT env var. +// projectEndpointFromEnv returns the project endpoint from FOUNDRY_PROJECT_ENDPOINT +// or AZURE_AI_PROJECT_ENDPOINT environment variables (in that priority order). +// Returns empty string if neither is set. +func projectEndpointFromEnv() string { + if ep := os.Getenv("FOUNDRY_PROJECT_ENDPOINT"); ep != "" { + return strings.TrimRight(ep, "/") + } + if ep := os.Getenv("AZURE_AI_PROJECT_ENDPOINT"); ep != "" { + return strings.TrimRight(ep, "/") + } + return "" +} + +// Priority: --endpoint flag → --project-endpoint → azd environment → FOUNDRY_PROJECT_ENDPOINT / AZURE_AI_PROJECT_ENDPOINT env var. func (f *optimizeConnectionFlags) resolve(ctx context.Context) (string, error) { if f.endpoint != "" { return strings.TrimRight(f.endpoint, "/"), nil @@ -49,12 +62,12 @@ func (f *optimizeConnectionFlags) resolve(ctx context.Context) (string, error) { // Try azd environment (works when running under azd) projectEndpoint, err := resolveAgentEndpoint(ctx, "", "") if err != nil { - // Fall back to AZURE_AI_PROJECT_ENDPOINT env var (works standalone) - if ep := os.Getenv("AZURE_AI_PROJECT_ENDPOINT"); ep != "" { - return strings.TrimRight(ep, "/"), nil + // Fall back to FOUNDRY_PROJECT_ENDPOINT or AZURE_AI_PROJECT_ENDPOINT env var (works standalone) + if ep := projectEndpointFromEnv(); ep != "" { + return ep, nil } return "", fmt.Errorf("could not resolve project endpoint\n\n" + - "Set AZURE_AI_PROJECT_ENDPOINT, provide --project-endpoint (-p),\n" + + "Set FOUNDRY_PROJECT_ENDPOINT or AZURE_AI_PROJECT_ENDPOINT, provide --project-endpoint (-p),\n" + "or run 'azd ai agent init'") } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go index 2bca62ec0a7..e325e84f653 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go +++ 
b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go @@ -344,3 +344,50 @@ func TestReportOptimizationDeployments_PanicRecovery(t *testing.T) { // Candidate key should remain since the promote never succeeded. assert.Equal(t, "cand-panic", envServer.values["dev"]["AGENT_SVC_OPTIMIZATION_CANDIDATE_ID"]) } + +func TestOptimizeConnectionFlags_Resolve_FoundryEnvVar(t *testing.T) { + t.Setenv("FOUNDRY_PROJECT_ENDPOINT", "https://foundry.example.com/") + f := &optimizeConnectionFlags{} + endpoint, err := f.resolve(t.Context()) + assert.NoError(t, err) + assert.Equal(t, "https://foundry.example.com", endpoint) +} + +func TestOptimizeConnectionFlags_Resolve_AzureAIEnvVar(t *testing.T) { + t.Setenv("AZURE_AI_PROJECT_ENDPOINT", "https://azure-ai.example.com/") + f := &optimizeConnectionFlags{} + endpoint, err := f.resolve(t.Context()) + assert.NoError(t, err) + assert.Equal(t, "https://azure-ai.example.com", endpoint) +} + +func TestOptimizeConnectionFlags_Resolve_FoundryTakesPriorityOverAzureAI(t *testing.T) { + t.Setenv("FOUNDRY_PROJECT_ENDPOINT", "https://foundry.example.com") + t.Setenv("AZURE_AI_PROJECT_ENDPOINT", "https://azure-ai.example.com") + f := &optimizeConnectionFlags{} + endpoint, err := f.resolve(t.Context()) + assert.NoError(t, err) + assert.Equal(t, "https://foundry.example.com", endpoint) +} + +func TestResolveProjectEndpointForDeploy_FoundryEnvVar(t *testing.T) { + t.Setenv("FOUNDRY_PROJECT_ENDPOINT", "https://foundry-deploy.example.com/") + ep, err := resolveProjectEndpointForDeploy(t.Context(), &optimizeConnectionFlags{}) + assert.NoError(t, err) + assert.Equal(t, "https://foundry-deploy.example.com", ep) +} + +func TestResolveProjectEndpointForDeploy_AzureAIEnvVar(t *testing.T) { + t.Setenv("AZURE_AI_PROJECT_ENDPOINT", "https://azure-ai-deploy.example.com/") + ep, err := resolveProjectEndpointForDeploy(t.Context(), &optimizeConnectionFlags{}) + assert.NoError(t, err) + assert.Equal(t, "https://azure-ai-deploy.example.com", ep) +} + 
+func TestResolveProjectEndpointForDeploy_FoundryTakesPriorityOverAzureAI(t *testing.T) { + t.Setenv("FOUNDRY_PROJECT_ENDPOINT", "https://foundry-deploy.example.com") + t.Setenv("AZURE_AI_PROJECT_ENDPOINT", "https://azure-ai-deploy.example.com") + ep, err := resolveProjectEndpointForDeploy(t.Context(), &optimizeConnectionFlags{}) + assert.NoError(t, err) + assert.Equal(t, "https://foundry-deploy.example.com", ep) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go index e0601e0360b..99d9d19ba4b 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go @@ -374,9 +374,9 @@ func isAllowedReflectionModel(modelName string) bool { } // resolveOptimizeReflectionModel prompts the user to select a reflection model -// for optimization. Only deployments whose underlying model is in the allowed -// reflection model set are shown. If the eval model is allowed, skipping -// defaults to it; otherwise the user must pick from the filtered list. +// for optimization. All deployments are shown; if the user picks one whose +// model is not in the recommended set, a warning is printed. This avoids +// requiring client-side updates when the server's allowed set changes. func resolveOptimizeReflectionModel(ctx context.Context, cfg *OptimizeConfig) error { azdClient, clientErr := azdext.NewAzdClient() if clientErr != nil { @@ -391,34 +391,20 @@ func resolveOptimizeReflectionModel(ctx context.Context, cfg *OptimizeConfig) er allowedList := strings.Join(allowedReflectionModels, ", ") - // Check if the eval model deployment uses an allowed model. 
- evalModelAllowed := false - for _, d := range deployments { - if d.Name == cfg.Options.EvalModel && isAllowedReflectionModel(d.ModelName) { - evalModelAllowed = true - break - } - } - var choices []*azdext.SelectChoice seen := make(map[string]bool) - // Only offer "Skip" when the eval model is in the allowed set. - if evalModelAllowed { - choices = append(choices, &azdext.SelectChoice{ - Label: fmt.Sprintf("Skip (use eval model: %s)", cfg.Options.EvalModel), - Value: "", - }) - } + // Always offer Skip — defaults to using the eval model. + choices = append(choices, &azdext.SelectChoice{ + Label: fmt.Sprintf("Skip (use eval model: %s)", cfg.Options.EvalModel), + Value: "", + }) - // Only include deployments whose underlying model is in the allowed set. + // Show all deployments — don't filter by allowed set. for _, d := range deployments { if seen[d.Name] { continue } - if !isAllowedReflectionModel(d.ModelName) { - continue - } label := d.Name if d.ModelName != "" && d.ModelName != d.Name { label = fmt.Sprintf("%s (%s)", d.Name, d.ModelName) @@ -430,18 +416,7 @@ func resolveOptimizeReflectionModel(ctx context.Context, cfg *OptimizeConfig) er seen[d.Name] = true } - if len(choices) == 0 { - fmt.Printf("Warning: no deployed models match the allowed reflection model set: %s\n", allowedList) - return nil - } - - message := fmt.Sprintf("Select a reflection model (allowed: %s)", allowedList) - if !evalModelAllowed && cfg.Options.EvalModel != "" { - message = fmt.Sprintf( - "Eval model %q is not in the allowed reflection model set (%s). 
Select a reflection model", - cfg.Options.EvalModel, allowedList, - ) - } + message := fmt.Sprintf("Select a reflection model (recommended: %s)", allowedList) selectResp, selectErr := azdClient.Prompt().Select(ctx, &azdext.SelectRequest{ Options: &azdext.SelectOptions{ @@ -455,7 +430,16 @@ func resolveOptimizeReflectionModel(ctx context.Context, cfg *OptimizeConfig) er idx := int(*selectResp.Value) if idx >= 0 && idx < len(choices) && choices[idx].Value != "" { - cfg.Options.ReflectionModel = choices[idx].Value + selected := choices[idx].Value + // Warn if the selected deployment's model is not in the recommended set. + for _, d := range deployments { + if d.Name == selected && !isAllowedReflectionModel(d.ModelName) { + fmt.Printf("Warning: deployment %q uses model %q which is not in the recommended "+ + "reflection model set (%s). The server may reject it.\n", selected, d.ModelName, allowedList) + break + } + } + cfg.Options.ReflectionModel = selected } // Empty Value means Skip — leave ReflectionModel empty (server uses eval model). From c5ea305e08fa22791e6b062027ea9e89bfb0b077 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Thu, 21 May 2026 21:40:36 -0700 Subject: [PATCH 32/33] fix spelling --- .../azure.ai.agents/internal/cmd/optimize_apply.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go index 3afdc0aeeb4..9716d3190d5 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go @@ -405,8 +405,8 @@ func writeInlineSkills(candidateDir string, config map[string]any) error { // original structure (may be a list or an object). 
func writeToolsFile(candidateDir string, config map[string]any) error { toolDefs, hasDefs := config["toolDefinitions"] - toolDescs, hasDescs := config["toolDescriptions"] - if !hasDefs && !hasDescs { + toolDescriptions, hasToolDescriptions := config["toolDescriptions"] + if !hasDefs && !hasToolDescriptions { return nil } @@ -414,15 +414,15 @@ func writeToolsFile(candidateDir string, config map[string]any) error { // directly (preserves array or object). If both exist, wrap in an object. var payload any switch { - case hasDefs && hasDescs: + case hasDefs && hasToolDescriptions: payload = map[string]any{ "toolDefinitions": toolDefs, - "toolDescriptions": toolDescs, + "toolDescriptions": toolDescriptions, } case hasDefs: payload = toolDefs default: - payload = toolDescs + payload = toolDescriptions } data, err := json.MarshalIndent(payload, "", " ") From b91a61dc9f6ac701aeac36a749c7cf35e16c3490 Mon Sep 17 00:00:00 2001 From: zyysurely Date: Fri, 22 May 2026 11:31:46 -0700 Subject: [PATCH 33/33] fix conflict --- cli/azd/extensions/azure.ai.agents/internal/cmd/listen_test.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/listen_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/listen_test.go index 29c4b1caf19..2350896e6cc 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/listen_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/listen_test.go @@ -6,10 +6,7 @@ package cmd import ( "os" "path/filepath" -<<<<<<< HEAD -======= "strings" ->>>>>>> main "testing" "azureaiagent/internal/project"