From 0a634791778cf1c6bf65d37732073c2fc12de96b Mon Sep 17 00:00:00 2001 From: Saanika Gupta Date: Thu, 19 Mar 2026 16:57:53 +0530 Subject: [PATCH 1/2] Initial changes --- .../internal/cmd/job_submit.go | 4 +- .../internal/service/compute_resolver.go | 115 +++++++++++++++++- 2 files changed, 112 insertions(+), 7 deletions(-) diff --git a/cli/azd/extensions/azure.ai.customtraining/internal/cmd/job_submit.go b/cli/azd/extensions/azure.ai.customtraining/internal/cmd/job_submit.go index 4cdc4e91636..6ac1a13686f 100644 --- a/cli/azd/extensions/azure.ai.customtraining/internal/cmd/job_submit.go +++ b/cli/azd/extensions/azure.ai.customtraining/internal/cmd/job_submit.go @@ -54,6 +54,8 @@ func newJobSubmitCommand() *cobra.Command { accountName := envValues[utils.EnvAzureAccountName] projectName := envValues[utils.EnvAzureProjectName] tenantID := envValues[utils.EnvAzureTenantID] + subscriptionID := envValues[utils.EnvAzureSubscriptionID] + resourceGroup := envValues[utils.EnvAzureResourceGroup] if accountName == "" || projectName == "" { return fmt.Errorf("environment not configured. Run 'azd ai training init' first") @@ -91,7 +93,7 @@ func newJobSubmitCommand() *cobra.Command { // Resolve references (compute name → ARM ID, local paths → datastore URIs) resolver := service.NewJobResolver( - service.NewDefaultComputeResolver(), + service.NewDefaultComputeResolver(subscriptionID, resourceGroup, accountName, credential), service.NewDefaultCodeResolver(uploadSvc, projectName), service.NewDefaultInputResolver(uploadSvc), ) diff --git a/cli/azd/extensions/azure.ai.customtraining/internal/service/compute_resolver.go b/cli/azd/extensions/azure.ai.customtraining/internal/service/compute_resolver.go index 377264364d6..f0fef35babe 100644 --- a/cli/azd/extensions/azure.ai.customtraining/internal/service/compute_resolver.go +++ b/cli/azd/extensions/azure.ai.customtraining/internal/service/compute_resolver.go @@ -5,17 +5,120 @@ package service import ( "context" + "encoding/json" "fmt" + "io" + "net/http" + "net/url" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" +) + +const ( + armBaseURL = "https://management.azure.com" + armComputeAPIVersion = "2026-01-15-preview" ) -// DefaultComputeResolver is a stub implementation of ComputeResolver. -// Replace with actual ARM API call to resolve compute name to ARM resource ID. -type DefaultComputeResolver struct{} +// DefaultComputeResolver resolves a compute name to a full ARM resource ID +// by calling the ARM control plane GET endpoint. +// +// ARM URL: +// +// GET https://management.azure.com/subscriptions/{sub}/resourceGroups/{rg}/providers/Microsoft.CognitiveServices/accounts/{account}/computes/{name}?api-version=2026-01-15-preview +// +// When compute GET moves to the data plane, this resolver can be swapped out +// for a DataPlaneComputeResolver without changing any other code. +type DefaultComputeResolver struct { + subscriptionID string + resourceGroup string + accountName string + credential azcore.TokenCredential + httpClient *http.Client +} -func NewDefaultComputeResolver() *DefaultComputeResolver { - return &DefaultComputeResolver{} +// NewDefaultComputeResolver creates a compute resolver that calls the ARM API. +// - subscriptionID: Azure subscription ID +// - resourceGroup: resource group containing the AI account +// - accountName: Azure AI Foundry account name +// - credential: token credential for ARM scope +func NewDefaultComputeResolver(subscriptionID, resourceGroup, accountName string, credential azcore.TokenCredential) *DefaultComputeResolver { + return &DefaultComputeResolver{ + subscriptionID: subscriptionID, + resourceGroup: resourceGroup, + accountName: accountName, + credential: credential, + httpClient: &http.Client{Timeout: 30 * time.Second}, + } } +// ResolveCompute calls the ARM API to resolve a compute name to its full ARM resource ID. +// Returns a helpful error message if the user lacks permissions (401/403). func (r *DefaultComputeResolver) ResolveCompute(ctx context.Context, computeName string) (string, error) { - return "", fmt.Errorf("compute resolution not implemented: provide a full ARM resource ID for compute '%s'", computeName) + reqURL := fmt.Sprintf( + "%s/subscriptions/%s/resourceGroups/%s/providers/Microsoft.CognitiveServices/accounts/%s/computes/%s?api-version=%s", + armBaseURL, + url.PathEscape(r.subscriptionID), + url.PathEscape(r.resourceGroup), + url.PathEscape(r.accountName), + url.PathEscape(computeName), + armComputeAPIVersion, + ) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) + if err != nil { + return "", fmt.Errorf("failed to create compute request: %w", err) + } + + // Get ARM-scoped bearer token + token, err := r.credential.GetToken(ctx, policy.TokenRequestOptions{ + Scopes: []string{"https://management.azure.com/.default"}, + }) + if err != nil { + return "", fmt.Errorf("failed to get ARM token: %w", err) + } + req.Header.Set("Authorization", "Bearer "+token.Token) + + resp, err := r.httpClient.Do(req) + if err != nil { + return "", fmt.Errorf("failed to call ARM compute API: %w", err) + } + defer resp.Body.Close() + + body, _ := io.ReadAll(resp.Body) + + // Permission error — guide user to provide full ARM ID instead + if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden { + return "", fmt.Errorf( + "insufficient permissions to resolve compute '%s'.\n"+ + " Provide the full ARM resource ID in your YAML instead:\n"+ + " compute: /subscriptions/%s/resourceGroups/%s/providers/Microsoft.CognitiveServices/accounts/%s/computes/%s", + computeName, r.subscriptionID, r.resourceGroup, r.accountName, computeName, + ) + } + + if resp.StatusCode == http.StatusNotFound { + return "", fmt.Errorf("compute '%s' not found in account '%s'", computeName, r.accountName) + } + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("ARM compute API error (%d): %s", resp.StatusCode, string(body)) + } + + // Parse the response to extract the ARM resource ID + var result struct { + ID string `json:"id"` + Name string `json:"name"` + } + if err := json.Unmarshal(body, &result); err != nil { + return "", fmt.Errorf("failed to parse compute response: %w", err) + } + + if result.ID == "" { + return "", fmt.Errorf("compute '%s' response missing resource ID", computeName) + } + + fmt.Printf(" ✓ Compute resolved: %s\n", computeName) + return result.ID, nil } From b6b6a3f79a8335db85dc56bad1f9c2d95c04ca8b Mon Sep 17 00:00:00 2001 From: Saanika Gupta Date: Fri, 20 Mar 2026 10:28:00 +0530 Subject: [PATCH 2/2] Refactor --- .../internal/cmd/job_submit.go | 5 +- .../internal/service/compute_resolver.go | 102 ++---------------- .../pkg/client/client.go | 49 +++++++++ .../pkg/client/compute.go | 77 +++++++++++++ .../pkg/models/compute.go | 10 ++ 5 files changed, 149 insertions(+), 94 deletions(-) create mode 100644 cli/azd/extensions/azure.ai.customtraining/pkg/client/compute.go create mode 100644 cli/azd/extensions/azure.ai.customtraining/pkg/models/compute.go diff --git a/cli/azd/extensions/azure.ai.customtraining/internal/cmd/job_submit.go b/cli/azd/extensions/azure.ai.customtraining/internal/cmd/job_submit.go index 6ac1a13686f..970217a99a6 100644 --- a/cli/azd/extensions/azure.ai.customtraining/internal/cmd/job_submit.go +++ b/cli/azd/extensions/azure.ai.customtraining/internal/cmd/job_submit.go @@ -75,6 +75,9 @@ func newJobSubmitCommand() *cobra.Command { return fmt.Errorf("failed to create API client: %w", err) } + // Set ARM context for control plane calls (e.g. compute resolution) + apiClient.SetARMContext(subscriptionID, resourceGroup, accountName) + // Auto-generate job name if not provided (same pattern as AML SDK) if jobDef.Name == "" { jobDef.Name = utils.GenerateJobName() @@ -93,7 +96,7 @@ func newJobSubmitCommand() *cobra.Command { // Resolve references (compute name → ARM ID, local paths → datastore URIs) resolver := service.NewJobResolver( - service.NewDefaultComputeResolver(subscriptionID, resourceGroup, accountName, credential), + service.NewDefaultComputeResolver(apiClient), service.NewDefaultCodeResolver(uploadSvc, projectName), service.NewDefaultInputResolver(uploadSvc), ) diff --git a/cli/azd/extensions/azure.ai.customtraining/internal/service/compute_resolver.go b/cli/azd/extensions/azure.ai.customtraining/internal/service/compute_resolver.go index f0fef35babe..c330041943e 100644 --- a/cli/azd/extensions/azure.ai.customtraining/internal/service/compute_resolver.go +++ b/cli/azd/extensions/azure.ai.customtraining/internal/service/compute_resolver.go @@ -5,118 +5,34 @@ package service import ( "context" - "encoding/json" "fmt" - "io" - "net/http" - "net/url" - "time" - "github.com/Azure/azure-sdk-for-go/sdk/azcore" - "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" -) - -const ( - armBaseURL = "https://management.azure.com" - armComputeAPIVersion = "2026-01-15-preview" + "azure.ai.customtraining/pkg/client" ) // DefaultComputeResolver resolves a compute name to a full ARM resource ID -// by calling the ARM control plane GET endpoint. -// -// ARM URL: -// -// GET https://management.azure.com/subscriptions/{sub}/resourceGroups/{rg}/providers/Microsoft.CognitiveServices/accounts/{account}/computes/{name}?api-version=2026-01-15-preview +// by calling the ARM control plane API via the Client. // // When compute GET moves to the data plane, this resolver can be swapped out // for a DataPlaneComputeResolver without changing any other code. type DefaultComputeResolver struct { - subscriptionID string - resourceGroup string - accountName string - credential azcore.TokenCredential - httpClient *http.Client + client *client.Client } -// NewDefaultComputeResolver creates a compute resolver that calls the ARM API. -// - subscriptionID: Azure subscription ID -// - resourceGroup: resource group containing the AI account -// - accountName: Azure AI Foundry account name -// - credential: token credential for ARM scope -func NewDefaultComputeResolver(subscriptionID, resourceGroup, accountName string, credential azcore.TokenCredential) *DefaultComputeResolver { +// NewDefaultComputeResolver creates a compute resolver that calls the ARM API +// via the given Client. The client must have ARM context set via SetARMContext. +func NewDefaultComputeResolver(apiClient *client.Client) *DefaultComputeResolver { return &DefaultComputeResolver{ - subscriptionID: subscriptionID, - resourceGroup: resourceGroup, - accountName: accountName, - credential: credential, - httpClient: &http.Client{Timeout: 30 * time.Second}, + client: apiClient, } } // ResolveCompute calls the ARM API to resolve a compute name to its full ARM resource ID. // Returns a helpful error message if the user lacks permissions (401/403). func (r *DefaultComputeResolver) ResolveCompute(ctx context.Context, computeName string) (string, error) { - reqURL := fmt.Sprintf( - "%s/subscriptions/%s/resourceGroups/%s/providers/Microsoft.CognitiveServices/accounts/%s/computes/%s?api-version=%s", - armBaseURL, - url.PathEscape(r.subscriptionID), - url.PathEscape(r.resourceGroup), - url.PathEscape(r.accountName), - url.PathEscape(computeName), - armComputeAPIVersion, - ) - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) - if err != nil { - return "", fmt.Errorf("failed to create compute request: %w", err) - } - - // Get ARM-scoped bearer token - token, err := r.credential.GetToken(ctx, policy.TokenRequestOptions{ - Scopes: []string{"https://management.azure.com/.default"}, - }) - if err != nil { - return "", fmt.Errorf("failed to get ARM token: %w", err) - } - req.Header.Set("Authorization", "Bearer "+token.Token) - - resp, err := r.httpClient.Do(req) + result, err := r.client.GetCompute(ctx, computeName) if err != nil { - return "", fmt.Errorf("failed to call ARM compute API: %w", err) - } - defer resp.Body.Close() - - body, _ := io.ReadAll(resp.Body) - - // Permission error — guide user to provide full ARM ID instead - if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden { - return "", fmt.Errorf( - "insufficient permissions to resolve compute '%s'.\n"+ - " Provide the full ARM resource ID in your YAML instead:\n"+ - " compute: /subscriptions/%s/resourceGroups/%s/providers/Microsoft.CognitiveServices/accounts/%s/computes/%s", - computeName, r.subscriptionID, r.resourceGroup, r.accountName, computeName, - ) - } - - if resp.StatusCode == http.StatusNotFound { - return "", fmt.Errorf("compute '%s' not found in account '%s'", computeName, r.accountName) - } - - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("ARM compute API error (%d): %s", resp.StatusCode, string(body)) - } - - // Parse the response to extract the ARM resource ID - var result struct { - ID string `json:"id"` - Name string `json:"name"` - } - if err := json.Unmarshal(body, &result); err != nil { - return "", fmt.Errorf("failed to parse compute response: %w", err) - } - - if result.ID == "" { - return "", fmt.Errorf("compute '%s' response missing resource ID", computeName) + return "", err } fmt.Printf(" ✓ Compute resolved: %s\n", computeName) diff --git a/cli/azd/extensions/azure.ai.customtraining/pkg/client/client.go b/cli/azd/extensions/azure.ai.customtraining/pkg/client/client.go index 231b8ae3a2b..8e44353605f 100644 --- a/cli/azd/extensions/azure.ai.customtraining/pkg/client/client.go +++ b/cli/azd/extensions/azure.ai.customtraining/pkg/client/client.go @@ -25,12 +25,19 @@ const ( ) // Client is an HTTP client for Azure AI Foundry project APIs. +// It supports both data plane calls (via project endpoint) and ARM +// control plane calls (via SetARMContext). type Client struct { baseURL string subPath string apiVersion string credential azcore.TokenCredential httpClient *http.Client + + // ARM context fields (set via SetARMContext) + subscriptionID string + resourceGroup string + accountName string } // NewClient creates a new client from a project endpoint URL. @@ -74,6 +81,48 @@ func NewClient(projectEndpoint string, credential azcore.TokenCredential) (*Clie }, nil } +// SetARMContext configures the client for ARM control plane calls. +// Required before calling ARM methods like GetCompute. +func (c *Client) SetARMContext(subscriptionID, resourceGroup, accountName string) { + c.subscriptionID = subscriptionID + c.resourceGroup = resourceGroup + c.accountName = accountName +} + +// doARM executes an authenticated HTTP request against the ARM control plane. +// The path should be relative to https://management.azure.com/ (no leading slash). +func (c *Client) doARM(ctx context.Context, method, path string, body interface{}, apiVersion string) (*http.Response, error) { + reqURL := fmt.Sprintf("https://management.azure.com/%s?api-version=%s", path, apiVersion) + + fmt.Printf("[DEBUG] %s %s\n", method, reqURL) + + var bodyReader io.Reader + if body != nil { + data, err := json.Marshal(body) + if err != nil { + return nil, fmt.Errorf("failed to marshal request body: %w", err) + } + bodyReader = bytes.NewReader(data) + } + + req, err := http.NewRequestWithContext(ctx, method, reqURL, bodyReader) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + if err := c.addAuth(ctx, req, ARMScope); err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("request failed: %w", err) + } + + return resp, nil +} + // doDataPlane executes an authenticated HTTP request against the data plane. func (c *Client) doDataPlane(ctx context.Context, method, path string, body interface{}, queryParams ...string) (*http.Response, error) { reqURL := fmt.Sprintf("%s%s/%s?api-version=%s", c.baseURL, c.subPath, path, c.apiVersion) diff --git a/cli/azd/extensions/azure.ai.customtraining/pkg/client/compute.go b/cli/azd/extensions/azure.ai.customtraining/pkg/client/compute.go new file mode 100644 index 00000000000..d25f85fbaec --- /dev/null +++ b/cli/azd/extensions/azure.ai.customtraining/pkg/client/compute.go @@ -0,0 +1,77 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package client + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/url" + + "azure.ai.customtraining/pkg/models" +) + +const ( + // armComputeAPIVersion is the ARM API version for compute operations. + // When compute GET moves to the data plane, this file can be removed + // in favor of a data plane compute method. + armComputeAPIVersion = "2026-01-15-preview" +) + +// GetCompute retrieves a compute resource by name from the ARM control plane. +// +// ARM URL: +// +// GET https://management.azure.com/subscriptions/{sub}/resourceGroups/{rg}/providers/Microsoft.CognitiveServices/accounts/{account}/computes/{name}?api-version=2026-01-15-preview +// +// Requires SetARMContext to be called first. +func (c *Client) GetCompute(ctx context.Context, computeName string) (*models.ComputeResource, error) { + if c.subscriptionID == "" || c.resourceGroup == "" || c.accountName == "" { + return nil, fmt.Errorf("ARM context not configured; call SetARMContext first") + } + + path := fmt.Sprintf( + "subscriptions/%s/resourceGroups/%s/providers/Microsoft.CognitiveServices/accounts/%s/computes/%s", + url.PathEscape(c.subscriptionID), + url.PathEscape(c.resourceGroup), + url.PathEscape(c.accountName), + url.PathEscape(computeName), + ) + + resp, err := c.doARM(ctx, http.MethodGet, path, nil, armComputeAPIVersion) + if err != nil { + return nil, fmt.Errorf("failed to call ARM compute API: %w", err) + } + defer resp.Body.Close() + + // Permission error — guide user to provide full ARM ID instead + if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden { + return nil, fmt.Errorf( + "insufficient permissions to resolve compute '%s'.\n"+ + " Provide the full ARM resource ID in your YAML instead:\n"+ + " compute: /subscriptions/%s/resourceGroups/%s/providers/Microsoft.CognitiveServices/accounts/%s/computes/%s", + computeName, c.subscriptionID, c.resourceGroup, c.accountName, computeName, + ) + } + + if resp.StatusCode == http.StatusNotFound { + return nil, fmt.Errorf("compute '%s' not found in account '%s'", computeName, c.accountName) + } + + if resp.StatusCode != http.StatusOK { + return nil, c.HandleError(resp) + } + + var result models.ComputeResource + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return nil, fmt.Errorf("failed to parse compute response: %w", err) + } + + if result.ID == "" { + return nil, fmt.Errorf("compute '%s' response missing resource ID", computeName) + } + + return &result, nil +} diff --git a/cli/azd/extensions/azure.ai.customtraining/pkg/models/compute.go b/cli/azd/extensions/azure.ai.customtraining/pkg/models/compute.go new file mode 100644 index 00000000000..76b3514ad2a --- /dev/null +++ b/cli/azd/extensions/azure.ai.customtraining/pkg/models/compute.go @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package models + +// ComputeResource represents an ARM compute resource returned by the control plane. +type ComputeResource struct { + ID string `json:"id"` + Name string `json:"name"` +}