diff --git a/cli/azd/pkg/azapi/deployment_state_test.go b/cli/azd/pkg/azapi/deployment_state_test.go new file mode 100644 index 00000000000..ae100b98924 --- /dev/null +++ b/cli/azd/pkg/azapi/deployment_state_test.go @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package azapi + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestIsActiveDeploymentState(t *testing.T) { + active := []DeploymentProvisioningState{ + DeploymentProvisioningStateAccepted, + DeploymentProvisioningStateCanceling, + DeploymentProvisioningStateCreating, + DeploymentProvisioningStateDeleting, + DeploymentProvisioningStateDeletingResources, + DeploymentProvisioningStateDeploying, + DeploymentProvisioningStateRunning, + DeploymentProvisioningStateUpdating, + DeploymentProvisioningStateUpdatingDenyAssignments, + DeploymentProvisioningStateValidating, + DeploymentProvisioningStateWaiting, + } + + for _, state := range active { + t.Run(string(state), func(t *testing.T) { + require.True(t, IsActiveDeploymentState(state), + "expected %s to be active", state) + }) + } + + inactive := []DeploymentProvisioningState{ + DeploymentProvisioningStateSucceeded, + DeploymentProvisioningStateFailed, + DeploymentProvisioningStateCanceled, + DeploymentProvisioningStateDeleted, + DeploymentProvisioningStateNotSpecified, + DeploymentProvisioningStateReady, + } + + for _, state := range inactive { + t.Run(string(state), func(t *testing.T) { + require.False(t, IsActiveDeploymentState(state), + "expected %s to be inactive", state) + }) + } +} diff --git a/cli/azd/pkg/azapi/deployments.go b/cli/azd/pkg/azapi/deployments.go index 1e079370a4c..886d1e7c47c 100644 --- a/cli/azd/pkg/azapi/deployments.go +++ b/cli/azd/pkg/azapi/deployments.go @@ -107,6 +107,28 @@ const ( DeploymentProvisioningStateUpdating DeploymentProvisioningState = "Updating" ) +// IsActiveDeploymentState reports whether the given provisioning state +// 
indicates a deployment that is still in progress, including transitional +// states like canceling or deleting that can still block new deployments. +func IsActiveDeploymentState(state DeploymentProvisioningState) bool { + switch state { + case DeploymentProvisioningStateAccepted, + DeploymentProvisioningStateCanceling, + DeploymentProvisioningStateCreating, + DeploymentProvisioningStateDeleting, + DeploymentProvisioningStateDeletingResources, + DeploymentProvisioningStateDeploying, + DeploymentProvisioningStateRunning, + DeploymentProvisioningStateUpdating, + DeploymentProvisioningStateUpdatingDenyAssignments, + DeploymentProvisioningStateValidating, + DeploymentProvisioningStateWaiting: + return true + default: + return false + } +} + type DeploymentService interface { GenerateDeploymentName(baseName string) string CalculateTemplateHash( diff --git a/cli/azd/pkg/infra/provisioning/bicep/active_deployment_check_test.go b/cli/azd/pkg/infra/provisioning/bicep/active_deployment_check_test.go new file mode 100644 index 00000000000..9ff8b7aadde --- /dev/null +++ b/cli/azd/pkg/infra/provisioning/bicep/active_deployment_check_test.go @@ -0,0 +1,212 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package bicep + +import ( + "context" + "fmt" + "sync/atomic" + "testing" + "time" + + "github.com/azure/azure-dev/cli/azd/pkg/azapi" + "github.com/azure/azure-dev/cli/azd/pkg/infra" + "github.com/azure/azure-dev/cli/azd/test/mocks/mockinput" + "github.com/stretchr/testify/require" +) + +// activeDeploymentScope is a test helper that implements infra.Scope and lets +// the caller control what ListDeployments returns on each call. The standalone +// infra.ListActiveDeployments function filters these results. +type activeDeploymentScope struct { + // calls tracks how many times ListDeployments has been invoked. 
+ calls atomic.Int32 + // activePerCall maps a 0-based call index to the list of deployments + // returned for that call. If the index is missing, nil is returned. + activePerCall map[int][]*azapi.ResourceDeployment + // errOnCall, if non-nil, maps a call index to an error to return. + errOnCall map[int]error +} + +func (s *activeDeploymentScope) SubscriptionId() string { return "test-sub" } + +func (s *activeDeploymentScope) Deployment(_ string) infra.Deployment { return nil } + +func (s *activeDeploymentScope) ListDeployments( + _ context.Context, +) ([]*azapi.ResourceDeployment, error) { + idx := int(s.calls.Add(1)) - 1 + if s.errOnCall != nil { + if e, ok := s.errOnCall[idx]; ok { + return nil, e + } + } + if s.activePerCall != nil { + return s.activePerCall[idx], nil + } + return nil, nil +} + +// newTestProvider returns a BicepProvider with fast poll settings for tests. +func newTestProvider() *BicepProvider { + return &BicepProvider{ + console: mockinput.NewMockConsole(), + activeDeployPollInterval: 10 * time.Millisecond, + activeDeployTimeout: 2 * time.Second, + } +} + +func TestWaitForActiveDeployments_NoActive(t *testing.T) { + scope := &activeDeploymentScope{} + p := newTestProvider() + + err := p.waitForActiveDeployments(t.Context(), scope) + require.NoError(t, err) + require.Equal(t, int32(1), scope.calls.Load(), + "should call ListActiveDeployments once") +} + +func TestWaitForActiveDeployments_InitialListError_NotFound(t *testing.T) { + scope := &activeDeploymentScope{ + errOnCall: map[int]error{ + 0: fmt.Errorf("listing: %w", infra.ErrDeploymentsNotFound), + }, + } + p := newTestProvider() + + // ErrDeploymentsNotFound (resource group doesn't exist yet) is safe to ignore. 
// TestWaitForActiveDeployments_CancelledContext verifies that a cancelled
// context ends the wait with context.Canceled once active deployments have
// been detected.
func TestWaitForActiveDeployments_CancelledContext(t *testing.T) {
	ctx, cancel := context.WithCancel(t.Context())

	running := []*azapi.ResourceDeployment{
		{
			Name:              "deploy-forever",
			ProvisioningState: azapi.DeploymentProvisioningStateRunning,
		},
	}
	scope := &activeDeploymentScope{
		// Only the first call (index 0) reports an active deployment. Any
		// later call would return nil, but the cancelled context is expected
		// to end the wait before a poll happens.
		activePerCall: map[int][]*azapi.ResourceDeployment{
			0: running,
		},
	}
	p := newTestProvider()

	// Cancel before waiting. The stub's ListDeployments ignores its context,
	// so the initial listing still succeeds and the wait loop is entered; the
	// loop then returns ctx.Err() from its select.
	cancel()

	err := p.waitForActiveDeployments(ctx, scope)
	require.ErrorIs(t, err, context.Canceled)
}
+ perCall := make(map[int][]*azapi.ResourceDeployment) + for i := range 200 { + perCall[i] = running + } + + scope := &activeDeploymentScope{activePerCall: perCall} + p := &BicepProvider{ + console: mockinput.NewMockConsole(), + activeDeployPollInterval: 5 * time.Millisecond, + activeDeployTimeout: 50 * time.Millisecond, + } + + err := p.waitForActiveDeployments(t.Context(), scope) + require.Error(t, err) + require.Contains(t, err.Error(), "timed out") + require.Contains(t, err.Error(), "stuck-deploy") +} diff --git a/cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go b/cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go index 8f00c63a7ca..9c408f12faf 100644 --- a/cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go +++ b/cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go @@ -91,6 +91,12 @@ type BicepProvider struct { // Internal state // compileBicepResult is cached to avoid recompiling the same bicep file multiple times in the same azd run. compileBicepMemoryCache *compileBicepResult + + // activeDeployPollInterval and activeDeployTimeout override the defaults + // for the active-deployment wait loop. Zero means use the default. These + // are only set in tests. + activeDeployPollInterval time.Duration + activeDeployTimeout time.Duration } // Name gets the name of the infra provider @@ -607,6 +613,95 @@ func logDS(msg string, v ...any) { log.Printf("%s : %s", "deployment-state: ", fmt.Sprintf(msg, v...)) } +const ( + // defaultActiveDeploymentPollInterval is how often we re-check for active deployments. + defaultActiveDeploymentPollInterval = 30 * time.Second + // defaultActiveDeploymentTimeout caps the total wait time for active deployments. + defaultActiveDeploymentTimeout = 30 * time.Minute +) + +// waitForActiveDeployments checks for deployments that are already in progress +// at the target scope. If any are found it logs a warning and polls until they +// finish or the timeout is reached. 
+func (p *BicepProvider) waitForActiveDeployments( + ctx context.Context, + scope infra.Scope, +) error { + active, err := infra.ListActiveDeployments(ctx, scope) + if err != nil { + // If the resource group doesn't exist yet, there are no active + // deployments — proceed normally. + if errors.Is(err, infra.ErrDeploymentsNotFound) { + return nil + } + // For other errors (auth, throttling, transient), surface them + // so the user knows the pre-check couldn't run. + log.Printf( + "active-deployment-check: unable to list deployments: %v", err) + return fmt.Errorf("checking for active deployments: %w", err) + } + + if len(active) == 0 { + return nil + } + + names := make([]string, len(active)) + for i, d := range active { + names[i] = d.Name + } + p.console.MessageUxItem(ctx, &ux.WarningMessage{ + Description: fmt.Sprintf( + "Waiting for %d active deployment(s) to complete: %s", + len(active), strings.Join(names, ", ")), + }) + + p.console.ShowSpinner(ctx, + "Waiting for active deployment(s) to complete", input.Step) + defer p.console.StopSpinner(ctx, "", input.StepDone) + + pollInterval := p.activeDeployPollInterval + if pollInterval == 0 { + pollInterval = defaultActiveDeploymentPollInterval + } + timeout := p.activeDeployTimeout + if timeout == 0 { + timeout = defaultActiveDeploymentTimeout + } + + deadline := time.After(timeout) + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-deadline: + // Refresh names from latest poll for an accurate timeout message + currentNames := make([]string, len(active)) + for i, d := range active { + currentNames[i] = d.Name + } + return fmt.Errorf( + "timed out after %s waiting for active "+ + "deployment(s) to complete: %s", + timeout, strings.Join(currentNames, ", ")) + case <-ticker.C: + active, err = infra.ListActiveDeployments(ctx, scope) + if err != nil { + if errors.Is(err, infra.ErrDeploymentsNotFound) { + return nil + } + return fmt.Errorf( 
+ "checking active deployments: %w", err) + } + if len(active) == 0 { + return nil + } + } + } +} + // Provisioning the infrastructure within the specified template func (p *BicepProvider) Deploy(ctx context.Context) (*provisioning.DeployResult, error) { if p.ignoreDeploymentState { @@ -718,6 +813,15 @@ func (p *BicepProvider) Deploy(ctx context.Context) (*provisioning.DeployResult, p.console.StopSpinner(ctx, "", input.StepDone) } + // Check for active deployments at the target scope and wait if any are in progress. + // Use scopeForTemplate to get the raw scope — deployment.Scope may have a nil + // inner scope in test mocks. + if activeScope, err := p.scopeForTemplate(planned.Template); err == nil { + if err := p.waitForActiveDeployments(ctx, activeScope); err != nil { + return nil, err + } + } + progressCtx, cancelProgress := context.WithCancel(ctx) var wg sync.WaitGroup wg.Add(1) diff --git a/cli/azd/pkg/infra/scope.go b/cli/azd/pkg/infra/scope.go index 303766d2d95..dcccc79c479 100644 --- a/cli/azd/pkg/infra/scope.go +++ b/cli/azd/pkg/infra/scope.go @@ -24,6 +24,31 @@ type Scope interface { Deployment(deploymentName string) Deployment } +// ListActiveDeployments lists all deployments at the given scope and returns +// only those with an active provisioning state (Running, Deploying, etc.). +func ListActiveDeployments( + ctx context.Context, + scope Scope, +) ([]*azapi.ResourceDeployment, error) { + all, err := scope.ListDeployments(ctx) + if err != nil { + return nil, err + } + + return filterActiveDeployments(all), nil +} + +// filterActiveDeployments returns only deployments with an active provisioning state. 
+func filterActiveDeployments(deployments []*azapi.ResourceDeployment) []*azapi.ResourceDeployment { + var active []*azapi.ResourceDeployment + for _, d := range deployments { + if azapi.IsActiveDeploymentState(d.ProvisioningState) { + active = append(active, d) + } + } + return active +} + type Deployment interface { Scope // Name is the name of this deployment. diff --git a/cli/azd/resources/error_suggestions.yaml b/cli/azd/resources/error_suggestions.yaml index 5dc30847f2f..2008a226312 100644 --- a/cli/azd/resources/error_suggestions.yaml +++ b/cli/azd/resources/error_suggestions.yaml @@ -49,6 +49,17 @@ rules: # 4th most common error category (~128,054 errors in 90-day analysis) # ============================================================================ + - errorType: "DeploymentErrorLine" + properties: + Code: "DeploymentActive" + message: "Another deployment is already in progress on this scope." + suggestion: > + Wait for the current deployment to complete, then retry. + You can check deployment status in the Azure portal under the Deployments blade. + links: + - url: "https://learn.microsoft.com/azure/azure-resource-manager/troubleshooting/error-deployment-active" + title: "Troubleshoot DeploymentActive errors" + - errorType: "DeploymentErrorLine" properties: Code: "FlagMustBeSetForRestore"