diff --git a/e2e/postgrest_test.go b/e2e/postgrest_test.go index 4d634aa2..39717ff7 100644 --- a/e2e/postgrest_test.go +++ b/e2e/postgrest_test.go @@ -203,6 +203,18 @@ func TestPostgRESTPreflight_MissingSchema(t *testing.T) { }) require.Error(t, err, "expected update task to fail due to missing schema") assert.Contains(t, err.Error(), "nonexistent_schema", "error should mention the missing schema") + + // It should succeed when we remove the nonexistent schema. + err = db.Update(ctx, UpdateOptions{ + Spec: postgrestBaseSpec( + "test_postgrest_preflight_schema", + []string{host1}, + []*controlplane.ServiceSpec{ + postgrestSpec(host1, 0, nil), + }, + ), + }) + require.NoError(t, err) } // TestPostgRESTPreflight_MissingAnonRole verifies the preflight check rejects @@ -236,6 +248,20 @@ func TestPostgRESTPreflight_MissingAnonRole(t *testing.T) { }) require.Error(t, err, "expected update task to fail due to missing anon role") assert.Contains(t, err.Error(), "nonexistent_role", "error should mention the missing role") + + // It should succeed when we switch to an existing role. + err = db.Update(ctx, UpdateOptions{ + Spec: postgrestBaseSpec( + "test_postgrest_preflight_role", + []string{host1}, + []*controlplane.ServiceSpec{ + postgrestSpec(host1, 0, map[string]any{ + "db_anon_role": "anon", + }), + }, + ), + }) + require.NoError(t, err) } // TestPostgRESTHealthCheck verifies the service responds to HTTP requests once running. diff --git a/server/internal/database/operations/golden_test/TestUpdateDatabase/update_after_failed_service_deployment.json b/server/internal/database/operations/golden_test/TestUpdateDatabase/update_after_failed_service_deployment.json new file mode 100644 index 00000000..18994d9e --- /dev/null +++ b/server/internal/database/operations/golden_test/TestUpdateDatabase/update_after_failed_service_deployment.json @@ -0,0 +1,43 @@ +[ + [ + [ + { + "type": "create", + "resource_id": "swarm.network::database-id", + "reason": "does_not_exist", + "diff": null + } + ], + [ + { + "type": "create", + "resource_id": "swarm.service_instance_spec::database-id-test-svc-host-1-id", + "reason": "does_not_exist", + "diff": null + } + ], + [ + { + "type": "create", + "resource_id": "swarm.service_instance::database-id-test-svc-host-1-id", + "reason": "does_not_exist", + "diff": null + } + ], + [ + { + "type": "create", + "resource_id": "monitor.service_instance::database-id-test-svc-host-1-id", + "reason": "does_not_exist", + "diff": null + } + ], + [ + { + "type": "delete", + "resource_id": "swarm.network::database-id-should-delete", + "diff": null + } + ] + ] +] \ No newline at end of file diff --git a/server/internal/database/operations/update_database.go b/server/internal/database/operations/update_database.go index 69b019cb..3ec57341 100644 --- a/server/internal/database/operations/update_database.go +++ b/server/internal/database/operations/update_database.go @@ -40,6 +40,9 @@ func UpdateDatabase( nodes []*NodeResources, services []*ServiceResources, ) ([]resource.Plan, error) { + // avoid modifying the caller's copy of the start state + start = start.Clone() + update, err := updateFunc(options) if err != nil { return nil, err @@ -81,7 +84,7 @@ func UpdateDatabase( } // Mark resources not in the end state with PendingDeletion = true so that // we skip updating them. - start.MarkPendingDeletion(end) + start.PreparePendingDeletesAndRecreates(end) // The states produced by the *Nodes functions are just diffs. Here's where // we create a sequence of incremental updates by iteratively applying those diff --git a/server/internal/database/operations/update_database_test.go b/server/internal/database/operations/update_database_test.go index fbcd178c..7cf301a1 100644 --- a/server/internal/database/operations/update_database_test.go +++ b/server/internal/database/operations/update_database_test.go @@ -216,6 +216,35 @@ func TestUpdateDatabase(t *testing.T) { ), ) + failedService := makeServiceResources(t, "database-id", "test-svc", "host-1-id", nil) + // This first resource should have a planned delete since it won't be in the + // end state. + failedService.Resources[0].Identifier.ID += "-should-delete" + failedService.Resources[0].NeedsRecreate = true + // This resource should have a planned create since it is in the end state. + failedService.Resources[1].NeedsRecreate = true + failedServiceState := makeState(t, + []resource.Resource{ + n1Instance1.Instance, + makeMonitorResource(n1Instance1), + &database.NodeResource{ + Name: "n1", + PrimaryInstanceID: n1Instance1.InstanceID(), + InstanceIDs: []string{n1Instance1.InstanceID()}, + }, + &database.PostgresDatabaseResource{ + NodeName: "n1", + DatabaseName: "test", + }, + }, + slices.Concat( + n1Instance1.InstanceDependencies, + // Only the first two resources got deployed, and both are marked + // with needs_recreate = true. + failedService.Resources[:2], + ), + ) + for _, tc := range []struct { name string options operations.UpdateDatabaseOptions @@ -629,6 +658,19 @@ func TestUpdateDatabase(t *testing.T) { }, services: []*operations.ServiceResources{svcRes}, }, + { + name: "update after failed service deployment", + options: operations.UpdateDatabaseOptions{}, + start: failedServiceState, + nodes: []*operations.NodeResources{ + { + NodeName: "n1", + InstanceResources: []*database.InstanceResources{n1Instance1}, + DatabaseName: "test", + }, + }, + services: []*operations.ServiceResources{svcRes}, + }, } { t.Run(tc.name, func(t *testing.T) { plans, err := operations.UpdateDatabase( diff --git a/server/internal/resource/state.go b/server/internal/resource/state.go index ce4520a8..a12b4b15 100644 --- a/server/internal/resource/state.go +++ b/server/internal/resource/state.go @@ -139,9 +139,11 @@ func (s *State) Merge(other *State) { } } -// MarkPendingDeletion takes an end state and marks all current resources that -// aren't in the end state with PendingDeletion = true. -func (s *State) MarkPendingDeletion(end *State) { +// PreparePendingDeletesAndRecreates takes an end state and marks all current +// resources that aren't in the end state with PendingDeletion = true. Resources +// that need to be recreated and are in the end state are removed from this +// state so that we can plan their creation in the appropriate phase. +func (s *State) PreparePendingDeletesAndRecreates(end *State) { for t, byType := range s.Resources { endByType, ok := end.Resources[t] if !ok { @@ -153,6 +155,11 @@ func (s *State) MarkPendingDeletion(end *State) { for id, resource := range byType { if _, ok := endByType[id]; !ok { resource.PendingDeletion = true + } else if resource.NeedsRecreate { + // This resource will be recreated since it's in the end state. + // Removing the old copy of it from the start state avoids an + // accidental create before it would normally occur. + delete(byType, id) } } }