From 7cf3aa86e77bc331b53557b669244a4a3e699242 Mon Sep 17 00:00:00 2001 From: acmore Date: Tue, 14 Apr 2026 11:48:41 +0800 Subject: [PATCH] feat(down): add wait-for-termination flag --- docs/command-reference.md | 6 +- docs/quickstart.md | 3 + internal/cli/down.go | 206 +++++++++++++++++++++++++++++++++++--- internal/cli/down_test.go | 154 ++++++++++++++++++++++++++++ internal/cli/timeouts.go | 6 ++ 5 files changed, 361 insertions(+), 14 deletions(-) diff --git a/docs/command-reference.md b/docs/command-reference.md index 0d4636d1..1b20d68c 100644 --- a/docs/command-reference.md +++ b/docs/command-reference.md @@ -19,7 +19,7 @@ - `okdev template show ` - `okdev validate` - `okdev up [--wait-timeout 10m] [--dry-run]` -- `okdev down [session] [--delete-pvc] [--dry-run] [--output json]` +- `okdev down [session] [--delete-pvc] [--dry-run] [--wait] [--wait-timeout 2m] [--output json]` - `okdev status [session] [--all] [--all-users] [--details]` - `okdev list [--all-namespaces] [--all-users]` - `okdev use ` @@ -143,12 +143,14 @@ - When `sync.engine=syncthing`, `okdev up` refreshes the session's local Syncthing processes, starts background sync in bidirectional mode by default, and waits for the initial sync to converge before exiting. - `spec.ports` is materialized as SSH `LocalForward` or `RemoteForward` based on `direction`. -### `okdev down [session] [--delete-pvc] [--dry-run] [--output json]` +### `okdev down [session] [--delete-pvc] [--dry-run] [--wait] [--wait-timeout 2m] [--output json]` - Deletes the current session workload and cleans up local SSH/sync metadata. - When `session` is provided, `okdev` can resolve the saved config from session metadata even outside the repo. - Prompts for confirmation by default; use `--yes` in scripts or non-interactive environments. - `--dry-run`: previews what would be deleted without removing cluster or local state. +- `--wait`: waits for the workload object to disappear and then for any remaining session pods to terminate before returning. +- `--wait-timeout`: caps the total time spent waiting for workload/pod termination when `--wait` is enabled. - `--output json`: emits a machine-readable summary of the planned or completed deletion and local cleanup steps. - `--delete-pvc` remains accepted for compatibility but is ignored; `okdev` no longer manages PVC lifecycle automatically. diff --git a/docs/quickstart.md b/docs/quickstart.md index 667c6966..93769a9e 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -191,6 +191,9 @@ okdev down --dry-run ```bash okdev down + +# Block until the workload object and session pods are fully gone +okdev down --wait okdev prune --ttl-hours 72 ``` diff --git a/internal/cli/down.go b/internal/cli/down.go index abd99038..3a69649a 100644 --- a/internal/cli/down.go +++ b/internal/cli/down.go @@ -2,32 +2,50 @@ package cli import ( "bufio" + "context" + "errors" "fmt" "io" "log/slog" "os" "path/filepath" "strings" + "time" + "github.com/acmore/okdev/internal/kube" "github.com/acmore/okdev/internal/session" + "github.com/acmore/okdev/internal/workload" "github.com/spf13/cobra" ) +var downWaitPollInterval = 200 * time.Millisecond + +type downWaitOutput struct { + Enabled bool `json:"enabled"` + Timeout string `json:"timeout,omitempty"` + Status string `json:"status,omitempty"` + WorkloadDeleted bool `json:"workloadDeleted,omitempty"` + PodsDeleted bool `json:"podsDeleted,omitempty"` +} + type downOutput struct { - Session string `json:"session"` - Namespace string `json:"namespace"` - Kind string `json:"kind"` - Workload string `json:"workload"` - DryRun bool `json:"dryRun"` - Deleted bool `json:"deleted"` - Status string `json:"status"` - Notes []string `json:"notes,omitempty"` - Cleanup map[string]any `json:"cleanup,omitempty"` + Session string `json:"session"` + Namespace string `json:"namespace"` + Kind string `json:"kind"` + Workload string `json:"workload"` + DryRun bool `json:"dryRun"` + Deleted bool `json:"deleted"` + Status string `json:"status"` + Notes []string `json:"notes,omitempty"` + Wait *downWaitOutput `json:"wait,omitempty"` + Cleanup map[string]any `json:"cleanup,omitempty"` } func newDownCmd(opts *Options) *cobra.Command { var deletePVC bool var dryRun bool + var wait bool + var waitTimeout time.Duration var yes bool cmd := &cobra.Command{ @@ -46,6 +64,9 @@ func newDownCmd(opts *Options) *cobra.Command { # Emit a machine-readable delete summary okdev down --output json --yes + # Delete and wait for the workload and matching pods to disappear + okdev down --wait + # Delete a specific session okdev down my-feature -y`, RunE: func(cmd *cobra.Command, args []string) error { @@ -73,7 +94,7 @@ func newDownCmd(opts *Options) *cobra.Command { return err } ui.stepDone("ownership", "ok") - ctx, cancel := defaultContext() + ctx, cancel := downCommandContext(wait, waitTimeout) defer cancel() exists, err := shouldReuseExistingWorkload(ctx, cc.kube, cc.namespace, runtime) if err != nil { @@ -89,6 +110,13 @@ func newDownCmd(opts *Options) *cobra.Command { Deleted: false, Status: "planned", } + if wait { + payload.Wait = &downWaitOutput{ + Enabled: true, + Timeout: waitTimeout.String(), + Status: "planned", + } + } if deletePVC { payload.Notes = append(payload.Notes, "--delete-pvc ignored: okdev no longer manages PVC lifecycle") } @@ -109,6 +137,9 @@ func newDownCmd(opts *Options) *cobra.Command { if deletePVC { fmt.Fprintln(cmd.OutOrStdout(), "- note: --delete-pvc is ignored (okdev no longer manages PVC lifecycle)") } + if wait { + fmt.Fprintf(cmd.OutOrStdout(), "- would wait for workload deletion and session pods to terminate (timeout=%s)\n", waitTimeout) + } return nil } @@ -124,6 +155,15 @@ func newDownCmd(opts *Options) *cobra.Command { Notes: []string{"session workload already absent"}, Cleanup: map[string]any{}, } + if wait { + payload.Wait = &downWaitOutput{ + Enabled: true, + Timeout: waitTimeout.String(), + Status: "already stopped", + WorkloadDeleted: true, + PodsDeleted: true, + } + } if err := downCleanupLocal(ui, &payload, cc.sessionName); err != nil { return err } @@ -160,6 +200,14 @@ func newDownCmd(opts *Options) *cobra.Command { Status: "stopped", Cleanup: map[string]any{}, } + if wait { + payload.Status = "terminating" + payload.Wait = &downWaitOutput{ + Enabled: true, + Timeout: waitTimeout.String(), + Status: "waiting", + } + } if len(cc.cfg.Spec.Agents) > 0 { if target, err := resolveTargetRef(ctx, cc.opts, cc.cfg, cc.namespace, cc.sessionName, cc.kube); err != nil { ui.warnf("failed to resolve target before agent auth cleanup: %v", err) @@ -179,6 +227,21 @@ func newDownCmd(opts *Options) *cobra.Command { return fmt.Errorf("delete session %s: %w", runtime.Kind(), err) } ui.stepDone(runtime.Kind(), "deleted") + var waitErr error + if wait { + ui.section("Wait") + ui.stepRun("termination", runtime.WorkloadName()) + waitResult, err := waitForDownDeletion(ctx, cc.kube, cc.namespace, cc.sessionName, runtime, waitTimeout) + payload.Wait = &waitResult + if err != nil { + waitErr = err + payload.Status = "terminating" + ui.warnf("%v", err) + } else { + payload.Status = "stopped" + ui.stepDone("termination", "workload deleted and session pods gone") + } + } if deletePVC { ui.warnf("--delete-pvc ignored: okdev no longer manages PVC lifecycle; delete PVCs manually if needed") payload.Notes = append(payload.Notes, "--delete-pvc ignored: okdev no longer manages PVC lifecycle; delete PVCs manually if needed") @@ -189,24 +252,143 @@ func newDownCmd(opts *Options) *cobra.Command { return err } if cc.opts.Output == "json" { - return outputJSON(cmd.OutOrStdout(), payload) + if err := outputJSON(cmd.OutOrStdout(), payload); err != nil { + return err + } + return waitErr + } + if waitErr != nil { + ui.printWarnings() + return waitErr } ui.printWarnings() ui.section("Ready") fmt.Fprintf(cmd.OutOrStdout(), "session: %s\n", cc.sessionName) fmt.Fprintf(cmd.OutOrStdout(), "namespace: %s\n", cc.namespace) fmt.Fprintln(cmd.OutOrStdout(), "status: stopped") - fmt.Fprintln(cmd.OutOrStdout(), "workspace: pod deleted; volumes/PVCs unchanged") + if wait { + fmt.Fprintln(cmd.OutOrStdout(), "workspace: workload deleted and session pods terminated; volumes/PVCs unchanged") + } else { + fmt.Fprintln(cmd.OutOrStdout(), "workspace: pod deleted; volumes/PVCs unchanged") + } return nil }, } cmd.Flags().BoolVarP(&yes, "yes", "y", false, "Skip confirmation prompt") cmd.Flags().BoolVar(&deletePVC, "delete-pvc", false, "Delete workspace PVC for this session") cmd.Flags().BoolVar(&dryRun, "dry-run", false, "Preview actions without deleting resources") + cmd.Flags().BoolVar(&wait, "wait", false, "Wait for workload deletion and session pod termination") + cmd.Flags().DurationVar(&waitTimeout, "wait-timeout", downDefaultWaitTimeout, "Wait timeout for workload termination") _ = cmd.Flags().MarkDeprecated("delete-pvc", "PVC lifecycle is no longer managed; delete PVCs manually if needed") return cmd } +type downWaitClient interface { + ResourceExists(context.Context, string, string, string, string) (bool, error) + ListPods(context.Context, string, bool, string) ([]kube.PodSummary, error) +} + +func downCommandContext(wait bool, waitTimeout time.Duration) (context.Context, context.CancelFunc) { + timeout := defaultContextTimeout + if wait { + needed := waitTimeout + downContextBuffer + if needed > timeout { + timeout = needed + } + } + return context.WithTimeout(context.Background(), timeout) +} + +func waitForDownDeletion(ctx context.Context, k downWaitClient, namespace, sessionName string, runtime workload.Runtime, timeout time.Duration) (downWaitOutput, error) { + result := downWaitOutput{ + Enabled: true, + Timeout: timeout.String(), + Status: "waiting", + } + ref, ok := runtime.(workload.RefProvider) + if !ok { + return result, fmt.Errorf("wait for %s deletion is unsupported", runtime.Kind()) + } + apiVersion, kind, name, err := ref.WorkloadRef() + if err != nil { + return result, fmt.Errorf("resolve workload reference for wait: %w", err) + } + waitCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + if err := waitForResourceDeletion(waitCtx, k, namespace, apiVersion, kind, name); err != nil { + result.Status = "timed out" + return result, err + } + result.WorkloadDeleted = true + if err := waitForSessionPodsDeleted(waitCtx, k, namespace, sessionName); err != nil { + result.Status = "timed out" + return result, err + } + result.PodsDeleted = true + result.Status = "completed" + return result, nil +} + +func waitForResourceDeletion(ctx context.Context, k downWaitClient, namespace, apiVersion, kind, name string) error { + timeoutMessage := fmt.Sprintf("timed out waiting for %s/%s to be deleted", kind, name) + ticker := time.NewTicker(downWaitPollInterval) + defer ticker.Stop() + for { + exists, err := k.ResourceExists(ctx, namespace, apiVersion, kind, name) + if err != nil { + if ctx.Err() != nil { + return downWaitContextError(ctx, timeoutMessage) + } + return fmt.Errorf("check %s/%s deletion: %w", kind, name, err) + } + if !exists { + return nil + } + select { + case <-ctx.Done(): + return downWaitContextError(ctx, timeoutMessage) + case <-ticker.C: + } + } +} + +func waitForSessionPodsDeleted(ctx context.Context, k downWaitClient, namespace, sessionName string) error { + ticker := time.NewTicker(downWaitPollInterval) + defer ticker.Stop() + selector := "okdev.io/managed=true,okdev.io/session=" + sessionName + for { + pods, err := k.ListPods(ctx, namespace, false, selector) + if err != nil { + if ctx.Err() != nil { + return downWaitContextError(ctx, "timed out waiting for session pods to terminate") + } + return fmt.Errorf("list session pods while waiting for deletion: %w", err) + } + if len(pods) == 0 { + return nil + } + select { + case <-ctx.Done(): + names := make([]string, 0, len(pods)) + for _, pod := range pods { + names = append(names, pod.Name) + } + return downWaitContextError(ctx, fmt.Sprintf("timed out waiting for session pods to terminate: %s", strings.Join(names, ", "))) + case <-ticker.C: + } + } +} + +func downWaitContextError(ctx context.Context, timeoutMessage string) error { + if errors.Is(ctx.Err(), context.DeadlineExceeded) { + return errors.New(timeoutMessage) + } + if err := ctx.Err(); err != nil { + return err + } + return errors.New(timeoutMessage) +} + func confirmDown(in io.Reader, out io.Writer, sessionName, namespace, kind, workloadName string) (bool, error) { if !isTerminalReader(in) { return false, fmt.Errorf("refusing to delete without --yes in non-interactive mode") diff --git a/internal/cli/down_test.go b/internal/cli/down_test.go index b57d2b4f..4f6b4b81 100644 --- a/internal/cli/down_test.go +++ b/internal/cli/down_test.go @@ -8,6 +8,7 @@ import ( "net/http/httptest" "strings" "testing" + "time" ) func TestNewDownCmdDeprecatesDeletePVC(t *testing.T) { @@ -32,6 +33,18 @@ func TestNewDownCmdHasYesFlag(t *testing.T) { } } +func TestNewDownCmdHasWaitFlags(t *testing.T) { + cmd := newDownCmd(&Options{}) + waitFlag := cmd.Flags().Lookup("wait") + if waitFlag == nil { + t.Fatal("expected wait flag") + } + waitTimeoutFlag := cmd.Flags().Lookup("wait-timeout") + if waitTimeoutFlag == nil { + t.Fatal("expected wait-timeout flag") + } +} + func TestPromptConfirmDownAccepts(t *testing.T) { for _, input := range []string{"y\n", "Y\n", "yes\n", "YES\n", " y \n"} { in := strings.NewReader(input) @@ -196,3 +209,144 @@ func TestNewDownCmdDryRunReportsMissingWorkload(t *testing.T) { t.Fatalf("expected absent-workload note, got %#v", payload) } } + +func TestNewDownCmdDryRunOutputsJSONWithWait(t *testing.T) { + server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + switch r.URL.Path { + case "/api/v1/namespaces/demo/pods": + _, _ = io.WriteString(w, `{"kind":"PodList","apiVersion":"v1","items":[{"metadata":{"namespace":"demo","name":"okdev-sess-a","creationTimestamp":"2026-03-29T00:00:00Z","labels":{"okdev.io/session":"sess-a","okdev.io/owner":"alice","okdev.io/workload-type":"pod"}},"status":{"phase":"Running","containerStatuses":[{"name":"dev","ready":true}]}}]}`) + case "/api/v1/namespaces/demo/pods/okdev-sess-a": + _, _ = io.WriteString(w, `{"metadata":{"namespace":"demo","name":"okdev-sess-a","labels":{"okdev.io/session":"sess-a","okdev.io/owner":"alice","okdev.io/workload-type":"pod"}},"status":{"phase":"Running","containerStatuses":[{"name":"dev","ready":true}]}}`) + default: + http.NotFound(w, r) + } + })) + defer server.Close() + + t.Setenv("KUBECONFIG", writeCLITLSTestKubeconfig(t, server)) + cfgPath := writeCLIConfig(t, "demo") + opts := &Options{ConfigPath: cfgPath, Context: "dev", Session: "sess-a", Owner: "alice", Output: "json"} + cmd := newDownCmd(opts) + var out bytes.Buffer + cmd.SetOut(&out) + cmd.SetErr(io.Discard) + cmd.SetArgs([]string{"--dry-run", "--wait", "--wait-timeout", "2m"}) + + if err := cmd.Execute(); err != nil { + t.Fatalf("down execute: %v", err) + } + + var payload downOutput + if err := json.Unmarshal(out.Bytes(), &payload); err != nil { + t.Fatalf("json unmarshal: %v\n%s", err, out.String()) + } + if !payload.DryRun { + t.Fatalf("expected dry-run payload: %#v", payload) + } + if payload.Wait == nil || !payload.Wait.Enabled { + t.Fatalf("expected wait enabled in payload: %#v", payload) + } + if payload.Wait.Timeout != "2m0s" { + t.Fatalf("expected wait timeout 2m0s, got %#v", payload.Wait) + } +} + +func TestNewDownCmdWaitsForPodDeletion(t *testing.T) { + var getCount int + var listCount int + server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + switch { + case r.Method == http.MethodGet && r.URL.Path == "/api/v1/namespaces/demo/pods": + listCount++ + if listCount == 1 { + _, _ = io.WriteString(w, `{"kind":"PodList","apiVersion":"v1","items":[{"metadata":{"namespace":"demo","name":"okdev-sess-a","creationTimestamp":"2026-03-29T00:00:00Z","labels":{"okdev.io/managed":"true","okdev.io/session":"sess-a","okdev.io/name":"demo","okdev.io/workload-type":"pod"}},"status":{"phase":"Running","containerStatuses":[{"name":"dev","ready":true}]}}]}`) + return + } + if listCount == 2 { + _, _ = io.WriteString(w, `{"kind":"PodList","apiVersion":"v1","items":[{"metadata":{"namespace":"demo","name":"okdev-sess-a","creationTimestamp":"2026-03-29T00:00:00Z","deletionTimestamp":"2026-03-29T00:01:00Z","labels":{"okdev.io/managed":"true","okdev.io/session":"sess-a","okdev.io/name":"demo","okdev.io/workload-type":"pod"}},"status":{"phase":"Running","containerStatuses":[{"name":"dev","ready":false}]}}]}`) + return + } + _, _ = io.WriteString(w, `{"kind":"PodList","apiVersion":"v1","items":[]}`) + case r.Method == http.MethodGet && r.URL.Path == "/api/v1/namespaces/demo/pods/okdev-sess-a": + getCount++ + if getCount <= 2 { + _, _ = io.WriteString(w, `{"kind":"Pod","apiVersion":"v1","metadata":{"namespace":"demo","name":"okdev-sess-a","labels":{"okdev.io/managed":"true","okdev.io/session":"sess-a","okdev.io/name":"demo","okdev.io/workload-type":"pod"}},"status":{"phase":"Running","containerStatuses":[{"name":"dev","ready":true}]}}`) + return + } + http.NotFound(w, r) + case r.Method == http.MethodDelete && r.URL.Path == "/api/v1/namespaces/demo/pods/okdev-sess-a": + w.WriteHeader(http.StatusOK) + _, _ = io.WriteString(w, `{"kind":"Status","apiVersion":"v1","status":"Success"}`) + default: + http.NotFound(w, r) + } + })) + defer server.Close() + + t.Setenv("KUBECONFIG", writeCLITLSTestKubeconfig(t, server)) + cfgPath := writeCLIConfig(t, "demo") + opts := &Options{ConfigPath: cfgPath, Context: "dev", Session: "sess-a", Owner: "alice", Output: "json"} + cmd := newDownCmd(opts) + var out bytes.Buffer + cmd.SetOut(&out) + cmd.SetErr(io.Discard) + cmd.SetArgs([]string{"--yes", "--wait", "--wait-timeout", "2s"}) + + if err := cmd.Execute(); err != nil { + t.Fatalf("down execute: %v", err) + } + + var payload downOutput + if err := json.Unmarshal(out.Bytes(), &payload); err != nil { + t.Fatalf("json unmarshal: %v\n%s", err, out.String()) + } + if payload.Wait == nil || !payload.Wait.Enabled || payload.Wait.Status != "completed" { + t.Fatalf("expected completed wait payload, got %#v", payload) + } + if !payload.Wait.WorkloadDeleted || !payload.Wait.PodsDeleted { + t.Fatalf("expected workload and pod deletion observed, got %#v", payload.Wait) + } +} + +func TestNewDownCmdWaitTimeoutReturnsError(t *testing.T) { + previousInterval := downWaitPollInterval + downWaitPollInterval = 5 * time.Millisecond + t.Cleanup(func() { + downWaitPollInterval = previousInterval + }) + + server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + switch { + case r.Method == http.MethodGet && r.URL.Path == "/api/v1/namespaces/demo/pods": + _, _ = io.WriteString(w, `{"kind":"PodList","apiVersion":"v1","items":[{"metadata":{"namespace":"demo","name":"okdev-sess-a","creationTimestamp":"2026-03-29T00:00:00Z","deletionTimestamp":"2026-03-29T00:01:00Z","labels":{"okdev.io/managed":"true","okdev.io/session":"sess-a","okdev.io/name":"demo","okdev.io/workload-type":"pod"}},"status":{"phase":"Running","containerStatuses":[{"name":"dev","ready":false}]}}]}`) + case r.Method == http.MethodGet && r.URL.Path == "/api/v1/namespaces/demo/pods/okdev-sess-a": + _, _ = io.WriteString(w, `{"kind":"Pod","apiVersion":"v1","metadata":{"namespace":"demo","name":"okdev-sess-a","labels":{"okdev.io/managed":"true","okdev.io/session":"sess-a","okdev.io/name":"demo","okdev.io/workload-type":"pod"}},"status":{"phase":"Running","containerStatuses":[{"name":"dev","ready":false}]}}`) + case r.Method == http.MethodDelete && r.URL.Path == "/api/v1/namespaces/demo/pods/okdev-sess-a": + w.WriteHeader(http.StatusOK) + _, _ = io.WriteString(w, `{"kind":"Status","apiVersion":"v1","status":"Success"}`) + default: + http.NotFound(w, r) + } + })) + defer server.Close() + + t.Setenv("KUBECONFIG", writeCLITLSTestKubeconfig(t, server)) + cfgPath := writeCLIConfig(t, "demo") + opts := &Options{ConfigPath: cfgPath, Context: "dev", Session: "sess-a", Owner: "alice", Output: "json"} + cmd := newDownCmd(opts) + var out bytes.Buffer + cmd.SetOut(&out) + cmd.SetErr(io.Discard) + cmd.SetArgs([]string{"--yes", "--wait", "--wait-timeout", "20ms"}) + + err := cmd.Execute() + if err == nil { + t.Fatal("expected wait timeout error") + } + if !strings.Contains(err.Error(), "timed out") { + t.Fatalf("expected timeout error, got %v", err) + } +} diff --git a/internal/cli/timeouts.go b/internal/cli/timeouts.go index 97a1ab91..2a6cf612 100644 --- a/internal/cli/timeouts.go +++ b/internal/cli/timeouts.go @@ -20,6 +20,12 @@ const ( // Extra headroom added around up wait timeout when building the parent context. upContextBuffer = 2 * time.Minute + // Default --wait-timeout for `okdev down`. + downDefaultWaitTimeout = 2 * time.Minute + + // Extra headroom added around down wait timeout when building the parent context. + downContextBuffer = 30 * time.Second + // Timeout for session-access and ownership checks. sessionAccessTimeout = 15 * time.Second