acmore · acmore · Apr 14, 2026 · Apr 14, 2026
diff --git a/internal/cli/cp.go b/internal/cli/cp.go
@@ -42,6 +42,7 @@ func newCpCmd(opts *Options) *cobra.Command {
 	var exclude []string
 	var container string
 	var fanout int
+	var readyOnly bool
 
 	cmd := &cobra.Command{
 		Use:   "cp [session] <src> <dst>",
@@ -97,7 +98,7 @@ func newCpCmd(opts *Options) *cobra.Command {
 			}
 
 			if multiPod {
-				return runMultiPodCp(cmd, cc, localPath, remotePath, upload, allPods, podNames, role, labels, exclude, container, fanout)
+				return runMultiPodCp(cmd, cc, localPath, remotePath, upload, allPods, podNames, role, labels, exclude, container, fanout, readyOnly)
 			}
 
 			// Single-pod mode.
@@ -120,6 +121,7 @@ func newCpCmd(opts *Options) *cobra.Command {
 	cmd.Flags().StringSliceVar(&exclude, "exclude", nil, "Exclude specific pods (repeatable/comma-separated)")
 	cmd.Flags().StringVar(&container, "container", "", "Override target container")
 	cmd.Flags().IntVar(&fanout, "fanout", pdshDefaultFanout, "Maximum concurrent pod transfers")
+	cmd.Flags().BoolVar(&readyOnly, "ready-only", false, "Copy only to/from pods that are already running (skip readiness check)")
 	return cmd
 }
 
@@ -188,30 +190,54 @@ type cpResult struct {
 	err error
 }
 
-func runMultiPodCp(cmd *cobra.Command, cc *commandContext, localPath, remotePath string, upload bool, allPods bool, podNames []string, role string, labels []string, exclude []string, container string, fanout int) error {
+func runMultiPodCp(cmd *cobra.Command, cc *commandContext, localPath, remotePath string, upload bool, allPods bool, podNames []string, role string, labels []string, exclude []string, container string, fanout int, readyOnly bool) error {
 	ctx := cmd.Context()
 	labelSel := selectorForSessionRun(cc.sessionName)
 	sessionPods, err := cc.kube.ListPods(ctx, cc.namespace, false, labelSel)
 	if err != nil {
 		return fmt.Errorf("list session pods: %w", err)
 	}
-	pods := filterRunningPods(sessionPods)
 
+	// Apply user-specified filters before the readiness check so the
+	// running-vs-total comparison only considers targeted pods.
+	filteredPods := sessionPods
 	switch {
 	case allPods:
 		// keep all
 	case len(podNames) > 0:
-		pods = filterPodsByName(pods, podNames)
+		filteredPods = filterPodsByName(filteredPods, podNames)
 	case role != "":
-		pods = filterPodsByRole(pods, role)
+		filteredPods = filterPodsByRole(filteredPods, role)
 	case len(labels) > 0:
-		pods = filterPodsByLabels(pods, labels)
+		filteredPods = filterPodsByLabels(filteredPods, labels)
 	}
 	if len(exclude) > 0 {
-		pods = excludePods(pods, exclude)
+		filteredPods = excludePods(filteredPods, exclude)
 	}
+
+	if len(filteredPods) == 0 {
+		return fmt.Errorf("no pods match the specified filters in session %q", cc.sessionName)
+	}
+
+	pods := filterRunningPods(filteredPods)
+
 	if len(pods) == 0 {
-		return fmt.Errorf("no running pods match the specified filters in session %q", cc.sessionName)
+		return fmt.Errorf("no running pods in session %q (0/%d pods ready)", cc.sessionName, len(filteredPods))
+	}
+
+	if len(pods) < len(filteredPods) && !readyOnly {
+		notReady := make([]string, 0, len(filteredPods)-len(pods))
+		runningSet := make(map[string]bool, len(pods))
+		for _, p := range pods {
+			runningSet[p.Name] = true
+		}
+		for _, p := range filteredPods {
+			if !runningSet[p.Name] {
+				notReady = append(notReady, fmt.Sprintf("%s (%s)", p.Name, p.Phase))
+			}
+		}
+		return fmt.Errorf("%d/%d pods are not running: %s\nUse --ready-only to copy on the %d ready pods",
+			len(notReady), len(filteredPods), strings.Join(notReady, ", "), len(pods))
 	}
 
 	targetContainer := container

diff --git a/internal/cli/cp_test.go b/internal/cli/cp_test.go
@@ -1,8 +1,11 @@
 package cli
 
 import (
+	"fmt"
 	"strings"
 	"testing"
+
+	"github.com/acmore/okdev/internal/kube"
 )
 
 func TestParseCpArgs(t *testing.T) {
@@ -95,3 +98,135 @@ func TestMultiPodDownloadPath(t *testing.T) {
 		t.Fatalf("directory download path = %q", got)
 	}
 }
+
+func TestCpReadinessCheckErrorsWhenPodsNotRunning(t *testing.T) {
+	allPods := []kube.PodSummary{
+		{Name: "sess-master-0", Phase: "Running", Labels: map[string]string{"okdev.io/workload-role": "Master"}},
+		{Name: "sess-worker-0", Phase: "Running", Labels: map[string]string{"okdev.io/workload-role": "Worker"}},
+		{Name: "sess-worker-1", Phase: "Pending", Labels: map[string]string{"okdev.io/workload-role": "Worker"}},
+		{Name: "sess-worker-2", Phase: "ContainerCreating", Labels: map[string]string{"okdev.io/workload-role": "Worker"}},
+	}
+
+	// Simulate the filter-then-readiness pipeline used by runMultiPodCp --all.
+	running := filterRunningPods(allPods)
+	if len(running) != 2 {
+		t.Fatalf("expected 2 running pods, got %d", len(running))
+	}
+	if len(running) == len(allPods) {
+		t.Fatal("expected some pods to be filtered out")
+	}
+
+	// Build the error message the same way cp.go does.
+	notReady := make([]string, 0)
+	runningSet := make(map[string]bool)
+	for _, p := range running {
+		runningSet[p.Name] = true
+	}
+	for _, p := range allPods {
+		if !runningSet[p.Name] {
+			notReady = append(notReady, fmt.Sprintf("%s (%s)", p.Name, p.Phase))
+		}
+	}
+	if len(notReady) != 2 {
+		t.Fatalf("expected 2 not-ready pods, got %d: %v", len(notReady), notReady)
+	}
+
+	errMsg := fmt.Sprintf("%d/%d pods are not running: %s\nUse --ready-only to copy on the %d ready pods",
+		len(notReady), len(allPods), strings.Join(notReady, ", "), len(running))
+	if !strings.Contains(errMsg, "2/4 pods are not running") {
+		t.Fatalf("unexpected error message: %s", errMsg)
+	}
+	if !strings.Contains(errMsg, "sess-worker-1 (Pending)") {
+		t.Fatalf("expected Pending pod in error: %s", errMsg)
+	}
+	if !strings.Contains(errMsg, "sess-worker-2 (ContainerCreating)") {
+		t.Fatalf("expected ContainerCreating pod in error: %s", errMsg)
+	}
+	if !strings.Contains(errMsg, "--ready-only") {
+		t.Fatalf("expected --ready-only hint: %s", errMsg)
+	}
+}
+
+func TestCpReadinessCheckWithRoleFilter(t *testing.T) {
+	allPods := []kube.PodSummary{
+		{Name: "sess-master-0", Phase: "Running", Labels: map[string]string{"okdev.io/workload-role": "Master"}},
+		{Name: "sess-worker-0", Phase: "Running", Labels: map[string]string{"okdev.io/workload-role": "Worker"}},
+		{Name: "sess-worker-1", Phase: "Pending", Labels: map[string]string{"okdev.io/workload-role": "Worker"}},
+	}
+
+	// Filter by role first (as runMultiPodCp does), then check readiness.
+	filtered := filterPodsByRole(allPods, "worker")
+	if len(filtered) != 2 {
+		t.Fatalf("expected 2 workers, got %d", len(filtered))
+	}
+
+	running := filterRunningPods(filtered)
+	if len(running) != 1 {
+		t.Fatalf("expected 1 running worker, got %d", len(running))
+	}
+
+	// The denominator should be 2 (filtered workers), not 3 (all pods).
+	notReady := len(filtered) - len(running)
+	errMsg := fmt.Sprintf("%d/%d pods are not running", notReady, len(filtered))
+	if !strings.Contains(errMsg, "1/2 pods are not running") {
+		t.Fatalf("readiness check should scope to filtered pods: %s", errMsg)
+	}
+}
+
+func TestCpReadinessCheckAllPodsRunning(t *testing.T) {
+	allPods := []kube.PodSummary{
+		{Name: "sess-worker-0", Phase: "Running"},
+		{Name: "sess-worker-1", Phase: "Running"},
+		{Name: "sess-worker-2", Phase: "Running"},
+	}
+
+	running := filterRunningPods(allPods)
+	if len(running) != len(allPods) {
+		t.Fatalf("all pods should be running, got %d/%d", len(running), len(allPods))
+	}
+	// No error should be produced — readyOnly doesn't matter when all pods are running.
+}
+
+func TestCpReadinessCheckNoneRunning(t *testing.T) {
+	allPods := []kube.PodSummary{
+		{Name: "sess-worker-0", Phase: "Pending"},
+		{Name: "sess-worker-1", Phase: "ContainerCreating"},
+	}
+
+	running := filterRunningPods(allPods)
+	if len(running) != 0 {
+		t.Fatalf("expected 0 running pods, got %d", len(running))
+	}
+
+	errMsg := fmt.Sprintf("no running pods in session %q (0/%d pods ready)", "test-session", len(allPods))
+	if !strings.Contains(errMsg, "0/2 pods ready") {
+		t.Fatalf("unexpected zero-running error: %s", errMsg)
+	}
+}
+
+func TestCpReadinessCheckReadyOnlyBypass(t *testing.T) {
+	allPods := []kube.PodSummary{
+		{Name: "sess-worker-0", Phase: "Running"},
+		{Name: "sess-worker-1", Phase: "Pending"},
+		{Name: "sess-worker-2", Phase: "Running"},
+	}
+
+	running := filterRunningPods(allPods)
+	if len(running) != 2 {
+		t.Fatalf("expected 2 running pods, got %d", len(running))
+	}
+
+	// With readyOnly=true, we proceed with running pods despite the gap.
+	readyOnly := true
+	shouldError := len(running) < len(allPods) && !readyOnly
+	if shouldError {
+		t.Fatal("readyOnly=true should bypass the error")
+	}
+
+	// With readyOnly=false, the gap triggers an error.
+	readyOnly = false
+	shouldError = len(running) < len(allPods) && !readyOnly
+	if !shouldError {
+		t.Fatal("readyOnly=false should trigger an error when pods are not running")
+	}
+}