diff --git a/README.md b/README.md index fc688a3..447297c 100644 --- a/README.md +++ b/README.md @@ -51,11 +51,13 @@ See [docs/why-init-container.md](docs/why-init-container.md) for the full ration # What the driver produces in the pod spec: initContainers: - name: supervisor-init - image: ghcr.io/nvidia/openshell-community/supervisor:latest - command: ["cp", "/usr/local/bin/openshell-sandbox", "/opt/openshell/bin/"] + image: ghcr.io/kagenti/openshell/supervisor:mvp-v2 + command: ["/openshell-sandbox", "copy-self", "/opt/openshell/bin/openshell-sandbox"] + securityContext: + runAsUser: 0 containers: - name: agent - command: ["/opt/openshell/bin/openshell-sandbox"] # supervisor runs first + command: ["/opt/openshell/bin/openshell-sandbox"] securityContext: runAsUser: 0 capabilities: @@ -124,8 +126,8 @@ openshell-gateway --compute-driver-socket /var/run/openshell-driver.sock |------|---------|---------| | `--socket` | `/var/run/openshell-driver.sock` | UDS path for gRPC | | `--namespace` | `openshell-system` | K8s namespace for sandboxes | -| `--supervisor-image` | `ghcr.io/nvidia/openshell-community/supervisor:latest` | Supervisor OCI image | -| `--supervisor-binary-path` | `/usr/local/bin/openshell-sandbox` | Binary path inside supervisor image | +| `--supervisor-image` | `ghcr.io/kagenti/openshell/supervisor:mvp-v2` | Supervisor OCI image | +| `--supervisor-binary-path` | `/openshell-sandbox` | Binary path inside supervisor image | | `--supervisor-mount-path` | `/opt/openshell/bin` | Mount point in agent container | ## Gateway dependency diff --git a/cmd/driver/main.go b/cmd/driver/main.go index 8fa37d2..5cf547b 100644 --- a/cmd/driver/main.go +++ b/cmd/driver/main.go @@ -30,8 +30,6 @@ func main() { "Container image that contains the supervisor binary") flag.StringVar(&cfg.SupervisorBinaryPath, "supervisor-binary-path", cfg.SupervisorBinaryPath, "Path to the supervisor binary inside the supervisor image") - flag.StringVar(&cfg.DtachBinaryPath, "dtach-binary-path", cfg.DtachBinaryPath, - "Path to the dtach binary inside the supervisor image") flag.StringVar(&cfg.SupervisorMountPath, "supervisor-mount-path", cfg.SupervisorMountPath, "Mount path for the supervisor binary volume in the agent container") flag.StringVar(&cfg.GatewayEndpoint, "gateway-endpoint", cfg.GatewayEndpoint, diff --git a/docs/why-init-container.md b/docs/why-init-container.md index d0b09e9..60e4536 100644 --- a/docs/why-init-container.md +++ b/docs/why-init-container.md @@ -26,13 +26,15 @@ Additionally, a DaemonSet to pre-stage the binary on every node requires its own ## Our approach -We use an init container that copies the supervisor binary from a container image into an emptyDir volume shared with the agent container: +We use an init container that copies the supervisor binary from a container image into an emptyDir volume shared with the agent container. The supervisor binary's built-in `copy-self` subcommand handles the copy without requiring a shell or coreutils in the image (it's a scratch/distroless image): ```yaml initContainers: - name: supervisor-init - image: ghcr.io/nvidia/openshell-community/supervisor:latest - command: ["cp", "/usr/local/bin/openshell-sandbox", "/opt/openshell/bin/"] + image: ghcr.io/kagenti/openshell/supervisor:mvp-v2 + command: ["/openshell-sandbox", "copy-self", "/opt/openshell/bin/openshell-sandbox"] + securityContext: + runAsUser: 0 volumeMounts: - name: supervisor-bin mountPath: /opt/openshell/bin @@ -56,7 +58,7 @@ volumes: |---|---|---| | SCC requirement | Needs hostPath access (custom or privileged SCC) | Works without hostPath | | Node pre-staging | Required (DaemonSet or baked into node image) | Not required | -| Cold start cost | None (binary already on node) | One `cp` command (~15MB, <1 second) | +| Cold start cost | None (binary already on node) | One `copy-self` invocation (~15MB, <1 second) | | Image pull | None (binary on node filesystem) | One pull per node (cached after first) | | Supervisor version | Tied to what's on the node | Tied to init container image tag | | BYOC compatibility | Works with any agent image | Works with any agent image | diff --git a/internal/driver/config.go b/internal/driver/config.go index e4714e0..0237e78 100644 --- a/internal/driver/config.go +++ b/internal/driver/config.go @@ -5,7 +5,6 @@ type Config struct { Tenant string // openshell.ai/tenant and kagenti.io/team label value; defaults to Namespace if empty SupervisorImage string SupervisorBinaryPath string - DtachBinaryPath string SupervisorMountPath string GatewayEndpoint string TLSCASecret string // Secret name containing ca.crt for gateway TLS verification @@ -17,8 +16,7 @@ func DefaultConfig() Config { return Config{ Namespace: "openshell-system", SupervisorImage: "quay.io/azaalouk/openshell-supervisor:latest", - SupervisorBinaryPath: "/usr/local/bin/openshell-sandbox", - DtachBinaryPath: "/usr/local/bin/dtach", + SupervisorBinaryPath: "/openshell-sandbox", SupervisorMountPath: "/opt/openshell/bin", } } diff --git a/internal/driver/config_test.go b/internal/driver/config_test.go index 5283603..7e6bd02 100644 --- a/internal/driver/config_test.go +++ b/internal/driver/config_test.go @@ -12,8 +12,7 @@ func TestDefaultConfig(t *testing.T) { }{ {"Namespace", cfg.Namespace, "openshell-system"}, {"SupervisorImage", cfg.SupervisorImage, "quay.io/azaalouk/openshell-supervisor:latest"}, - {"SupervisorBinaryPath", cfg.SupervisorBinaryPath, "/usr/local/bin/openshell-sandbox"}, - {"DtachBinaryPath", cfg.DtachBinaryPath, "/usr/local/bin/dtach"}, + {"SupervisorBinaryPath", cfg.SupervisorBinaryPath, "/openshell-sandbox"}, {"SupervisorMountPath", cfg.SupervisorMountPath, "/opt/openshell/bin"}, } diff --git a/internal/driver/provisioner.go b/internal/driver/provisioner.go index 30e8739..66d5473 100644 --- a/internal/driver/provisioner.go +++ b/internal/driver/provisioner.go @@ -231,15 +231,19 @@ func (p *K8sProvisioner) buildSandboxSpec(sb *pb.DriverSandbox) map[string]inter spec := sb.GetSpec() tmpl := spec.GetTemplate() - // Supervisor init container copies both the supervisor and dtach binaries into the shared volume. + // Supervisor init container uses copy-self to install the binary into the shared volume. + // This avoids requiring sh/cp in the supervisor image (which is scratch/distroless). + installedPath := p.cfg.SupervisorMountPath + "/openshell-sandbox" initContainer := map[string]interface{}{ "name": "supervisor-init", "image": p.cfg.SupervisorImage, "command": []interface{}{ - "sh", "-c", - fmt.Sprintf("cp %s %s/ && cp %s %s/", - p.cfg.SupervisorBinaryPath, p.cfg.SupervisorMountPath, - p.cfg.DtachBinaryPath, p.cfg.SupervisorMountPath), + p.cfg.SupervisorBinaryPath, + "copy-self", + installedPath, + }, + "securityContext": map[string]interface{}{ + "runAsUser": int64(0), }, "volumeMounts": []interface{}{ map[string]interface{}{ diff --git a/internal/driver/provisioner_test.go b/internal/driver/provisioner_test.go index d3691b2..cba00c1 100644 --- a/internal/driver/provisioner_test.go +++ b/internal/driver/provisioner_test.go @@ -4,7 +4,6 @@ import ( "context" "log/slog" "os" - "strings" "testing" pb "github.com/zanetworker/openshell-driver-openshift/gen/computev1" @@ -186,17 +185,22 @@ func TestBuildSandboxSpec_SupervisorInitContainer(t *testing.T) { t.Errorf("expected image %s, got %v", cfg.SupervisorImage, initC["image"]) } - // Verify command copies both supervisor and dtach binaries via sh -c. + // Verify command uses copy-self (no shell required — works with scratch images). cmd := initC["command"].([]interface{}) - if len(cmd) != 3 || cmd[0] != "sh" || cmd[1] != "-c" { - t.Errorf("expected sh -c command, got %v", cmd) + expectedInitCmd := []string{cfg.SupervisorBinaryPath, "copy-self", cfg.SupervisorMountPath + "/openshell-sandbox"} + if len(cmd) != 3 { + t.Fatalf("expected 3-element command [binary, copy-self, dest], got %v", cmd) } - script := cmd[2].(string) - if !strings.Contains(script, cfg.SupervisorBinaryPath) { - t.Errorf("expected script to contain supervisor path %s, got %s", cfg.SupervisorBinaryPath, script) + for i, want := range expectedInitCmd { + if cmd[i] != want { + t.Errorf("command[%d] = %v, want %s", i, cmd[i], want) + } } - if !strings.Contains(script, cfg.DtachBinaryPath) { - t.Errorf("expected script to contain dtach path %s, got %s", cfg.DtachBinaryPath, script) + + // Verify init container has runAsUser: 0. + initSecCtx := initC["securityContext"].(map[string]interface{}) + if initSecCtx["runAsUser"] != int64(0) { + t.Errorf("expected init container runAsUser 0, got %v", initSecCtx["runAsUser"]) } // Verify agent container runs supervisor.