From 06bac3617d0abe6d7c30d17beadd4048faa55c79 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Tue, 7 Apr 2026 16:17:17 -0500 Subject: [PATCH 1/4] fix(bootstrap): load kernel modules on install and fix Podman socket detection RPM spec: - Add br_netfilter to modules-load.d config for K3s bridge netfilter - Ship sysctl.d/99-openshell.conf with net.bridge.bridge-nf-call-iptables - Add %post scriptlet to modprobe modules immediately (no reboot required) - Add Recommends: podman-docker as belt-and-suspenders for socket compat Podman socket detection: - Add connect_local_auto() helper in docker.rs for auto-detecting runtime - Replace all 7 Docker::connect_with_local_defaults() calls outside docker.rs with runtime-aware alternatives (connect_local, connect_local_auto, or metadata-based lookup with fallback) - Remove unused bollard::Docker import from build.rs --- crates/openshell-bootstrap/src/build.rs | 11 ++++------- crates/openshell-bootstrap/src/docker.rs | 10 ++++++++++ crates/openshell-bootstrap/src/lib.rs | 20 +++++++++++++++----- openshell.spec | 22 ++++++++++++++++++++++ 4 files changed, 51 insertions(+), 12 deletions(-) diff --git a/crates/openshell-bootstrap/src/build.rs b/crates/openshell-bootstrap/src/build.rs index eaa221311..19f3ea354 100644 --- a/crates/openshell-bootstrap/src/build.rs +++ b/crates/openshell-bootstrap/src/build.rs @@ -10,7 +10,6 @@ use std::collections::HashMap; use std::path::Path; -use bollard::Docker; use bollard::query_parameters::BuildImageOptionsBuilder; use futures::StreamExt; use miette::{IntoDiagnostic, Result, WrapErr}; @@ -46,9 +45,8 @@ pub async fn build_and_push_image( on_log(format!( "Pushing image {tag} into gateway \"{gateway_name}\"" )); - let local_docker = Docker::connect_with_local_defaults() - .into_diagnostic() - .wrap_err("failed to connect to local Docker daemon")?; + let local_docker = crate::docker::connect_local_auto() + .wrap_err("failed to connect to local container runtime")?; let container = 
container_name(gateway_name); let images: Vec<&str> = vec![tag]; push_local_images(&local_docker, &local_docker, &container, &images, on_log).await?; @@ -68,9 +66,8 @@ async fn build_image( build_args: &HashMap<String, String>, on_log: &mut impl FnMut(String), ) -> Result<()> { - let docker = Docker::connect_with_local_defaults() - .into_diagnostic() - .wrap_err("failed to connect to local Docker daemon")?; + let docker = crate::docker::connect_local_auto() + .wrap_err("failed to connect to local container runtime")?; // Compute the relative path of the Dockerfile within the context. let dockerfile_relative = dockerfile_path diff --git a/crates/openshell-bootstrap/src/docker.rs b/crates/openshell-bootstrap/src/docker.rs index ff29bbd53..fd67e50c5 100644 --- a/crates/openshell-bootstrap/src/docker.rs +++ b/crates/openshell-bootstrap/src/docker.rs @@ -263,6 +263,16 @@ pub(crate) fn connect_local(runtime: ContainerRuntime) -> Result<Docker> { } } +/// Connect to the local container runtime with auto-detection. +/// +/// This is a convenience wrapper for code paths that need a Docker client +/// but don't have a `ContainerRuntime` value available. It auto-detects +/// the runtime (Podman preferred) and connects via `connect_local`. +pub(crate) fn connect_local_auto() -> Result<Docker> { + let runtime = crate::container_runtime::detect_runtime(None)?; + connect_local(runtime) +} + /// Build a rich, user-friendly error when a container runtime is not reachable.
fn runtime_not_reachable_error( runtime: ContainerRuntime, diff --git a/crates/openshell-bootstrap/src/lib.rs b/crates/openshell-bootstrap/src/lib.rs index 3eb78ab8d..bfe2e0242 100644 --- a/crates/openshell-bootstrap/src/lib.rs +++ b/crates/openshell-bootstrap/src/lib.rs @@ -544,7 +544,7 @@ where .collect(); if !images.is_empty() { log("[status] Deploying components".to_string()); - let local_docker = Docker::connect_with_local_defaults().into_diagnostic()?; + let local_docker = docker::connect_local(runtime)?; let container = container_name(&name); let on_log_ref = Arc::clone(&on_log); let mut push_log = move |msg: String| { @@ -669,7 +669,12 @@ pub async fn extract_and_store_pki( ) -> Result<()> { let docker = match remote { Some(r) => create_ssh_docker_client(r).await?, - None => Docker::connect_with_local_defaults().into_diagnostic()?, + None => { + let runtime = get_gateway_metadata(name) + .map(|m| m.container_runtime) + .unwrap_or_else(|| detect_runtime(None).unwrap_or(ContainerRuntime::Docker)); + docker::connect_local(runtime)? 
+ } }; let cname = docker::find_gateway_container(&docker, port).await?; let bundle = load_existing_pki_bundle(&docker, &cname, constants::KUBECONFIG_PATH) @@ -684,7 +689,7 @@ pub async fn ensure_gateway_image( registry_username: Option<&str>, registry_token: Option<&str>, ) -> Result<String> { - let docker = Docker::connect_with_local_defaults().into_diagnostic()?; + let docker = docker::connect_local_auto()?; let image_ref = format!("{}:{version}", image::DEFAULT_GATEWAY_IMAGE); ensure_image(&docker, &image_ref, registry_username, registry_token).await?; Ok(image_ref) @@ -712,7 +717,12 @@ pub async fn gateway_container_logs( let docker = match remote { Some(remote_opts) => create_ssh_docker_client(remote_opts).await?, - None => Docker::connect_with_local_defaults().into_diagnostic()?, + None => { + let runtime = get_gateway_metadata(name) + .map(|m| m.container_runtime) + .unwrap_or_else(|| detect_runtime(None).unwrap_or(ContainerRuntime::Docker)); + docker::connect_local(runtime)? + } }; let container = container_name(name); @@ -765,7 +775,7 @@ pub async fn gateway_container_logs( /// Returns an empty string on any Docker/connection error so callers don't /// need to worry about error handling. pub async fn fetch_gateway_logs(name: &str, n: usize) -> String { - let docker = match Docker::connect_with_local_defaults() { + let docker = match docker::connect_local_auto() { Ok(d) => d, Err(_) => return String::new(), }; diff --git a/openshell.spec b/openshell.spec index e128b5683..d61e37a53 100644 --- a/openshell.spec +++ b/openshell.spec @@ -39,6 +39,10 @@ BuildRequires: python3-devel # Runtime: container runtime for gateway lifecycle (start/stop/destroy). # Podman is preferred; Docker is also supported via --container-runtime flag. Recommends: podman +# When Podman is the container runtime, podman-docker provides the +# /var/run/docker.sock symlink and `docker` CLI alias that third-party +# libraries (e.g., bollard) expect.
+Recommends: podman-docker %description OpenShell provides safe, sandboxed runtimes for autonomous AI agents. @@ -107,6 +111,17 @@ ip_tables iptable_nat iptable_filter iptable_mangle +br_netfilter +EOF + +# Install sysctl.d config for bridge netfilter settings required by K3s. +install -d %{buildroot}%{_sysctldir} +cat > %{buildroot}%{_sysctldir}/99-%{name}.conf << 'EOF' +# Enable bridge netfilter call chains for K3s pod-to-service networking. +# Required after br_netfilter is loaded so kube-proxy DNAT rules apply +# to bridged pod traffic. +net.bridge.bridge-nf-call-iptables = 1 +net.bridge.bridge-nf-call-ip6tables = 1 EOF # Install Python SDK modules (test files are intentionally excluded) @@ -138,6 +153,12 @@ echo "rpm" > %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info/INSTALL # RECORD can be empty for RPM-managed installs touch %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info/RECORD +%post +# Load kernel modules immediately so a reboot is not required after +# initial installation. The modules-load.d config handles subsequent boots. +modprobe -a ip_tables iptable_nat iptable_filter iptable_mangle br_netfilter > /dev/null 2>&1 || : +%sysctl_apply 99-%{name}.conf + %check # Smoke-test the CLI binary %{buildroot}%{_bindir}/%{name} --version @@ -153,6 +174,7 @@ PYTHONPATH=%{buildroot}%{python3_sitelib} %{python3} -c "from importlib.metadata %doc README.md %{_bindir}/%{name} %{_modulesloaddir}/%{name}.conf +%{_sysctldir}/99-%{name}.conf %files -n python3-%{name} %license LICENSE From 78067b139a8cbda779d97d7ec8bcfb7b814b7b24 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Tue, 7 Apr 2026 16:32:54 -0500 Subject: [PATCH 2/4] refactor(bootstrap): deduplicate runtime resolution for gateway connections Add connect_for_gateway(name) helper that resolves the container runtime from stored gateway metadata first, falling back to detect_runtime() with full error propagation instead of silently defaulting to Docker. 
Replace the duplicated inline metadata-detect-fallback blocks in extract_and_store_pki and gateway_container_logs with the new helper. --- crates/openshell-bootstrap/src/docker.rs | 17 +++++++++++++++++ crates/openshell-bootstrap/src/lib.rs | 14 ++------------ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/crates/openshell-bootstrap/src/docker.rs b/crates/openshell-bootstrap/src/docker.rs index fd67e50c5..02c9a27ad 100644 --- a/crates/openshell-bootstrap/src/docker.rs +++ b/crates/openshell-bootstrap/src/docker.rs @@ -273,6 +273,23 @@ pub(crate) fn connect_local_auto() -> Result<Docker> { connect_local(runtime) } +/// Connect to the local container runtime for an existing gateway. +/// +/// Resolution order: +/// 1. Stored runtime from gateway metadata (if metadata exists) +/// 2. Auto-detect runtime via `detect_runtime` (propagates error on failure) +/// +/// This is used by code paths that have a gateway `name` but no `runtime` +/// in scope. Unlike `connect_local_auto()`, this checks metadata first so +/// that gateways deployed with a specific runtime reconnect to the same one. +pub(crate) fn connect_for_gateway(name: &str) -> Result<Docker> { + let runtime = match crate::metadata::get_gateway_metadata(name) { + Some(m) => m.container_runtime, + None => crate::container_runtime::detect_runtime(None)?, + }; + connect_local(runtime) +} + /// Build a rich, user-friendly error when a container runtime is not reachable.
fn runtime_not_reachable_error( runtime: ContainerRuntime, diff --git a/crates/openshell-bootstrap/src/lib.rs b/crates/openshell-bootstrap/src/lib.rs index bfe2e0242..e7290c040 100644 --- a/crates/openshell-bootstrap/src/lib.rs +++ b/crates/openshell-bootstrap/src/lib.rs @@ -669,12 +669,7 @@ pub async fn extract_and_store_pki( ) -> Result<()> { let docker = match remote { Some(r) => create_ssh_docker_client(r).await?, - None => { - let runtime = get_gateway_metadata(name) - .map(|m| m.container_runtime) - .unwrap_or_else(|| detect_runtime(None).unwrap_or(ContainerRuntime::Docker)); - docker::connect_local(runtime)? - } + None => docker::connect_for_gateway(name)?, }; let cname = docker::find_gateway_container(&docker, port).await?; let bundle = load_existing_pki_bundle(&docker, &cname, constants::KUBECONFIG_PATH) @@ -717,12 +712,7 @@ pub async fn gateway_container_logs( let docker = match remote { Some(remote_opts) => create_ssh_docker_client(remote_opts).await?, - None => { - let runtime = get_gateway_metadata(name) - .map(|m| m.container_runtime) - .unwrap_or_else(|| detect_runtime(None).unwrap_or(ContainerRuntime::Docker)); - docker::connect_local(runtime)? 
- } + None => docker::connect_for_gateway(name)?, }; let container = container_name(name); From aac57d08caafde7f85d52db6b2ff3de34d866753 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Tue, 7 Apr 2026 17:25:45 -0500 Subject: [PATCH 3/4] feat(bootstrap): use native nftables under Podman, drop legacy iptables modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running under Podman, the k3s cluster now uses: - Native nftables kube-proxy mode (--kube-proxy-arg=proxy-mode=nftables) - Host DNS resolution instead of iptables DNAT proxy (Podman DNS is routable) - Skipped iptables backend probe (unnecessary with nftables kube-proxy) This eliminates the need for legacy iptables kernel modules (ip_tables, iptable_nat, iptable_filter, iptable_mangle) on the host when using Podman. The Docker path is completely unchanged — all new behavior is gated on CONTAINER_RUNTIME=podman. Container image: add nftables package (provides nft binary for kube-proxy). RPM spec: modules-load.d now only loads br_netfilter (still required for bridged pod traffic regardless of iptables/nftables). Remove podman-docker recommends (no longer needed with native Podman socket detection and nftables networking). --- crates/openshell-bootstrap/src/docker.rs | 5 ++ deploy/docker/Dockerfile.images | 1 + deploy/docker/cluster-entrypoint.sh | 97 +++++++++++++++--------- openshell.spec | 31 ++++---- 4 files changed, 81 insertions(+), 53 deletions(-) diff --git a/crates/openshell-bootstrap/src/docker.rs b/crates/openshell-bootstrap/src/docker.rs index 02c9a27ad..bccf60de5 100644 --- a/crates/openshell-bootstrap/src/docker.rs +++ b/crates/openshell-bootstrap/src/docker.rs @@ -878,6 +878,11 @@ pub async fn ensure_container( env_vars.push("GPU_ENABLED=true".to_string()); } + // Pass the container runtime to the entrypoint so it can select the + // appropriate networking stack (nftables kube-proxy for Podman, iptables + // DNS proxy for Docker, etc.). 
+ env_vars.push(format!("CONTAINER_RUNTIME={}", runtime.binary_name())); + let env = Some(env_vars); // Set the health check explicitly on the container config so it works diff --git a/deploy/docker/Dockerfile.images b/deploy/docker/Dockerfile.images index 837f4fb9c..05149765b 100644 --- a/deploy/docker/Dockerfile.images +++ b/deploy/docker/Dockerfile.images @@ -233,6 +233,7 @@ RUN dnf install -y fedora-repos && \ dnf install -y \ ca-certificates \ iptables \ + nftables \ util-linux \ bind-utils \ && dnf clean all diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh index 14f661ba3..769a2945e 100644 --- a/deploy/docker/cluster-entrypoint.sh +++ b/deploy/docker/cluster-entrypoint.sh @@ -34,22 +34,13 @@ yaml_quote() { printf "'%s'" "$(printf '%s' "$1" | sed "s/'/''/g")" } -# --------------------------------------------------------------------------- -# Select iptables backend -# --------------------------------------------------------------------------- -# Some kernels (e.g. Jetson Linux 5.15-tegra) have the nf_tables subsystem -# but lack the nft_compat bridge that allows flannel and kube-proxy to use -# xt extension modules (xt_comment, xt_conntrack). Detect this by probing -# whether xt_comment is usable via the current iptables backend. If the -# probe fails, switch to iptables-legacy. Set USE_IPTABLES_LEGACY=1 -# externally to skip the probe and force the legacy backend. # --------------------------------------------------------------------------- # Check br_netfilter kernel module # --------------------------------------------------------------------------- # br_netfilter makes the kernel pass bridge (pod-to-pod) traffic through -# iptables. Without it, kube-proxy's DNAT rules for ClusterIP services are -# never applied to pod traffic, so pods cannot reach services such as -# kube-dns (10.43.0.10), breaking all in-cluster DNS resolution. +# netfilter (iptables or nftables). 
Without it, kube-proxy's DNAT rules for +# ClusterIP services are never applied to pod traffic, so pods cannot reach +# services such as kube-dns (10.43.0.10), breaking all in-cluster DNS. # # The module must be loaded on the HOST before the container starts — # containers cannot load kernel modules themselves. If it is missing, log a @@ -65,25 +56,37 @@ if [ ! -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then echo " echo br_netfilter | sudo tee /etc/modules-load.d/br_netfilter.conf" >&2 fi -if [ -z "${USE_IPTABLES_LEGACY:-}" ]; then - if iptables -t filter -N _xt_probe 2>/dev/null; then - _probe_rc=0 - iptables -t filter -A _xt_probe -m comment --comment "probe" -j ACCEPT \ - 2>/dev/null || _probe_rc=$? - iptables -t filter -D _xt_probe -m comment --comment "probe" -j ACCEPT \ - 2>/dev/null || true - iptables -t filter -X _xt_probe 2>/dev/null || true - [ "$_probe_rc" -ne 0 ] && USE_IPTABLES_LEGACY=1 +# --------------------------------------------------------------------------- +# Select iptables backend (Docker only) +# --------------------------------------------------------------------------- +# Under Podman with nftables kube-proxy mode, the iptables backend probe is +# unnecessary — kube-proxy uses nft directly. Flannel still uses the iptables +# binary but through the nft compat shim which doesn't need the xt probe. +# +# Under Docker (or unset runtime), probe whether xt_comment is usable. Some +# kernels (e.g. Jetson Linux 5.15-tegra) have nf_tables but lack the +# nft_compat bridge. If the probe fails, switch to iptables-legacy. +if [ "${CONTAINER_RUNTIME:-}" != "podman" ]; then + if [ -z "${USE_IPTABLES_LEGACY:-}" ]; then + if iptables -t filter -N _xt_probe 2>/dev/null; then + _probe_rc=0 + iptables -t filter -A _xt_probe -m comment --comment "probe" -j ACCEPT \ + 2>/dev/null || _probe_rc=$? 
+ iptables -t filter -D _xt_probe -m comment --comment "probe" -j ACCEPT \ + 2>/dev/null || true + iptables -t filter -X _xt_probe 2>/dev/null || true + [ "$_probe_rc" -ne 0 ] && USE_IPTABLES_LEGACY=1 + fi fi -fi -if [ "${USE_IPTABLES_LEGACY:-0}" = "1" ]; then - echo "iptables nf_tables xt extension bridge unavailable — switching to iptables-legacy" - if update-alternatives --set iptables /usr/sbin/iptables-legacy 2>/dev/null && - update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy 2>/dev/null; then - echo "Now using iptables-legacy mode" - else - echo "Warning: could not switch to iptables-legacy — cluster networking may fail" + if [ "${USE_IPTABLES_LEGACY:-0}" = "1" ]; then + echo "iptables nf_tables xt extension bridge unavailable — switching to iptables-legacy" + if update-alternatives --set iptables /usr/sbin/iptables-legacy 2>/dev/null && + update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy 2>/dev/null; then + echo "Now using iptables-legacy mode" + else + echo "Warning: could not switch to iptables-legacy — cluster networking may fail" + fi fi fi @@ -174,13 +177,20 @@ setup_dns_proxy() { echo "Configured k3s DNS to use ${CONTAINER_IP} (proxied to Docker DNS)" } -if ! setup_dns_proxy; then - echo "DNS proxy setup failed, falling back to public DNS servers" - echo "Note: this may not work on Docker Desktop (Mac/Windows)" - cat >"$RESOLV_CONF" <"$RESOLV_CONF" < %{buildroot}%{_modulesloaddir}/%{name}.conf << 'EOF' -# Load legacy iptables kernel modules required by k3s flannel CNI. -# Modern kernels use nf_tables by default; these modules provide the -# legacy iptables interface that k3s's bundled iptables-legacy needs. -ip_tables -iptable_nat -iptable_filter -iptable_mangle +# Load br_netfilter for K3s bridge networking. +# Required so kube-proxy DNAT rules (iptables or nftables) apply to +# bridged pod-to-pod traffic for ClusterIP service resolution. 
br_netfilter EOF @@ -154,9 +149,9 @@ echo "rpm" > %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info/INSTALL touch %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info/RECORD %post -# Load kernel modules immediately so a reboot is not required after -# initial installation. The modules-load.d config handles subsequent boots. -modprobe -a ip_tables iptable_nat iptable_filter iptable_mangle br_netfilter > /dev/null 2>&1 || : +# Load br_netfilter immediately so a reboot is not required after install. +# The modules-load.d config handles subsequent boots. +modprobe br_netfilter > /dev/null 2>&1 || : %sysctl_apply 99-%{name}.conf %check From 56201712bc5e91759fc0f1a3441cbbf1abf61a2c Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Tue, 7 Apr 2026 17:58:26 -0500 Subject: [PATCH 4/4] ci: tag container images with dev alongside midstream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add :dev tag to both gateway and cluster multi-arch manifests in the midstream container build workflow. Local cargo builds default to the dev tag (OPENSHELL_IMAGE_TAG is unset), so this ensures locally-built CLI binaries can pull images from GHCR without needing to override the tag. The dev and midstream tags are kept in sync — both point to the same image built from the midstream branch on every merge. 
--- .github/workflows/midstream-container-build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/midstream-container-build.yml b/.github/workflows/midstream-container-build.yml index a68a82754..bfd6b2f83 100644 --- a/.github/workflows/midstream-container-build.yml +++ b/.github/workflows/midstream-container-build.yml @@ -135,6 +135,7 @@ jobs: docker buildx imagetools create \ -t "${{ env.IMAGE_REGISTRY }}/gateway:${{ github.sha }}" \ -t "${{ env.IMAGE_REGISTRY }}/gateway:midstream" \ + -t "${{ env.IMAGE_REGISTRY }}/gateway:dev" \ "${{ env.IMAGE_REGISTRY }}/gateway:${{ github.sha }}-amd64" \ "${{ env.IMAGE_REGISTRY }}/gateway:${{ github.sha }}-arm64" @@ -159,5 +160,6 @@ jobs: docker buildx imagetools create \ -t "${{ env.IMAGE_REGISTRY }}/cluster:${{ github.sha }}" \ -t "${{ env.IMAGE_REGISTRY }}/cluster:midstream" \ + -t "${{ env.IMAGE_REGISTRY }}/cluster:dev" \ "${{ env.IMAGE_REGISTRY }}/cluster:${{ github.sha }}-amd64" \ "${{ env.IMAGE_REGISTRY }}/cluster:${{ github.sha }}-arm64"