From 32f46b7769d234a83eeab9ca41e62c1a7f24f09b Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Tue, 7 Apr 2026 20:57:34 -0500 Subject: [PATCH] fix(bootstrap): fix nftables healthcheck and warn on missing flannel modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flannel's embedded traffic manager in k3s v1.35.x is compiled without the nft backend — it only has iptables-legacy support, which requires kernel modules (ip_tables, iptable_nat, iptable_filter, iptable_mangle) that modern distributions (Fedora 43+, RHEL 10+) no longer load by default. Changes: - cluster-entrypoint.sh: When running under Podman, check whether the iptable_nat module is loaded and emit an actionable warning if not. The modules are loaded at install time by the RPM %post scriptlet and persisted across reboots via modules-load.d; the warning covers non-RPM installs where the modules may not be loaded. - cluster-healthcheck.sh: Replace the hardcoded 127.0.0.1 NodePort check with the node's actual InternalIP. When kube-proxy runs in nftables mode, NodePort DNAT rules only match the node's real IP addresses — loopback is not in the nftables nodeport-ips set, so the old check always failed. Tested on Fedora 43 (kernel 6.19, Podman 5.8.1) with the full lifecycle: gateway start, provider create/list/delete, sandbox create/exec/delete. --- deploy/docker/cluster-entrypoint.sh | 26 ++++++++++++++++++++++++-- deploy/docker/cluster-healthcheck.sh | 18 ++++++++++++++---- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh index 769a2945e..930680317 100644 --- a/deploy/docker/cluster-entrypoint.sh +++ b/deploy/docker/cluster-entrypoint.sh @@ -675,12 +675,34 @@ fi # Select kube-proxy mode # --------------------------------------------------------------------------- # Under Podman, use native nftables kube-proxy mode so no legacy iptables -# kernel modules (ip_tables, iptable_nat, etc.) 
are required on the host. -# Docker retains the default iptables mode for maximum compatibility. +# kernel modules are needed for kube-proxy service routing. +# +# Flannel's embedded traffic manager in k3s v1.35.x still uses the iptables +# binary (no nft backend compiled in). The iptables binary inside the +# container is iptables-legacy, which requires the iptable_nat, iptable_filter, +# and ip_tables kernel modules. Modern distributions (Fedora 43+, RHEL 10+) +# no longer load these modules by default. The RPM %post scriptlet both +# loads the modules immediately and installs a modules-load.d config for +# persistence across reboots. The warning below covers non-RPM installs. +# +# Docker retains the default iptables kube-proxy mode for maximum compatibility. EXTRA_KUBE_PROXY_ARGS="" if [ "${CONTAINER_RUNTIME:-}" = "podman" ]; then echo "Podman detected — using nftables kube-proxy mode" EXTRA_KUBE_PROXY_ARGS="--kube-proxy-arg=proxy-mode=nftables" + + # Verify legacy iptables kernel modules are loaded on the host. + # Flannel's traffic manager calls iptables-legacy for masquerade rules, + # which requires iptable_nat and related modules. The RPM loads these + # at install time and persists them via modules-load.d, but they may be + # absent on non-RPM installs or manually configured systems. + if ! grep -q '^iptable_nat ' /proc/modules 2>/dev/null; then + echo "Warning: iptable_nat kernel module is not loaded on the host." >&2 + echo " Flannel masquerade rules will fail without it." >&2 + echo " Load it now with: sudo modprobe iptable_nat" >&2 + echo " To persist across reboots:" >&2 + echo " echo iptable_nat | sudo tee /etc/modules-load.d/openshell-flannel.conf" >&2 + fi fi # Execute k3s with explicit resolv-conf passed as a kubelet arg. 
diff --git a/deploy/docker/cluster-healthcheck.sh b/deploy/docker/cluster-healthcheck.sh index e2828c6e5..c6eb8bd34 100644 --- a/deploy/docker/cluster-healthcheck.sh +++ b/deploy/docker/cluster-healthcheck.sh @@ -75,8 +75,18 @@ kubectl -n openshell get secret openshell-ssh-handshake >/dev/null 2>&1 || exit # --------------------------------------------------------------------------- # Verify the gateway NodePort (30051) is actually accepting TCP connections. # After a container restart, kube-proxy may need extra time to re-program -# iptables rules for NodePort routing. Without this check the health check -# can pass before the port is routable, causing "Connection refused" on the -# host-mapped port. +# iptables/nftables rules for NodePort routing. Without this check the +# health check can pass before the port is routable, causing "Connection +# refused" on the host-mapped port. +# +# When kube-proxy runs in nftables mode (Podman), NodePort DNAT rules only +# match traffic destined to the node's real IP addresses — loopback +# (127.0.0.1) is not in the nodeport-ips set. Use the node's InternalIP +# so the check works with both iptables and nftables kube-proxy modes. # --------------------------------------------------------------------------- -timeout 2 bash -c 'echo >/dev/tcp/127.0.0.1/30051' 2>/dev/null || exit 1 +NODEPORT_CHECK_IP="127.0.0.1" +NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}' 2>/dev/null || true) +if [ -n "$NODE_IP" ]; then + NODEPORT_CHECK_IP="$NODE_IP" +fi +timeout 2 bash -c "echo >/dev/tcp/${NODEPORT_CHECK_IP}/30051" 2>/dev/null || exit 1