From 32f46b7769d234a83eeab9ca41e62c1a7f24f09b Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Tue, 7 Apr 2026 20:57:34 -0500 Subject: [PATCH] fix(bootstrap): fix nftables healthcheck and warn on missing flannel modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flannel's embedded traffic manager in k3s v1.35.x is compiled without the nft backend — it only has iptables-legacy support, which requires kernel modules (ip_tables, iptable_nat, iptable_filter, iptable_mangle) that modern distributions (Fedora 43+, RHEL 10+) no longer load by default. Changes: - cluster-entrypoint.sh: When running under Podman, check whether the iptable_nat module is loaded and emit an actionable warning if not. The modules are loaded at install time by the RPM %post scriptlet and persisted across reboots via modules-load.d; the warning covers non-RPM installs where the modules may not be loaded. - cluster-healthcheck.sh: Replace the hardcoded 127.0.0.1 NodePort check with the node's actual InternalIP. When kube-proxy runs in nftables mode, NodePort DNAT rules only match the node's real IP addresses — loopback is not in the nftables nodeport-ips set, so the old check always failed. Tested on Fedora 43 (kernel 6.19, Podman 5.8.1) with the full lifecycle: gateway start, provider create/list/delete, sandbox create/exec/delete. --- deploy/docker/cluster-entrypoint.sh | 26 ++++++++++++++++++++++++-- deploy/docker/cluster-healthcheck.sh | 18 ++++++++++++++---- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh index 769a2945e..930680317 100644 --- a/deploy/docker/cluster-entrypoint.sh +++ b/deploy/docker/cluster-entrypoint.sh @@ -675,12 +675,34 @@ fi # Select kube-proxy mode # --------------------------------------------------------------------------- # Under Podman, use native nftables kube-proxy mode so no legacy iptables -# kernel modules (ip_tables, iptable_nat, etc.) 
are required on the host. -# Docker retains the default iptables mode for maximum compatibility. +# kernel modules are needed for kube-proxy service routing. +# +# Flannel's embedded traffic manager in k3s v1.35.x still uses the iptables +# binary (no nft backend compiled in). The iptables binary inside the +# container is iptables-legacy, which requires the iptable_nat, iptable_filter, +# and ip_tables kernel modules. Modern distributions (Fedora 43+, RHEL 10+) +# no longer load these modules by default. The RPM %post scriptlet both +# loads the modules immediately and installs a modules-load.d config for +# persistence across reboots. The warning below covers non-RPM installs. +# +# Docker retains the default iptables kube-proxy mode for maximum compatibility. EXTRA_KUBE_PROXY_ARGS="" if [ "${CONTAINER_RUNTIME:-}" = "podman" ]; then echo "Podman detected — using nftables kube-proxy mode" EXTRA_KUBE_PROXY_ARGS="--kube-proxy-arg=proxy-mode=nftables" + + # Verify legacy iptables kernel modules are loaded on the host. + # Flannel's traffic manager calls iptables-legacy for masquerade rules, + # which requires iptable_nat and related modules. The RPM loads these + # at install time and persists them via modules-load.d, but they may be + # absent on non-RPM installs or manually configured systems. + if ! grep -q '^iptable_nat ' /proc/modules 2>/dev/null; then + echo "Warning: iptable_nat kernel module is not loaded on the host." >&2 + echo " Flannel masquerade rules will fail without it." >&2 + echo " Load it now with: sudo modprobe iptable_nat" >&2 + echo " To persist across reboots:" >&2 + echo " echo iptable_nat | sudo tee /etc/modules-load.d/openshell-flannel.conf" >&2 + fi fi # Execute k3s with explicit resolv-conf passed as a kubelet arg. 
diff --git a/deploy/docker/cluster-healthcheck.sh b/deploy/docker/cluster-healthcheck.sh index e2828c6e5..c6eb8bd34 100644 --- a/deploy/docker/cluster-healthcheck.sh +++ b/deploy/docker/cluster-healthcheck.sh @@ -75,8 +75,18 @@ kubectl -n openshell get secret openshell-ssh-handshake >/dev/null 2>&1 || exit # --------------------------------------------------------------------------- # Verify the gateway NodePort (30051) is actually accepting TCP connections. # After a container restart, kube-proxy may need extra time to re-program -# iptables rules for NodePort routing. Without this check the health check -# can pass before the port is routable, causing "Connection refused" on the -# host-mapped port. +# iptables/nftables rules for NodePort routing. Without this check the +# health check can pass before the port is routable, causing "Connection +# refused" on the host-mapped port. +# +# When kube-proxy runs in nftables mode (Podman), NodePort DNAT rules only +# match traffic destined to the node's real IP addresses — loopback +# (127.0.0.1) is not in the nodeport-ips set. Use the node's InternalIP +# so the check works with both iptables and nftables kube-proxy modes. # --------------------------------------------------------------------------- -timeout 2 bash -c 'echo >/dev/tcp/127.0.0.1/30051' 2>/dev/null || exit 1 +NODEPORT_CHECK_IP="127.0.0.1" +NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}' 2>/dev/null || true) +if [ -n "$NODE_IP" ]; then + NODEPORT_CHECK_IP="$NODE_IP" +fi +timeout 2 bash -c "echo >/dev/tcp/${NODEPORT_CHECK_IP}/30051" 2>/dev/null || exit 1