From d2cffa8eb440d262ce905cc27f787ee12bbf5da8 Mon Sep 17 00:00:00 2001 From: skullcmd Date: Tue, 28 Apr 2026 22:30:24 +0000 Subject: [PATCH] fix(install): robust kernel-version parser in AF_XDP runtime probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit probe_afxdp_runtime_available reported "kernel <5.10 or libxdp.so missing" on c6in.metal Debian 13 + kernel 6.12.74 even though both prerequisites were satisfied. The previous parameter-expansion parser silently mishandled some 3-component release shapes; the failure mode reported in PR #65 issuecomment-4339242358 (anygpt-52) was a generic "false" with no indication of which check fired, leaving the operator to guess. Fix: - New parse_kernel_major_minor() helper uses awk -F'[.-]' so 3-component releases like 6.12.74-cloud-amd64, 5.10.0-13-amd64, 6.12.74+deb13+1-amd64, and 5.4.282-rt all parse cleanly. Returns "MAJOR MINOR" on stdout, "0 0" on parse failure. - probe_afxdp_runtime_available emits a one-line stderr explanation whenever it returns "false" so the operator can immediately see which check fired ("kernel 4.19 < 5.10", "libxdp.so not in ldconfig -p", "could not parse kernel version"). Quiet on success. - apply_afxdp_availability captures the probe stderr and includes the reason in its summary log line — replaces the previous hardcoded "kernel <5.10 or libxdp.so missing" that was wrong half the time. - ANYSCAN_INSTALL_LOAD_ONLY=1 hook lets unit tests source the script for hermetic helper testing without triggering main(). Test (new tools/test-install-worker-bundle-afxdp-probe.sh, 21 cases): - parse_kernel_major_minor across 8 release shapes (clean 3-component, +deb13 suffix, -cloud-amd64 suffix, -rt suffix, 4.x, 2-component, 1-component, empty). - probe_afxdp_runtime_available with stubbed uname + ldconfig: c6in.metal 6.12.74 + libxdp → true (the bug 5 repro). Kernel 4.19 too old → false + stderr names the version. Kernel 5.9 vs 5.10 vs 5.11 boundary correctness. 
libxdp.so missing → false + stderr names the missing library. Empty/non-numeric uname → false + stderr names parse failure. Co-Authored-By: Claude Opus 4.7 (1M context) --- install-worker-bundle.sh | 69 +++++- .../test-install-worker-bundle-afxdp-probe.sh | 208 ++++++++++++++++++ 2 files changed, 266 insertions(+), 11 deletions(-) create mode 100755 tools/test-install-worker-bundle-afxdp-probe.sh diff --git a/install-worker-bundle.sh b/install-worker-bundle.sh index 0158c15..32a8af6 100755 --- a/install-worker-bundle.sh +++ b/install-worker-bundle.sh @@ -366,6 +366,25 @@ apply_host_resource_defaults() { fi } +# Parse the major.minor pair out of a `uname -r` output. Uses awk -F'[.-]' +# so 3-component releases like `6.12.74-cloud-amd64`, `5.10.0-13-amd64`, +# or `6.12.74+deb13+1-amd64` get parsed correctly — the previous +# parameter-expansion approach silently mishandled some shapes on +# Debian 13 + kernel 6.12 (anygpt-52, c6in.metal: probe reported +# "kernel <5.10 or libxdp.so missing" despite the kernel being 6.12). +# Echoes "MAJOR MINOR" on stdout. On parse failure, echoes "0 0" so +# callers can decide how to handle it. +parse_kernel_major_minor() { + local release="$1" + awk -F'[.-]' ' + { + major = $1 + 0 + minor = $2 + 0 + printf "%d %d\n", major, minor + } + ' <<<"$release" +} + probe_afxdp_runtime_available() { # Phase 2 PR C of plans/2026-04-27-portscan-afxdp-plan-v1.md §4.3. # The scanner can be invoked with --io-engine=af_xdp only when (a) the @@ -380,26 +399,37 @@ probe_afxdp_runtime_available() { # override to "true" by hand if they know the bundle's bin/scanner # was built with USE_AF_XDP=1 and they have the libs from a path not # visible to ldconfig (e.g. LD_LIBRARY_PATH). - local kernel_release kernel_major kernel_minor + # + # On stderr, the function emits a one-line reason whenever it + # returns "false" so the operator can tell which check failed + # (kernel too old vs. libxdp missing). Quiet on success. 
+ local kernel_release kernel_release="$(uname -r 2>/dev/null || true)" - if [ -z "$kernel_release" ]; then + + local kernel_major=0 kernel_minor=0 + if [ -n "$kernel_release" ]; then + read -r kernel_major kernel_minor < <(parse_kernel_major_minor "$kernel_release") + fi + if [ "$kernel_major" -eq 0 ]; then + printf '[probe-afxdp] could not parse kernel version from uname -r=%q\n' \ + "$kernel_release" >&2 printf 'false' return 0 fi - kernel_major="${kernel_release%%.*}" - local rest="${kernel_release#*.}" - kernel_minor="${rest%%.*}" - case "$kernel_major" in ''|*[!0-9]*) printf 'false'; return 0 ;; esac - case "$kernel_minor" in ''|*[!0-9]*) kernel_minor=0 ;; esac - if [ "$kernel_major" -lt 5 ] || { [ "$kernel_major" -eq 5 ] && [ "$kernel_minor" -lt 10 ]; }; then + if [ "$kernel_major" -lt 5 ] || \ + { [ "$kernel_major" -eq 5 ] && [ "$kernel_minor" -lt 10 ]; }; then + printf '[probe-afxdp] kernel %s.%s < 5.10\n' \ + "$kernel_major" "$kernel_minor" >&2 printf 'false' return 0 fi if ! command_exists ldconfig; then + printf '[probe-afxdp] ldconfig not on PATH (cannot verify libxdp.so)\n' >&2 printf 'false' return 0 fi if ! ldconfig -p 2>/dev/null | grep -q '\&2 printf 'false' return 0 fi @@ -411,14 +441,22 @@ apply_afxdp_availability() { # /etc/agentd/runtime.env and a partial upgrade can't leave a stale # "true" in place after the kernel was downgraded or libxdp was # uninstalled. This mirrors the AGENT_REMOTE_UPDATE_* pattern above. 
- local afxdp_available - afxdp_available="$(probe_afxdp_runtime_available)" + local afxdp_available probe_stderr + probe_stderr="$(mktemp)" + afxdp_available="$(probe_afxdp_runtime_available 2>"$probe_stderr")" upsert_env_value "ANYSCAN_AF_XDP_AVAILABLE" "$afxdp_available" "$RUNTIME_ENV_FILE" if [ "$afxdp_available" = "true" ]; then printf '[*] AF_XDP runtime probe passed (kernel + libxdp.so present); ANYSCAN_AF_XDP_AVAILABLE=true.\n' else - printf '[*] AF_XDP runtime probe failed (kernel <5.10 or libxdp.so missing); ANYSCAN_AF_XDP_AVAILABLE=false.\n' + local reason + reason="$(tail -n1 "$probe_stderr" 2>/dev/null || true)" + if [ -n "$reason" ]; then + printf '[*] AF_XDP runtime probe failed (%s); ANYSCAN_AF_XDP_AVAILABLE=false.\n' "$reason" + else + printf '[*] AF_XDP runtime probe failed; ANYSCAN_AF_XDP_AVAILABLE=false.\n' + fi fi + rm -f "$probe_stderr" } # True when the installed scanner binary at $1 was linked against librte_eal @@ -1066,4 +1104,13 @@ main() { fi } +# Test hook: when ANYSCAN_INSTALL_LOAD_ONLY=1 is set the script is being +# sourced for unit-test access to its helpers (tools/test-install- +# worker-bundle-afxdp-probe.sh) and must skip main(). `return` works in +# sourced bash; falling through to `exit` covers the unlikely case +# where the hook is set during a direct invocation. +if [ "${ANYSCAN_INSTALL_LOAD_ONLY:-0}" = "1" ]; then + return 0 2>/dev/null || exit 0 +fi + main "$@" diff --git a/tools/test-install-worker-bundle-afxdp-probe.sh b/tools/test-install-worker-bundle-afxdp-probe.sh new file mode 100755 index 0000000..3d3fa4e --- /dev/null +++ b/tools/test-install-worker-bundle-afxdp-probe.sh @@ -0,0 +1,208 @@ +#!/usr/bin/env bash +# Unit tests for install-worker-bundle.sh::probe_afxdp_runtime_available +# and parse_kernel_major_minor. 
+# +# anygpt-52 (PR #65 issuecomment-4339242358) reported the probe +# returning false on c6in.metal Debian 13 + kernel 6.12.74 — claiming +# "kernel <5.10 or libxdp.so missing" when the kernel was clearly 6.12 +# and libxdp1 was on the host. The fix uses awk -F'[.-]' so the parser +# robustly handles 3-component releases with suffixes like: +# +# 6.12.74-cloud-amd64 +# 5.10.0-13-amd64 +# 6.12.74+deb13+1-amd64 +# 5.4.282-rt +# +# These tests exercise: +# 1. parse_kernel_major_minor on the full set of release shapes we've +# seen in the wild + a couple of edge cases. +# 2. probe_afxdp_runtime_available with a stubbed `uname` and `ldconfig` +# to assert the four return paths: +# - kernel < 5.10 → false +# - kernel >= 5.10 + libxdp.so missing → false +# - kernel >= 5.10 + libxdp.so present → true +# - unparseable kernel → false +# 3. The probe stderr carries a single-line reason on failure so the +# operator can tell which check failed. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TARGET_SCRIPT="${SCRIPT_DIR}/../install-worker-bundle.sh" + +if [ ! -r "$TARGET_SCRIPT" ]; then + printf '[!] %s not found\n' "$TARGET_SCRIPT" >&2 + exit 1 +fi + +PASS=0 +FAIL=0 + +note_pass() { PASS=$(( PASS + 1 )); printf ' [ok] %s\n' "$1"; } +note_fail() { FAIL=$(( FAIL + 1 )); printf ' [FAIL] %s: %s\n' "$1" "$2" >&2; } + +assert_eq() { + local label="$1" expected="$2" actual="$3" + if [ "$expected" = "$actual" ]; then + note_pass "$label" + else + note_fail "$label" "expected $(printf '%q' "$expected") got $(printf '%q' "$actual")" + fi +} + +WORK_ROOT="$(mktemp -d)" +trap 'rm -rf "$WORK_ROOT"' EXIT + +# Source the script with the load-only hook so main() is skipped. Puts +# parse_kernel_major_minor + probe_afxdp_runtime_available in scope. 
+load_script() { + # shellcheck disable=SC1090 + export ANYSCAN_INSTALL_LOAD_ONLY=1 + source "$TARGET_SCRIPT" +} + +# --------------------------------------------------------------------------- +# parse_kernel_major_minor: cover every release shape we've shipped on. +# Each subshell isolates the sourced state so failures don't cascade. +# --------------------------------------------------------------------------- +test_parse() { + local release="$1" expected="$2" + local actual + actual="$( + load_script + parse_kernel_major_minor "$release" + )" + assert_eq "parse_kernel_major_minor $(printf '%q' "$release") → $(printf '%q' "$expected")" \ + "$expected" "$actual" +} + +test_parse "6.12.74+deb13+1-amd64" "6 12" +test_parse "6.12.74-cloud-amd64" "6 12" +test_parse "5.10.0-13-amd64" "5 10" +test_parse "5.4.282-rt" "5 4" +test_parse "4.19.276-amd64" "4 19" +test_parse "5.10" "5 10" +test_parse "5" "5 0" +test_parse "" "0 0" + +# --------------------------------------------------------------------------- +# probe_afxdp_runtime_available with stubbed uname / ldconfig. 
+# --------------------------------------------------------------------------- +make_stubs() { + local stub_dir="$1" uname_release="$2" libxdp_present="$3" + + mkdir -p "$stub_dir" + + cat >"$stub_dir/uname" <"$stub_dir/ldconfig" <<'EOF' +#!/usr/bin/env bash +if [ "$1" = "-p" ]; then + cat < /lib/x86_64-linux-gnu/libxdp.so.1 + libxdp.so (libc6,x86-64) => /lib/x86_64-linux-gnu/libxdp.so +LD + exit 0 +fi +exit 0 +EOF + else + cat >"$stub_dir/ldconfig" <<'EOF' +#!/usr/bin/env bash +if [ "$1" = "-p" ]; then + cat <<'LD' + libc.so.6 (libc6,x86-64) => /lib/x86_64-linux-gnu/libc.so.6 +LD + exit 0 +fi +exit 0 +EOF + fi + chmod +x "$stub_dir/ldconfig" +} + +test_probe() { + local label="$1" uname_release="$2" libxdp_present="$3" expected_value="$4" expected_stderr_contains="$5" + + local case_dir="$WORK_ROOT/$(echo "$label" | tr ' /' '__')" + mkdir -p "$case_dir" + local stub_dir="$case_dir/stubs" + make_stubs "$stub_dir" "$uname_release" "$libxdp_present" + + local stdout_file="$case_dir/probe.stdout" + local stderr_file="$case_dir/probe.stderr" + + ( + export PATH="$stub_dir:$PATH" + load_script + probe_afxdp_runtime_available + ) >"$stdout_file" 2>"$stderr_file" + + local actual + actual="$(cat "$stdout_file")" + assert_eq "probe ($label) → $(printf '%q' "$expected_value")" \ + "$expected_value" "$actual" + + if [ -n "$expected_stderr_contains" ]; then + if grep -Fq -- "$expected_stderr_contains" "$stderr_file"; then + note_pass "probe ($label) stderr contains $(printf '%q' "$expected_stderr_contains")" + else + note_fail "probe ($label) stderr contains $(printf '%q' "$expected_stderr_contains")" \ + "stderr was: $(cat "$stderr_file")" + fi + fi +} + +# Bug 5 repro: 6.12.74 + libxdp present must return true. The OLD +# parameter-expansion parser claimed kernel <5.10 here. +test_probe "c6in.metal Debian 13 6.12.74 + libxdp" \ + "6.12.74+deb13+1-amd64" "true" \ + "true" "" + +# 6.12 with no libxdp → false, stderr names the missing library. 
+test_probe "kernel 6.12 + libxdp missing" \ + "6.12.74-cloud-amd64" "false" \ + "false" "libxdp.so not in ldconfig" + +# Old kernel (4.19) → false, stderr names the version. +test_probe "kernel 4.19 too old" \ + "4.19.276-amd64" "true" \ + "false" "kernel 4.19 < 5.10" + +# 5.10 boundary: 5.9 → false, 5.10 → true, 5.11 → true. +test_probe "kernel 5.9 too old" \ + "5.9.0-amd64" "true" \ + "false" "kernel 5.9 < 5.10" +test_probe "kernel 5.10 boundary" \ + "5.10.0-amd64" "true" \ + "true" "" +test_probe "kernel 5.11 above boundary" \ + "5.11.0-amd64" "true" \ + "true" "" + +# Empty uname → false, stderr says could not parse. +test_probe "empty uname output" \ + "" "true" \ + "false" "could not parse kernel" + +# Unparseable uname (no leading digit) → false, stderr says could not parse. +test_probe "non-numeric uname" \ + "linux-custom-build" "true" \ + "false" "could not parse kernel" + +printf '\n' +printf 'PASS: %d\n' "$PASS" +printf 'FAIL: %d\n' "$FAIL" + +if [ "$FAIL" -gt 0 ]; then + exit 1 +fi