From bcdbc28ede3c639038f1b754179364d4417b5c79 Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Sat, 2 May 2026 15:46:16 +0000 Subject: [PATCH] Rewrite chromium-headful + chromium-headless wrapper as a Go binary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the bash wrapper.sh shipped in both browser images with a single Go binary (server/cmd/wrapper) that detects the headful vs headless profile from supervisor's conf.d at boot. The Go wrapper preserves behaviour parity with the bash scripts but removes serial dead time on the boot path: - Phase A starts xorg/xvfb, dbus, and chromedriver in a single supervisorctl invocation; readiness is then probed concurrently. - Envoy bootstrap (cert generation, NSS DB, template render) runs in a goroutine alongside Phase A. Phase B gates on it because chromium reads the system CA trust store at process start. - Phase B starts chromium, kernel-images-api, and (headful) mutter + optional neko in one call so their bring-up overlaps chromium boot. - Final readiness waits on the union of CDP, chromedriver, forward-proxy, and (when enabled) neko + envoy in parallel. Per-service tweaks: - supervisor confs: startsecs=2 → 0 so supervisorctl start returns as soon as the program is launched (Go probes readiness directly). - init-envoy.sh: drop the trailing 50-iteration port poll and curl-through-proxy test; the Go wrapper's waitAllReady covers it. - Kraftfile cmd and Dockerfile ENTRYPOINT updated to /wrapper (previously /wrapper.sh for headful and /usr/bin/wrapper.sh for headless). Cosmetic + non-critical work (pulseaudio, --no-sandbox infobar dismissal) runs off the hot path. 
--- README.md | 2 +- images/chromium-headful/Dockerfile | 10 +- images/chromium-headful/Kraftfile | 2 +- .../supervisor/services/chromedriver.conf | 2 +- .../supervisor/services/dbus.conf | 2 +- .../services/kernel-images-api.conf | 2 +- .../supervisor/services/mutter.conf | 2 +- .../supervisor/services/neko.conf | 2 +- .../supervisor/services/xorg.conf | 2 +- images/chromium-headful/wrapper.sh | 338 ---------- images/chromium-headless/image/Dockerfile | 12 +- images/chromium-headless/image/Kraftfile | 2 +- .../supervisor/services/chromedriver.conf | 2 +- .../image/supervisor/services/dbus.conf | 2 +- .../services/kernel-images-api.conf | 2 +- .../image/supervisor/services/xvfb.conf | 2 +- images/chromium-headless/image/wrapper.sh | 294 --------- server/cmd/wrapper/main.go | 591 ++++++++++++++++++ shared/envoy/init-envoy.sh | 38 +- shared/envoy/supervisor-envoy.conf | 2 +- 20 files changed, 626 insertions(+), 685 deletions(-) delete mode 100755 images/chromium-headful/wrapper.sh delete mode 100755 images/chromium-headless/image/wrapper.sh create mode 100644 server/cmd/wrapper/main.go diff --git a/README.md b/README.md index 50fbe2db..22ef9bc0 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ Deployed successfully! 
├────── service: ├─ private fqdn: ├─── private ip: - └───────── args: /wrapper.sh + └───────── args: /wrapper ``` ### Unikernel Notes diff --git a/images/chromium-headful/Dockerfile b/images/chromium-headful/Dockerfile index 84ecfd9d..89907f16 100644 --- a/images/chromium-headful/Dockerfile +++ b/images/chromium-headful/Dockerfile @@ -27,6 +27,12 @@ RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \ GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ go build -ldflags="-s -w" -o /out/chromium-launcher ./cmd/chromium-launcher +# Build container entrypoint wrapper (replaces wrapper.sh) +RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \ + --mount=type=cache,target=/go/pkg/mod,id=$CACHEIDPREFIX-go-pkg-mod \ + GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ + go build -ldflags="-s -w" -o /out/wrapper ./cmd/wrapper + # webrtc client FROM node:22-bullseye-slim AS client WORKDIR /src @@ -348,7 +354,6 @@ COPY --from=xorg-deps /usr/local/lib/xorg/modules/input/neko_drv.so /usr/lib/xor COPY images/chromium-headful/image-chromium/ / COPY images/chromium-headful/start-pulseaudio.sh /images/chromium-headful/start-pulseaudio.sh RUN chmod +x /images/chromium-headful/start-pulseaudio.sh -COPY images/chromium-headful/wrapper.sh /wrapper.sh COPY images/chromium-headful/supervisord.conf /etc/supervisor/supervisord.conf COPY images/chromium-headful/supervisor/services/ /etc/supervisor/conf.d/services/ COPY shared/envoy/supervisor-envoy.conf /etc/supervisor/conf.d/services/envoy.conf @@ -365,6 +370,7 @@ RUN chmod +x /usr/local/bin/init-envoy.sh # copy the kernel-images API binary built in the builder stage COPY --from=server-builder /out/kernel-images-api /usr/local/bin/kernel-images-api COPY --from=server-builder /out/chromium-launcher /usr/local/bin/chromium-launcher +COPY --from=server-builder /out/wrapper /wrapper # Copy and compile the Playwright daemon COPY server/runtime/playwright-daemon.ts 
/tmp/playwright-daemon.ts @@ -381,4 +387,4 @@ RUN esbuild /tmp/playwright-daemon.ts \ RUN useradd -m -s /bin/bash kernel -ENTRYPOINT [ "/wrapper.sh" ] +ENTRYPOINT [ "/wrapper" ] diff --git a/images/chromium-headful/Kraftfile b/images/chromium-headful/Kraftfile index 18af1a0b..9bb12637 100644 --- a/images/chromium-headful/Kraftfile +++ b/images/chromium-headful/Kraftfile @@ -9,4 +9,4 @@ labels: rootfs: ./initrd -cmd: ["/wrapper.sh"] +cmd: ["/wrapper"] diff --git a/images/chromium-headful/supervisor/services/chromedriver.conf b/images/chromium-headful/supervisor/services/chromedriver.conf index 9bca5365..7d8d3812 100644 --- a/images/chromium-headful/supervisor/services/chromedriver.conf +++ b/images/chromium-headful/supervisor/services/chromedriver.conf @@ -2,6 +2,6 @@ command=/usr/local/bin/chromedriver --port=9225 --allowed-ips=127.0.0.1 --log-level=INFO autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/chromedriver redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/dbus.conf b/images/chromium-headful/supervisor/services/dbus.conf index 7edc479c..fcab6898 100644 --- a/images/chromium-headful/supervisor/services/dbus.conf +++ b/images/chromium-headful/supervisor/services/dbus.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'mkdir -p /run/dbus && dbus-uuidgen --ensure && dbus-daemon --system --address=unix:path=/run/dbus/system_bus_socket --nopidfile --nosyslog --nofork' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/dbus redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/kernel-images-api.conf b/images/chromium-headful/supervisor/services/kernel-images-api.conf index e57d30a8..0638dea8 100644 --- a/images/chromium-headful/supervisor/services/kernel-images-api.conf +++ b/images/chromium-headful/supervisor/services/kernel-images-api.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'mkdir -p 
"${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" && PORT="${KERNEL_IMAGES_API_PORT:-10001}" FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" LOG_CDP_MESSAGES="${LOG_CDP_MESSAGES:-false}" exec /usr/local/bin/kernel-images-api' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/kernel-images-api redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/mutter.conf b/images/chromium-headful/supervisor/services/mutter.conf index 5de00213..3fac9ea7 100644 --- a/images/chromium-headful/supervisor/services/mutter.conf +++ b/images/chromium-headful/supervisor/services/mutter.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'XDG_SESSION_TYPE=x11 mutter --replace --sm-disable' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/mutter redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/neko.conf b/images/chromium-headful/supervisor/services/neko.conf index c30c8b46..9662df02 100644 --- a/images/chromium-headful/supervisor/services/neko.conf +++ b/images/chromium-headful/supervisor/services/neko.conf @@ -2,6 +2,6 @@ command=/usr/bin/neko serve --server.static /var/www --server.bind 0.0.0.0:8080 autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/neko redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/xorg.conf b/images/chromium-headful/supervisor/services/xorg.conf index 72e515e5..5357ba7c 100644 --- a/images/chromium-headful/supervisor/services/xorg.conf +++ b/images/chromium-headful/supervisor/services/xorg.conf @@ -2,6 +2,6 @@ command=/usr/bin/Xorg :1 -config /etc/neko/xorg.conf -noreset -nolisten tcp autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/xorg 
redirect_stderr=true diff --git a/images/chromium-headful/wrapper.sh b/images/chromium-headful/wrapper.sh deleted file mode 100755 index 1e3797cb..00000000 --- a/images/chromium-headful/wrapper.sh +++ /dev/null @@ -1,338 +0,0 @@ -#!/bin/bash - -set -o pipefail -o errexit -o nounset - -# If the WITHDOCKER environment variable is not set, it means we are not running inside a Docker container. -# Docker manages /dev/shm itself, and attempting to mount or modify it can cause permission or device errors. -# However, in a unikernel container environment (non-Docker), we need to manually create and mount /dev/shm as a tmpfs -# to support shared memory operations. -if [ -z "${WITHDOCKER:-}" ]; then - mkdir -p /dev/shm - chmod 777 /dev/shm - mount -t tmpfs tmpfs /dev/shm -fi - -# We disable scale-to-zero for the lifetime of this script and restore -# the original setting on exit. -SCALE_TO_ZERO_FILE="/uk/libukp/scale_to_zero_disable" -scale_to_zero_write() { - local char="$1" - # Skip when not running inside Unikraft Cloud (control file absent) - if [[ -e "$SCALE_TO_ZERO_FILE" ]]; then - # Write the character, but do not fail the whole script if this errors out - echo -n "$char" > "$SCALE_TO_ZERO_FILE" 2>/dev/null || \ - echo "[wrapper] Failed to write to scale-to-zero control file" >&2 - fi -} -disable_scale_to_zero() { scale_to_zero_write "+"; } -enable_scale_to_zero() { scale_to_zero_write "-"; } - -wait_for_tcp_port() { - local host="$1" - local port="$2" - local name="$3" - local attempts="${4:-0}" - local sleep_secs="${5:-0.5}" - local timeout_label="${6:-}" - local attempt=0 - - echo "[wrapper] Waiting for ${name} on ${host}:${port}..." 
- while true; do - if (echo >/dev/tcp/"${host}"/"${port}") >/dev/null 2>&1; then - echo "[wrapper] ${name} is ready on ${host}:${port}" - return 0 - fi - - if (( attempts > 0 )); then - attempt=$((attempt + 1)) - if (( attempt >= attempts )); then - if [[ -n "${timeout_label}" ]]; then - echo "[wrapper] WARNING: ${name} not ready on ${host}:${port} after ${timeout_label}" >&2 - else - echo "[wrapper] WARNING: ${name} not ready on ${host}:${port} after ${attempts} attempts" >&2 - fi - return 1 - fi - fi - - sleep "${sleep_secs}" - done -} - -# Disable scale-to-zero for the duration of the script when not running under Docker -if [[ -z "${WITHDOCKER:-}" ]]; then - echo "[wrapper] Disabling scale-to-zero" - disable_scale_to_zero -fi - -# ----------------------------------------------------------------------------- -# Ensure a sensible hostname --------------------------------------------------- -# ----------------------------------------------------------------------------- -# Some environments boot with an empty or \"(none)\" hostname which shows up in -# prompts. Best-effort set a friendly hostname early so services inherit it. -if h=$(cat /proc/sys/kernel/hostname 2>/dev/null); then - if [ -z "$h" ] || [ "$h" = "(none)" ]; then - if command -v hostname >/dev/null 2>&1; then - hostname kernel-vm 2>/dev/null || true - fi - echo -n "kernel-vm" > /proc/sys/kernel/hostname 2>/dev/null || true - fi -fi -# Also export HOSTNAME so shells pick it up immediately. -export HOSTNAME="${HOSTNAME:-kernel-vm}" - -# ----------------------------------------------------------------------------- -# Disable IPv6 ----------------------------------------------------------------- -# ----------------------------------------------------------------------------- -# The VM environment has no IPv6 route, so any IPv6 connection attempt will fail -# immediately with ENETUNREACH. Chromium's built-in DNS client may attempt -# DNS-over-HTTPS to IPv6 endpoints (e.g. 
[2001:4860:4860::8888]:443), and each -# failed attempt wastes a connection slot from the MaxConnectionsPerProxy pool. -# Disabling IPv6 at the kernel level prevents these wasted attempts. -echo 1 > /proc/sys/net/ipv6/conf/all/disable_ipv6 2>/dev/null || true -echo 1 > /proc/sys/net/ipv6/conf/default/disable_ipv6 2>/dev/null || true - -# ----------------------------------------------------------------------------- -# House-keeping for the unprivileged "kernel" user -------------------------------- -# Some Chromium subsystems want to create files under $HOME (NSS cert DB, dconf -# cache). If those directories are missing or owned by root Chromium emits -# noisy error messages such as: -# [ERROR:crypto/nss_util.cc:48] Failed to create /home/kernel/.pki/nssdb ... -# dconf-CRITICAL **: unable to create directory '/home/kernel/.cache/dconf' -# Pre-create them and hand ownership to the user so the messages disappear. -# When RUN_AS_ROOT is true, we skip ownership changes since we're running as root. - -if [[ "${RUN_AS_ROOT:-}" != "true" ]]; then - dirs=( - /home/kernel/user-data - /home/kernel/.config/chromium - /home/kernel/.pki/nssdb - /home/kernel/.cache/dconf - /tmp - /var/log - /var/log/supervisord - ) - - for dir in "${dirs[@]}"; do - if [ ! -d "$dir" ]; then - mkdir -p "$dir" - fi - done - - # Ensure correct ownership (ignore errors if already correct) - chown -R kernel:kernel /home/kernel /home/kernel/user-data /home/kernel/.config /home/kernel/.pki /home/kernel/.cache 2>/dev/null || true - # Make policy directory writable for runtime updates - chown -R kernel:kernel /etc/chromium/policies 2>/dev/null || true -else - # When running as root, just create the necessary directories without ownership changes - dirs=( - /tmp - /var/log - /var/log/supervisord - /home/kernel - /home/kernel/user-data - ) - - for dir in "${dirs[@]}"; do - if [ ! 
-d "$dir" ]; then - mkdir -p "$dir" - fi - done -fi - -# ----------------------------------------------------------------------------- -# Dynamic log aggregation for /var/log/supervisord ----------------------------- -# ----------------------------------------------------------------------------- -# Tails any existing and future files under /var/log/supervisord, -# prefixing each line with the relative filepath, e.g. [chromium] ... -start_dynamic_log_aggregator() { - echo "[wrapper] Starting dynamic log aggregator for /var/log/supervisord" - ( - declare -A tailed_files=() - start_tail() { - local f="$1" - [[ -f "$f" ]] || return 0 - [[ -n "${tailed_files[$f]:-}" ]] && return 0 - local label="${f#/var/log/supervisord/}" - # Tie tails to this subshell lifetime so they exit when we stop it - tail --pid="$$" -n +1 -F "$f" 2>/dev/null | sed -u "s/^/[${label}] /" & - tailed_files[$f]=1 - } - # Periodically scan for new *.log files without extra dependencies - while true; do - while IFS= read -r -d '' f; do - start_tail "$f" - done < <(find /var/log/supervisord -type f -print0 2>/dev/null || true) - sleep 1 - done - ) & - tail_pids+=("$!") -} - -# Start log aggregator early so we see supervisor and service logs as they appear -start_dynamic_log_aggregator - -export DISPLAY=:1 - -# Predefine ports and export for services -export INTERNAL_PORT="${INTERNAL_PORT:-9223}" -export CHROME_PORT="${CHROME_PORT:-9222}" - -# Track background tailing processes for cleanup -tail_pids=() - -# Cleanup handler (set early so we catch early failures) -cleanup () { - echo "[wrapper] Cleaning up..." 
- # Re-enable scale-to-zero if the script terminates early - enable_scale_to_zero - supervisorctl -c /etc/supervisor/supervisord.conf stop chromedriver || true - supervisorctl -c /etc/supervisor/supervisord.conf stop chromium || true - supervisorctl -c /etc/supervisor/supervisord.conf stop kernel-images-api || true - supervisorctl -c /etc/supervisor/supervisord.conf stop dbus || true - # Stop log tailers - if [[ -n "${tail_pids[*]:-}" ]]; then - for tp in "${tail_pids[@]}"; do - kill -TERM "$tp" 2>/dev/null || true - done - fi -} -trap cleanup TERM INT - -# Start supervisord early so it can manage Xorg and Mutter -echo "[wrapper] Starting supervisord" -supervisord -c /etc/supervisor/supervisord.conf -echo "[wrapper] Waiting for supervisord socket..." -for i in {1..30}; do -if [ -S /var/run/supervisor.sock ]; then - break -fi -sleep 0.2 -done - -init-envoy.sh - -echo "[wrapper] Starting Xorg via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start xorg -echo "[wrapper] Waiting for Xorg to open display $DISPLAY..." -for i in {1..50}; do - if xdpyinfo -display "$DISPLAY" >/dev/null 2>&1; then - break - fi - sleep 0.2 -done - -echo "[wrapper] Starting Mutter via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start mutter -echo "[wrapper] Waiting for Mutter to be ready..." -timeout=30 -while [ $timeout -gt 0 ]; do - if xdotool search --class "mutter" >/dev/null 2>&1; then - break - fi - sleep 1 - ((timeout--)) -done - -# ----------------------------------------------------------------------------- -# System-bus setup via supervisord -------------------------------------------- -# ----------------------------------------------------------------------------- -echo "[wrapper] Starting system D-Bus daemon via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start dbus -echo "[wrapper] Waiting for D-Bus system bus socket..." 
-for i in {1..50}; do - if [ -S /run/dbus/system_bus_socket ]; then - break - fi - sleep 0.2 -done - -# We will point DBUS_SESSION_BUS_ADDRESS at the system bus socket to suppress -# autolaunch attempts that failed and spammed logs. -export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/dbus/system_bus_socket" - -# Start Chromium with display :1 and remote debugging, loading our recorder extension. -echo "[wrapper] Starting Chromium via supervisord on internal port $INTERNAL_PORT" -supervisorctl -c /etc/supervisor/supervisord.conf start chromium -wait_for_tcp_port 127.0.0.1 "$INTERNAL_PORT" "Chromium remote debugging" 100 0.2 "20s" || true - -if [[ "${ENABLE_WEBRTC:-}" == "true" ]]; then - # use webrtc - echo "[wrapper] ✨ Starting neko (webrtc server) via supervisord." - supervisorctl -c /etc/supervisor/supervisord.conf start neko - - # Wait for neko to be ready. - wait_for_tcp_port 127.0.0.1 8080 "neko" -fi - -echo "[wrapper] ✨ Starting kernel-images API." - -API_PORT="${KERNEL_IMAGES_API_PORT:-10001}" -API_FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" -API_DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" -API_MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" -API_OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" - -# Start via supervisord (env overrides are read by the service's command) -supervisorctl -c /etc/supervisor/supervisord.conf start kernel-images-api -wait_for_tcp_port 127.0.0.1 "${API_PORT}" "kernel-images API" - -echo "[wrapper] Starting ChromeDriver via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start chromedriver -wait_for_tcp_port 127.0.0.1 9225 "ChromeDriver" 50 0.2 "10s" || true - -echo "[wrapper] Starting PulseAudio daemon via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start pulseaudio - -# close the "--no-sandbox unsupported flag" warning when running as root -# in the unikernel runtime we haven't been able to get chromium to launch as non-root without cryptic crashpad errors -# 
and when running as root you must use the --no-sandbox flag, which generates a warning -if [[ "${RUN_AS_ROOT:-}" == "true" ]]; then - echo "[wrapper] Running as root, attempting to dismiss the --no-sandbox unsupported flag warning" - if read -r WIDTH HEIGHT <<< "$(xdotool getdisplaygeometry 2>/dev/null)"; then - # Work out an x-coordinate slightly inside the right-hand edge of the - OFFSET_X=$(( WIDTH - 30 )) - if (( OFFSET_X < 0 )); then - OFFSET_X=0 - fi - - # Wait for Chromium window to open before dismissing the --no-sandbox warning. - target='New Tab - Chromium' - echo "[wrapper] Waiting for Chromium window \"${target}\" to appear and become active..." - while :; do - win_id=$(xwininfo -root -tree 2>/dev/null | awk -v t="$target" '$0 ~ t {print $1; exit}') - if [[ -n $win_id ]]; then - win_id=${win_id%:} - if xdotool windowactivate --sync "$win_id"; then - echo "[wrapper] Focused window $win_id ($target) on $DISPLAY" - break - fi - fi - sleep 0.5 - done - - # wait... not sure but this just increases the likelihood of success - # without the sleep you often open the live view and see the mouse hovering over the "X" to dismiss the warning, suggesting that it clicked before the warning or chromium appeared - sleep 5 - - # Attempt to click the warning's close button - echo "[wrapper] Clicking the warning's close button at x=$OFFSET_X y=115" - if curl -s -o /dev/null -X POST \ - http://localhost:${API_PORT}/computer/click_mouse \ - -H "Content-Type: application/json" \ - -d "{\"x\":${OFFSET_X},\"y\":115}"; then - echo "[wrapper] Successfully clicked the warning's close button" - else - echo "[wrapper] Failed to click the warning's close button" >&2 - fi - else - echo "[wrapper] xdotool failed to obtain display geometry; skipping sandbox warning dismissal." 
>&2 - fi -fi - -if [[ -z "${WITHDOCKER:-}" ]]; then - enable_scale_to_zero -fi - -# Keep the container running while streaming logs -wait diff --git a/images/chromium-headless/image/Dockerfile b/images/chromium-headless/image/Dockerfile index aa7d17ea..b9a3462a 100644 --- a/images/chromium-headless/image/Dockerfile +++ b/images/chromium-headless/image/Dockerfile @@ -28,6 +28,12 @@ RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \ GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ go build -ldflags="-s -w" -o /out/chromium-launcher ./cmd/chromium-launcher +# Build container entrypoint wrapper (replaces wrapper.sh) +RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \ + --mount=type=cache,target=/go/pkg/mod,id=$CACHEIDPREFIX-go-pkg-mod \ + GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ + go build -ldflags="-s -w" -o /out/wrapper ./cmd/wrapper + FROM docker.io/ubuntu:22.04 AS ffmpeg-downloader # Allow cross-compilation when building with BuildKit platforms @@ -217,8 +223,8 @@ RUN useradd -m -s /bin/bash kernel COPY images/chromium-headless/image/start-xvfb.sh /images/chromium-headless/image/start-xvfb.sh RUN chmod +x /images/chromium-headless/image/start-xvfb.sh -# Wrapper script to set environment -COPY images/chromium-headless/image/wrapper.sh /usr/bin/wrapper.sh +# Container entrypoint wrapper (Go binary, replaces wrapper.sh) +COPY --from=server-builder /out/wrapper /wrapper # Supervisord configuration COPY images/chromium-headless/image/supervisord.conf /etc/supervisor/supervisord.conf @@ -251,4 +257,4 @@ RUN esbuild /tmp/playwright-daemon.ts \ --external:esbuild \ && rm /tmp/playwright-daemon.ts -ENTRYPOINT [ "/usr/bin/wrapper.sh" ] +ENTRYPOINT [ "/wrapper" ] diff --git a/images/chromium-headless/image/Kraftfile b/images/chromium-headless/image/Kraftfile index b11a88c2..9bb12637 100644 --- a/images/chromium-headless/image/Kraftfile +++ b/images/chromium-headless/image/Kraftfile @@ -9,4 +9,4 @@ 
labels: rootfs: ./initrd -cmd: ["/usr/bin/wrapper.sh"] +cmd: ["/wrapper"] diff --git a/images/chromium-headless/image/supervisor/services/chromedriver.conf b/images/chromium-headless/image/supervisor/services/chromedriver.conf index 9bca5365..7d8d3812 100644 --- a/images/chromium-headless/image/supervisor/services/chromedriver.conf +++ b/images/chromium-headless/image/supervisor/services/chromedriver.conf @@ -2,6 +2,6 @@ command=/usr/local/bin/chromedriver --port=9225 --allowed-ips=127.0.0.1 --log-level=INFO autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/chromedriver redirect_stderr=true diff --git a/images/chromium-headless/image/supervisor/services/dbus.conf b/images/chromium-headless/image/supervisor/services/dbus.conf index 7edc479c..fcab6898 100644 --- a/images/chromium-headless/image/supervisor/services/dbus.conf +++ b/images/chromium-headless/image/supervisor/services/dbus.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'mkdir -p /run/dbus && dbus-uuidgen --ensure && dbus-daemon --system --address=unix:path=/run/dbus/system_bus_socket --nopidfile --nosyslog --nofork' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/dbus redirect_stderr=true diff --git a/images/chromium-headless/image/supervisor/services/kernel-images-api.conf b/images/chromium-headless/image/supervisor/services/kernel-images-api.conf index e57d30a8..0638dea8 100644 --- a/images/chromium-headless/image/supervisor/services/kernel-images-api.conf +++ b/images/chromium-headless/image/supervisor/services/kernel-images-api.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'mkdir -p "${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" && PORT="${KERNEL_IMAGES_API_PORT:-10001}" FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" 
LOG_CDP_MESSAGES="${LOG_CDP_MESSAGES:-false}" exec /usr/local/bin/kernel-images-api' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/kernel-images-api redirect_stderr=true diff --git a/images/chromium-headless/image/supervisor/services/xvfb.conf b/images/chromium-headless/image/supervisor/services/xvfb.conf index 5279bda4..28974551 100644 --- a/images/chromium-headless/image/supervisor/services/xvfb.conf +++ b/images/chromium-headless/image/supervisor/services/xvfb.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc '/images/chromium-headless/image/start-xvfb.sh' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/xvfb redirect_stderr=true diff --git a/images/chromium-headless/image/wrapper.sh b/images/chromium-headless/image/wrapper.sh deleted file mode 100755 index 6a1935b9..00000000 --- a/images/chromium-headless/image/wrapper.sh +++ /dev/null @@ -1,294 +0,0 @@ -#!/bin/bash - -set -o pipefail -o errexit -o nounset - -# If we are outside Docker-in-Docker make sure /dev/shm exists -if [ -z "${WITHDOCKER:-}" ]; then - mkdir -p /dev/shm - chmod 777 /dev/shm - mount -t tmpfs tmpfs /dev/shm -fi - -# We disable scale-to-zero for the lifetime of this script and restore -# the original setting on exit. 
-SCALE_TO_ZERO_FILE="/uk/libukp/scale_to_zero_disable" -scale_to_zero_write() { - local char="$1" - # Skip when not running inside Unikraft Cloud (control file absent) - if [[ -e "$SCALE_TO_ZERO_FILE" ]]; then - # Write the character, but do not fail the whole script if this errors out - echo -n "$char" > "$SCALE_TO_ZERO_FILE" 2>/dev/null || \ - echo "[wrapper] Failed to write to scale-to-zero control file" >&2 - fi -} -disable_scale_to_zero() { scale_to_zero_write "+"; } -enable_scale_to_zero() { scale_to_zero_write "-"; } - -wait_for_tcp_port() { - local host="$1" - local port="$2" - local name="$3" - local attempts="${4:-0}" - local sleep_secs="${5:-0.5}" - local timeout_label="${6:-}" - local attempt=0 - - echo "[wrapper] Waiting for ${name} on ${host}:${port}..." - while true; do - if (echo >/dev/tcp/"${host}"/"${port}") >/dev/null 2>&1; then - echo "[wrapper] ${name} is ready on ${host}:${port}" - return 0 - fi - - if (( attempts > 0 )); then - attempt=$((attempt + 1)) - if (( attempt >= attempts )); then - if [[ -n "${timeout_label}" ]]; then - echo "[wrapper] WARNING: ${name} not ready on ${host}:${port} after ${timeout_label}" >&2 - else - echo "[wrapper] WARNING: ${name} not ready on ${host}:${port} after ${attempts} attempts" >&2 - fi - return 1 - fi - fi - - sleep "${sleep_secs}" - done -} - -# Disable scale-to-zero for the duration of the script when not running under Docker -if [[ -z "${WITHDOCKER:-}" ]]; then - echo "[wrapper] Disabling scale-to-zero" - disable_scale_to_zero -fi - -# ----------------------------------------------------------------------------- -# Ensure a sensible hostname --------------------------------------------------- -# ----------------------------------------------------------------------------- -if h=$(cat /proc/sys/kernel/hostname 2>/dev/null); then - if [ -z "$h" ] || [ "$h" = "(none)" ]; then - if command -v hostname >/dev/null 2>&1; then - hostname kernel-vm 2>/dev/null || true - fi - echo -n "kernel-vm" > 
/proc/sys/kernel/hostname 2>/dev/null || true - fi -fi -export HOSTNAME="${HOSTNAME:-kernel-vm}" - -# ----------------------------------------------------------------------------- -# Disable IPv6 ----------------------------------------------------------------- -# ----------------------------------------------------------------------------- -# The VM environment has no IPv6 route, so any IPv6 connection attempt will fail -# immediately with ENETUNREACH. Chromium's built-in DNS client may attempt -# DNS-over-HTTPS to IPv6 endpoints (e.g. [2001:4860:4860::8888]:443), and each -# failed attempt wastes a connection slot from the MaxConnectionsPerProxy pool. -# Disabling IPv6 at the kernel level prevents these wasted attempts. -echo 1 > /proc/sys/net/ipv6/conf/all/disable_ipv6 2>/dev/null || true -echo 1 > /proc/sys/net/ipv6/conf/default/disable_ipv6 2>/dev/null || true - -# if CHROMIUM_FLAGS is not set, default to the flags used in playwright_stealth -# NOTE: --disable-background-networking was intentionally removed because it prevents -# Chrome from fetching extensions via ExtensionInstallForcelist enterprise policy. -# Enterprise extensions require Chrome to make HTTP requests to fetch update.xml and .crx files. 
-if [ -z "${CHROMIUM_FLAGS:-}" ]; then - CHROMIUM_FLAGS="--accept-lang=en-US,en \ - --allow-pre-commit-input \ - --blink-settings=primaryHoverType=2,availableHoverTypes=2,primaryPointerType=4,availablePointerTypes=4 \ - --crash-dumps-dir=/tmp/chromium-dumps \ - --disable-back-forward-cache \ - --disable-background-timer-throttling \ - --disable-backgrounding-occluded-windows \ - --disable-blink-features=AutomationControlled \ - --disable-breakpad \ - --disable-client-side-phishing-detection \ - --disable-component-extensions-with-background-pages \ - --disable-crash-reporter \ - --disable-crashpad \ - --disable-dev-shm-usage \ - --disable-features=AcceptCHFrame,AutoExpandDetailsElement,AvoidUnnecessaryBeforeUnloadCheckSync,CertificateTransparencyComponentUpdater,DeferRendererTasksAfterInput,DestroyProfileOnBrowserClose,DialMediaRouteProvider,ExtensionManifestV2Disabled,GlobalMediaControls,HttpsUpgrades,ImprovedCookieControls,LazyFrameLoading,LensOverlay,MediaRouter,PaintHolding,ThirdPartyStoragePartitioning,Translate \ - --disable-field-trial-config \ - --disable-gcm-registration \ - --disable-gpu \ - --disable-gpu-compositing \ - --disable-hang-monitor \ - --disable-ipc-flooding-protection \ - --disable-notifications \ - --disable-popup-blocking \ - --disable-prompt-on-repost \ - --disable-renderer-backgrounding \ - --disable-search-engine-choice-screen \ - --disable-software-rasterizer \ - --enable-use-zoom-for-dsf=false \ - --export-tagged-pdf \ - --force-color-profile=srgb \ - --hide-crash-restore-bubble \ - --hide-scrollbars \ - --metrics-recording-only \ - --mute-audio \ - --no-default-browser-check \ - --no-first-run \ - --no-sandbox \ - --no-service-autorun \ - --ozone-platform=headless \ - --password-store=basic \ - --unsafely-disable-devtools-self-xss-warnings \ - --use-angle=swiftshader \ - --use-gl=angle \ - --use-mock-keychain" -fi -export CHROMIUM_FLAGS - -# ----------------------------------------------------------------------------- -# House-keeping 
for the unprivileged "kernel" user ---------------------------- -# When RUN_AS_ROOT is true, we skip ownership changes since we're running as root. -# ----------------------------------------------------------------------------- -if [[ "${RUN_AS_ROOT:-}" != "true" ]]; then - dirs=( - /home/kernel/user-data - /home/kernel/.config/chromium - /home/kernel/.pki/nssdb - /home/kernel/.cache/dconf - /tmp - /var/log - /var/log/supervisord - ) - - for dir in "${dirs[@]}"; do - if [ ! -d "$dir" ]; then - mkdir -p "$dir" - fi - done - - # Ensure correct ownership (ignore errors if already correct) - chown -R kernel:kernel /home/kernel /home/kernel/user-data /home/kernel/.config /home/kernel/.pki /home/kernel/.cache 2>/dev/null || true - # Make policy directory writable for runtime updates - chown -R kernel:kernel /etc/chromium/policies 2>/dev/null || true -else - # When running as root, just create the necessary directories without ownership changes - dirs=( - /tmp - /var/log - /var/log/supervisord - /home/kernel - /home/kernel/user-data - ) - - for dir in "${dirs[@]}"; do - if [ ! -d "$dir" ]; then - mkdir -p "$dir" - fi - done -fi - -# ----------------------------------------------------------------------------- -# Dynamic log aggregation for /var/log/supervisord ----------------------------- -# ----------------------------------------------------------------------------- -# Tails any existing and future files under /var/log/supervisord, -# prefixing each line with the relative filepath, e.g. [chromium] ... 
-start_dynamic_log_aggregator() { - echo "[wrapper] Starting dynamic log aggregator for /var/log/supervisord" - ( - declare -A tailed_files=() - start_tail() { - local f="$1" - [[ -f "$f" ]] || return 0 - [[ -n "${tailed_files[$f]:-}" ]] && return 0 - local label="${f#/var/log/supervisord/}" - # Tie tails to this subshell lifetime so they exit when we stop it - tail --pid="$$" -n +1 -F "$f" 2>/dev/null | sed -u "s/^/[${label}] /" & - tailed_files[$f]=1 - } - # Periodically scan for new *.log files without extra dependencies - while true; do - while IFS= read -r -d '' f; do - start_tail "$f" - done < <(find /var/log/supervisord -type f -print0 2>/dev/null || true) - sleep 1 - done - ) & - tail_pids+=("$!") -} - -# Track background tailing processes for cleanup -tail_pids=() - -# Start log aggregator early so we see supervisor and service logs as they appear -start_dynamic_log_aggregator - -# Export common env used by services -export DISPLAY=:1 -export HEIGHT=${HEIGHT:-1080} -export WIDTH=${WIDTH:-1920} -export INTERNAL_PORT="${INTERNAL_PORT:-9223}" -export CHROME_PORT="${CHROME_PORT:-9222}" - -# Cleanup handler -cleanup () { - echo "[wrapper] Cleaning up..." - # Re-enable scale-to-zero if the script terminates early - enable_scale_to_zero - supervisorctl -c /etc/supervisor/supervisord.conf stop chromedriver || true - supervisorctl -c /etc/supervisor/supervisord.conf stop chromium || true - supervisorctl -c /etc/supervisor/supervisord.conf stop xvfb || true - supervisorctl -c /etc/supervisor/supervisord.conf stop dbus || true - supervisorctl -c /etc/supervisor/supervisord.conf stop kernel-images-api || true - # Stop log tailers - if [[ -n "${tail_pids[*]:-}" ]]; then - for tp in "${tail_pids[@]}"; do - kill -TERM "$tp" 2>/dev/null || true - done - fi -} -trap cleanup TERM INT - -echo "[wrapper] Starting supervisord" -supervisord -c /etc/supervisor/supervisord.conf -echo "[wrapper] Waiting for supervisord socket..." 
-for i in {1..30}; do - if [ -S /var/run/supervisor.sock ]; then - break - fi - sleep 0.2 -done - -init-envoy.sh - -echo "[wrapper] Starting system D-Bus daemon via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start dbus -for i in {1..50}; do - if [ -S /run/dbus/system_bus_socket ]; then - break - fi - sleep 0.2 -done -export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/dbus/system_bus_socket" - -echo "[wrapper] Starting Xvfb via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start xvfb -for i in {1..50}; do - if xdpyinfo -display "$DISPLAY" >/dev/null 2>&1; then - break - fi - sleep 0.2 -done - -echo "[wrapper] Starting Chromium via supervisord on internal port $INTERNAL_PORT" -supervisorctl -c /etc/supervisor/supervisord.conf start chromium -wait_for_tcp_port 127.0.0.1 "$INTERNAL_PORT" "Chromium remote debugging" 100 0.2 "20s" || true - -echo "[wrapper] ✨ Starting kernel-images API via supervisord." -supervisorctl -c /etc/supervisor/supervisord.conf start kernel-images-api -API_PORT="${KERNEL_IMAGES_API_PORT:-10001}" -wait_for_tcp_port 127.0.0.1 "${API_PORT}" "kernel-images API" - -echo "[wrapper] Starting ChromeDriver via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start chromedriver -wait_for_tcp_port 127.0.0.1 9225 "ChromeDriver" 50 0.2 "10s" || true - -echo "[wrapper] startup complete!" 
-# Re-enable scale-to-zero once startup has completed (when not under Docker) -if [[ -z "${WITHDOCKER:-}" ]]; then - enable_scale_to_zero -fi -# Keep the container running while streaming logs -wait diff --git a/server/cmd/wrapper/main.go b/server/cmd/wrapper/main.go new file mode 100644 index 00000000..01374063 --- /dev/null +++ b/server/cmd/wrapper/main.go @@ -0,0 +1,591 @@ +// wrapper boots the chromium-headful and chromium-headless containers: +// prepares the environment, starts supervisord, brings services up in parallel +// where the dependency graph allows, and waits for CDP to be reachable through +// kernel-images-api. +// +// Replaces the legacy /wrapper.sh shipped in both images. Behavior parity is +// intentional — we still rely on supervisord, sysctl, dbus, etc. The only goal +// beyond parity is minimizing time-to-CDP-ready by removing serial dead time. +// +// The headful vs headless profile is detected at boot from supervisor's conf.d +// (xorg.conf → headful, xvfb.conf → headless), which keeps a single binary +// usable in both images without Dockerfile coordination. +package main + +import ( + "bufio" + "context" + "fmt" + "io" + "net" + "net/http" + "os" + "os/exec" + "os/signal" + "path/filepath" + "strings" + "syscall" + "time" +) + +const ( + supervisorConf = "/etc/supervisor/supervisord.conf" + supervisorConfD = "/etc/supervisor/conf.d/services" + supervisorSock = "/var/run/supervisor.sock" + supervisordLogD = "/var/log/supervisord" + scaleToZeroFile = "/uk/libukp/scale_to_zero_disable" + dbusSocket = "/run/dbus/system_bus_socket" + defaultDisplay = ":1" + defaultIntPort = "9223" + defaultAPIPort = "10001" +) + +type profile int + +const ( + profileHeadful profile = iota + profileHeadless +) + +// detectProfile keys off whichever X server's supervisor conf is present. +// The image build is what writes these files, so this is deterministic. 
+func detectProfile() profile {
+	// xvfb.conf marks the headless image; anything else (including a stat
+	// error) falls back to headful.
+	if _, err := os.Stat(filepath.Join(supervisorConfD, "xvfb.conf")); err == nil {
+		return profileHeadless
+	}
+	return profileHeadful
+}
+
+// main runs the boot sequence: prepare the environment, start supervisord,
+// bring services up in two phases, wait for the readiness probes, then block
+// until supervisord exits.
+func main() {
+	t0 := time.Now()
+	prof := detectProfile()
+	logf("starting wrapper (profile=%s)", profileName(prof))
+
+	// Subscribe to termination signals before anything long-running starts.
+	// The legacy headless wrapper.sh installed its trap ahead of supervisord;
+	// subscribing only after the readiness wait would let a SIGTERM during
+	// boot kill the wrapper outright, skipping service shutdown and the
+	// scale-to-zero restore. The channel is buffered, so a signal that lands
+	// before the handler goroutine exists is held until it does.
+	sigs := make(chan os.Signal, 1)
+	signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT)
+
+	// /dev/shm: only mount when not running under Docker (Docker manages it).
+	// The sticky bit must be spelled os.ModeSticky: os.FileMode does not use
+	// the octal 0o1000 bit, so a literal 0o1777 silently yields plain 0777.
+	if os.Getenv("WITHDOCKER") == "" {
+		const shmMode = os.ModeSticky | 0o777
+		_ = os.MkdirAll("/dev/shm", shmMode)
+		_ = os.Chmod("/dev/shm", shmMode)
+		_ = exec.Command("mount", "-t", "tmpfs", "tmpfs", "/dev/shm").Run()
+	}
+
+	// Disable scale-to-zero for the duration of startup; restored on exit.
+	disableScaleToZero()
+	defer enableScaleToZero()
+
+	// Headless ships a default CHROMIUM_FLAGS list (headless+stealth flags)
+	// when callers don't set one. Headful's defaults are caller-supplied.
+	if prof == profileHeadless {
+		applyHeadlessDefaultFlags()
+	}
+
+	// Hostname: some envs boot with empty/(none); pick a friendly default.
+	if h, err := os.ReadFile("/proc/sys/kernel/hostname"); err == nil {
+		if v := strings.TrimSpace(string(h)); v == "" || v == "(none)" {
+			_ = exec.Command("hostname", "kernel-vm").Run()
+			_ = os.WriteFile("/proc/sys/kernel/hostname", []byte("kernel-vm"), 0o644)
+		}
+	}
+	if os.Getenv("HOSTNAME") == "" {
+		_ = os.Setenv("HOSTNAME", "kernel-vm")
+	}
+
+	// Disable IPv6 — Chromium DOH wastes connection slots on unreachable v6 endpoints.
+	_ = os.WriteFile("/proc/sys/net/ipv6/conf/all/disable_ipv6", []byte("1"), 0o644)
+	_ = os.WriteFile("/proc/sys/net/ipv6/conf/default/disable_ipv6", []byte("1"), 0o644)
+
+	// Pre-create per-user dirs so chromium subsystems don't error.
+	prepareUserDirs(os.Getenv("RUN_AS_ROOT") == "true")
+
+	// Tail aggregator for service logs.
+	startLogAggregator()
+
+	// Default env that downstream services expect.
+	_ = os.Setenv("DISPLAY", defaultDisplay)
+	if os.Getenv("INTERNAL_PORT") == "" {
+		_ = os.Setenv("INTERNAL_PORT", defaultIntPort)
+	}
+	if os.Getenv("CHROME_PORT") == "" {
+		_ = os.Setenv("CHROME_PORT", "9222")
+	}
+	// Point dbus clients at the system bus socket. Set before supervisord
+	// starts so it captures the env for child services (notably chromium,
+	// which would otherwise spam autolaunch errors).
+	_ = os.Setenv("DBUS_SESSION_BUS_ADDRESS", "unix:path="+dbusSocket)
+
+	// Stale X locks from prior runs.
+	_ = os.Remove("/tmp/.X1-lock")
+	_ = os.Remove("/tmp/.X11-unix/X1")
+
+	// supervisord — start in nodaemon mode so we own its lifecycle.
+	// Without -n it forks and the parent exits with code 0, which would
+	// make the wait on it return immediately and the container would stop.
+	logf("starting supervisord")
+	supCmd := exec.Command("supervisord", "-n", "-c", supervisorConf)
+	supCmd.Stdout = os.Stdout
+	supCmd.Stderr = os.Stderr
+	if err := supCmd.Start(); err != nil {
+		// fatalf ends in os.Exit, which skips deferred calls, so restore
+		// scale-to-zero by hand before bailing out.
+		enableScaleToZero()
+		fatalf("supervisord start: %v", err)
+	}
+
+	// Reap supervisord in the background so that both main and the signal
+	// handler can observe its exit through supDone.
+	supDone := make(chan struct{})
+	go func() {
+		defer close(supDone)
+		if err := supCmd.Wait(); err != nil {
+			logf("supervisord exited: %v", err)
+		}
+	}()
+
+	// Forward signals so cleanup runs and supervisord is taken down cleanly.
+	go func() {
+		<-sigs
+		logf("shutdown: stopping services")
+		_ = exec.Command("supervisorctl", "-c", supervisorConf, "stop", "all").Run()
+		_ = supCmd.Process.Signal(syscall.SIGTERM)
+		// main may still be parked in a readiness wait (up to 60s), so exit
+		// from here once supervisord is gone instead of waiting for main to
+		// reach the end of the boot sequence.
+		<-supDone
+		enableScaleToZero()
+		os.Exit(0)
+	}()
+
+	waitForSocket(supervisorSock, 10*time.Second)
+
+	// Envoy bootstrap: cert generation, NSS DB, template render, and
+	// `supervisorctl start envoy`. Run concurrently with Phase A so the
+	// shell-out work (openssl, certutil, update-ca-certificates) overlaps
+	// xorg/dbus/chromedriver bring-up. Phase B (chromium) gates on this
+	// because chromium reads the system CA trust store at process start
+	// and needs the envoy self-signed cert in place. The envoy listener
+	// itself (port 3128) is probed in waitAllReady, not here.
+	envoyDone := make(chan struct{})
+	if isExecutable("/usr/local/bin/init-envoy.sh") {
+		go func() {
+			defer close(envoyDone)
+			runStream("envoy-init", "/usr/local/bin/init-envoy.sh")
+		}()
+	} else {
+		close(envoyDone)
+	}
+
+	// Phase A: services with no X/dbus/chromium dependency. chromedriver
+	// listens on 9225 immediately and only attaches to chromium on session
+	// creation, so it can come up alongside the display stack.
+	xServer := "xorg"
+	if prof == profileHeadless {
+		xServer = "xvfb"
+	}
+	startAll(xServer, "dbus", "chromedriver")
+	waitForX(defaultDisplay, 20*time.Second)
+	waitForSocket(dbusSocket, 10*time.Second)
+
+	// Pre-touch chromium's supervisord log so kernel-images-api's `tail -f`
+	// doesn't bail out and enter its 250ms retry backoff when started in
+	// parallel with chromium. Open in append mode so an already-existing log
+	// is left intact rather than truncated.
+	if f, err := os.OpenFile(filepath.Join(supervisordLogD, "chromium"), os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644); err == nil {
+		_ = f.Close()
+	}
+
+	// Gate chromium on envoy cert/template work being done.
+	<-envoyDone
+
+	// Phase B: everything that needs X+dbus, started in a single supervisorctl
+	// invocation. On headful, mutter is the compositor and neko/api come up
+	// alongside chromium so their bring-up overlaps with chromium boot rather
+	// than trailing CDP. Headless has no compositor and no neko.
+	webrtc := prof == profileHeadful && os.Getenv("ENABLE_WEBRTC") == "true"
+	var phaseB []string
+	if prof == profileHeadful {
+		phaseB = []string{"mutter", "chromium", "kernel-images-api"}
+		if webrtc {
+			phaseB = append(phaseB, "neko")
+		}
+	} else {
+		phaseB = []string{"chromium", "kernel-images-api"}
+	}
+	startAll(phaseB...)
+
+	// Wait for the union of caller-visible ready signals. Each probe runs
+	// concurrently and logs as soon as its target is reachable.
+	waitAllReady(t0, webrtc)
+	logf("ready in %s", since(t0))
+
+	// Cosmetic + non-critical services come up off the hot path. Headless has
+	// no audio stack and no UI to dismiss.
+	if prof == profileHeadful {
+		go func() {
+			startAll("pulseaudio")
+			if os.Getenv("RUN_AS_ROOT") == "true" {
+				dismissNoSandboxWarning()
+			}
+		}()
+	}
+
+	// Re-enable scale-to-zero now that the hot path is up.
+	enableScaleToZero()
+
+	// Block on supervisord; container exits when it does.
+	<-supDone
+}
+
+// startAll asks supervisord to start the given programs. We invoke
+// supervisorctl once (it accepts multiple args) so we don't pay python
+// cold-start costs per service.
+func startAll(progs ...string) {
+	if len(progs) == 0 {
+		return
+	}
+	args := append([]string{"-c", supervisorConf, "start"}, progs...)
+	cmd := exec.Command("supervisorctl", args...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	_ = cmd.Run() // a service that fails to come up will surface via readiness checks
+}
+
+// waitForSocket polls every 20ms until path exists and is a unix socket, or
+// until timeout elapses. A timeout is logged as a warning, not returned.
+func waitForSocket(path string, timeout time.Duration) {
+	deadline := time.Now().Add(timeout)
+	for time.Now().Before(deadline) {
+		if fi, err := os.Stat(path); err == nil && fi.Mode()&os.ModeSocket != 0 {
+			return
+		}
+		time.Sleep(20 * time.Millisecond)
+	}
+	logf("WARNING: socket %s not ready after %s", path, timeout)
+}
+
+// waitAllReady gates on all caller-visible ready signals concurrently:
+//   - CDP   : HTTP /json/version on the public CDP port (proves api proxy is wired
+//     through to chromium's DevTools server)
+//   - cd    : TCP on chromedriver's internal port 9225 (api on 9224 is bound when
+//     api itself is up, which CDP readiness already implies)
+//   - proxy : TCP on chromium's --forward-proxy-port (8888)
+//   - neko  : TCP on neko's HTTP port (8080), only when ENABLE_WEBRTC=true
+//   - envoy : TCP on envoy's listener (3128), only when envoy is enabled
+//
+// Each probe is retried every 20ms for up to 60s; a probe that never succeeds
+// is logged as a warning and does not fail the boot.
+func waitAllReady(t0 time.Time, webrtc bool) {
+	chromePort := os.Getenv("CHROME_PORT")
+	if chromePort == "" {
+		chromePort = "9222"
+	}
+	type probe struct {
+		name string
+		fn   func() bool
+	}
+	probes := []probe{
+		{"cdp", func() bool { return httpProbeOK("http://127.0.0.1:" + chromePort + "/json/version") }},
+		{"chromedriver", func() bool { return tcpOK("127.0.0.1", "9225") }},
+		{"forward-proxy", func() bool { return tcpOK("127.0.0.1", "8888") }},
+	}
+	if webrtc {
+		probes = append(probes, probe{"neko", func() bool { return tcpOK("127.0.0.1", "8080") }})
+	}
+	if envoyEnabled() {
+		probes = append(probes, probe{"envoy", func() bool { return tcpOK("127.0.0.1", "3128") }})
+	}
+
+	// Buffered to len(probes) so no probe goroutine blocks on its send.
+	done := make(chan string, len(probes))
+	for _, p := range probes {
+		go func(name string, fn func() bool) {
+			deadline := time.Now().Add(60 * time.Second)
+			for time.Now().Before(deadline) {
+				if fn() {
+					logf("[ready] %s in %s", name, since(t0))
+					done <- name
+					return
+				}
+				time.Sleep(20 * time.Millisecond)
+			}
+			logf("[ready] WARNING: %s never became ready", name)
+			done <- name
+		}(p.name, p.fn)
+	}
+	for range probes {
+		<-done
+	}
+}
+
+// tcpOK reports whether an IPv4 TCP connection to host:port can be opened
+// within 200ms.
+func tcpOK(host, port string) bool {
+	c, err := net.DialTimeout("tcp4", net.JoinHostPort(host, port), 200*time.Millisecond)
+	if err != nil {
+		return false
+	}
+	_ = c.Close()
+	return true
+}
+
+// probeClient is shared by all HTTP readiness probes.
+var probeClient = &http.Client{Timeout: time.Second}
+
+// httpProbeOK reports whether a GET on url returns 200 within one second and
+// the first 4 KiB of the body contain a "Browser" key.
+func httpProbeOK(url string) bool {
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
+	defer cancel()
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return false
+	}
+	resp, err := probeClient.Do(req)
+	if err != nil {
+		return false
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return false
+	}
+	body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
+	return strings.Contains(string(body), `"Browser"`)
+}
+
+// waitForX waits until the X server is reachable on display :N. We try both
+// the named unix socket (Xorg, headful) and the abstract namespace socket
+// (Xvfb runs with -nolisten unix, which disables the named socket but leaves
+// the abstract one). Cheaper than spawning xdpyinfo in a loop.
+func waitForX(display string, timeout time.Duration) {
+	num := strings.TrimPrefix(display, ":")
+	named := "/tmp/.X11-unix/X" + num
+	abstract := "@/tmp/.X11-unix/X" + num // Linux abstract namespace
+	deadline := time.Now().Add(timeout)
+	for time.Now().Before(deadline) {
+		if c, err := net.DialTimeout("unix", named, 200*time.Millisecond); err == nil {
+			_ = c.Close()
+			return
+		}
+		if c, err := net.DialTimeout("unix", abstract, 200*time.Millisecond); err == nil {
+			_ = c.Close()
+			return
+		}
+		time.Sleep(20 * time.Millisecond)
+	}
+	logf("WARNING: X display %s not responsive after %s", display, timeout)
+}
+
+// disableScaleToZero and enableScaleToZero write "+" and "-" respectively to
+// the scale-to-zero control file.
+func disableScaleToZero() { writeScaleToZero("+") }
+func enableScaleToZero()  { writeScaleToZero("-") }
+
+// writeScaleToZero writes c to the control file, doing nothing when the file
+// is absent.
+func writeScaleToZero(c string) {
+	if _, err := os.Stat(scaleToZeroFile); err != nil {
+		return // not running on Unikraft Cloud
+	}
+	_ = os.WriteFile(scaleToZeroFile, []byte(c), 0o644)
+}
+
+// applyHeadlessDefaultFlags mirrors the legacy headless wrapper.sh: when
+// CHROMIUM_FLAGS is unset, fill in a curated headless+stealth flag list.
+// --disable-background-networking is intentionally omitted: it prevents
+// Chrome from fetching ExtensionInstallForcelist managed extensions.
+func applyHeadlessDefaultFlags() {
+	// Any non-blank caller-supplied value wins over the defaults.
+	if current := os.Getenv("CHROMIUM_FLAGS"); strings.TrimSpace(current) != "" {
+		return
+	}
+	defaults := []string{
+		"--accept-lang=en-US,en",
+		"--allow-pre-commit-input",
+		"--blink-settings=primaryHoverType=2,availableHoverTypes=2,primaryPointerType=4,availablePointerTypes=4",
+		"--crash-dumps-dir=/tmp/chromium-dumps",
+		"--disable-back-forward-cache",
+		"--disable-background-timer-throttling",
+		"--disable-backgrounding-occluded-windows",
+		"--disable-blink-features=AutomationControlled",
+		"--disable-breakpad",
+		"--disable-client-side-phishing-detection",
+		"--disable-component-extensions-with-background-pages",
+		"--disable-crash-reporter",
+		"--disable-crashpad",
+		"--disable-dev-shm-usage",
+		"--disable-features=AcceptCHFrame,AutoExpandDetailsElement,AvoidUnnecessaryBeforeUnloadCheckSync,CertificateTransparencyComponentUpdater,DeferRendererTasksAfterInput,DestroyProfileOnBrowserClose,DialMediaRouteProvider,ExtensionManifestV2Disabled,GlobalMediaControls,HttpsUpgrades,ImprovedCookieControls,LazyFrameLoading,LensOverlay,MediaRouter,PaintHolding,ThirdPartyStoragePartitioning,Translate",
+		"--disable-field-trial-config",
+		"--disable-gcm-registration",
+		"--disable-gpu",
+		"--disable-gpu-compositing",
+		"--disable-hang-monitor",
+		"--disable-ipc-flooding-protection",
+		"--disable-notifications",
+		"--disable-popup-blocking",
+		"--disable-prompt-on-repost",
+		"--disable-renderer-backgrounding",
+		"--disable-search-engine-choice-screen",
+		"--disable-software-rasterizer",
+		"--enable-use-zoom-for-dsf=false",
+		"--export-tagged-pdf",
+		"--force-color-profile=srgb",
+		"--hide-crash-restore-bubble",
+		"--hide-scrollbars",
+		"--metrics-recording-only",
+		"--mute-audio",
+		"--no-default-browser-check",
+		"--no-first-run",
+		"--no-sandbox",
+		"--no-service-autorun",
+		"--ozone-platform=headless",
+		"--password-store=basic",
+		"--unsafely-disable-devtools-self-xss-warnings",
+		"--use-angle=swiftshader",
+		"--use-gl=angle",
+		"--use-mock-keychain",
+	}
+	_ = os.Setenv("CHROMIUM_FLAGS", strings.Join(defaults, " "))
+}
+
+// profileName returns the human-readable label for p: "headless" for
+// profileHeadless, "headful" for everything else.
+func profileName(p profile) string {
+	switch p {
+	case profileHeadless:
+		return "headless"
+	default:
+		return "headful"
+	}
+}
+
+// envoyEnabled mirrors init-envoy.sh's gate: when any of these are unset
+// the script exits early without starting envoy, so we should skip the
+// readiness probe too (otherwise it would just time out at 60s).
+func envoyEnabled() bool {
+	for _, key := range []string{"INST_NAME", "METRO_NAME", "XDS_SERVER", "KERNEL_INSTANCE_JWT"} {
+		if os.Getenv(key) == "" {
+			return false
+		}
+	}
+	return true
+}
+
+// prepareUserDirs creates the directories the services expect. When asRoot is
+// false it additionally hands the kernel user's home tree and the chromium
+// policy directory to kernel:kernel; all failures are ignored.
+func prepareUserDirs(asRoot bool) {
+	var wanted []string
+	if asRoot {
+		wanted = []string{"/tmp", "/var/log", supervisordLogD, "/home/kernel", "/home/kernel/user-data"}
+	} else {
+		wanted = []string{
+			"/home/kernel/user-data",
+			"/home/kernel/.config/chromium",
+			"/home/kernel/.pki/nssdb",
+			"/home/kernel/.cache/dconf",
+			"/tmp",
+			"/var/log",
+			supervisordLogD,
+		}
+	}
+	for _, dir := range wanted {
+		_ = os.MkdirAll(dir, 0o755)
+	}
+	if asRoot {
+		// Running as root: no ownership changes are needed.
+		return
+	}
+
+	homeTree := []string{
+		"/home/kernel", "/home/kernel/user-data", "/home/kernel/.config",
+		"/home/kernel/.pki", "/home/kernel/.cache",
+	}
+	chownArgs := append([]string{"-R", "kernel:kernel"}, homeTree...)
+	_ = exec.Command("chown", chownArgs...).Run()
+	_ = exec.Command("chown", "-R", "kernel:kernel", "/etc/chromium/policies").Run()
+}
+
+// startLogAggregator tails any file under /var/log/supervisord, prefixing
+// each line with the relative path so the container log stream remains
+// readable.
+func startLogAggregator() {
+	_ = os.MkdirAll(supervisordLogD, 0o755)
+	go func() {
+		// seen records every path that already has a tailer so each file is
+		// followed exactly once.
+		seen := map[string]bool{}
+		for {
+			entries, _ := os.ReadDir(supervisordLogD)
+			for _, e := range entries {
+				path := filepath.Join(supervisordLogD, e.Name())
+				if seen[path] {
+					continue
+				}
+				if fi, err := os.Stat(path); err == nil && fi.Mode().IsRegular() {
+					seen[path] = true
+					go tailFile(path)
+				}
+			}
+			// Rescan twice a second to pick up logs created later.
+			time.Sleep(500 * time.Millisecond)
+		}
+	}()
+}
+
+// tailFile follows path with `tail -n +1 -F` and copies every line to stdout,
+// prefixed with the file's base name in brackets. It returns when tail's
+// output ends or cannot be read, after which the tail child is killed and
+// reaped.
+func tailFile(path string) {
+	cmd := exec.Command("tail", "-n", "+1", "-F", path)
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return
+	}
+	cmd.Stderr = nil
+	if err := cmd.Start(); err != nil {
+		return
+	}
+	// Never leave the child behind: stop it if it is still running and reap
+	// it so it cannot linger as a zombie.
+	defer func() {
+		_ = cmd.Process.Kill()
+		_ = cmd.Wait()
+	}()
+
+	label := filepath.Base(path)
+	// ReadString has no per-line size cap, so one oversized log line cannot
+	// end the stream the way a bufio.Scanner token limit would.
+	r := bufio.NewReaderSize(stdout, 64*1024)
+	for {
+		line, err := r.ReadString('\n')
+		if line != "" {
+			// Strip the terminator (and a preceding CR) before re-emitting.
+			line = strings.TrimSuffix(strings.TrimSuffix(line, "\n"), "\r")
+			fmt.Printf("[%s] %s\n", label, line)
+		}
+		if err != nil {
+			return
+		}
+	}
+}
+
+// dismissNoSandboxWarning replicates the wrapper.sh behaviour of clicking the
+// "X" on the --no-sandbox infobar. Cosmetic; runs off the hot path.
+func dismissNoSandboxWarning() {
+	out, err := exec.Command("xdotool", "getdisplaygeometry").Output()
+	if err != nil {
+		return
+	}
+	// Expected output is "<width> <height>".
+	parts := strings.Fields(strings.TrimSpace(string(out)))
+	if len(parts) != 2 {
+		return
+	}
+	width := parts[0]
+	// Click 30px in from the right edge when the width allows it.
+	x := width
+	if w := atoi(width); w > 30 {
+		x = fmt.Sprintf("%d", w-30)
+	}
+	// Give the "New Tab" window up to 30s to appear and accept focus; the
+	// click is attempted afterwards either way.
+	target := "New Tab - Chromium"
+	deadline := time.Now().Add(30 * time.Second)
+	for time.Now().Before(deadline) {
+		out, err := exec.Command("xdotool", "search", "--name", target).Output()
+		if err == nil && len(strings.TrimSpace(string(out))) > 0 {
+			id := strings.Fields(string(out))[0]
+			if exec.Command("xdotool", "windowactivate", "--sync", id).Run() == nil {
+				break
+			}
+		}
+		time.Sleep(100 * time.Millisecond)
+	}
+	port := os.Getenv("KERNEL_IMAGES_API_PORT")
+	if port == "" {
+		port = defaultAPIPort
+	}
+	body := fmt.Sprintf(`{"x":%s,"y":115}`, x)
+	_ = exec.Command("curl", "-s", "-o", "/dev/null", "-X", "POST",
+		"http://localhost:"+port+"/computer/click_mouse",
+		"-H", "Content-Type: application/json",
+		"-d", body).Run()
+}
+
+// atoi parses s as a non-negative decimal integer. It returns 0 for the empty
+// string and for any input containing a non-digit.
+func atoi(s string) int {
+	n := 0
+	for _, c := range s {
+		if c < '0' || c > '9' {
+			return 0
+		}
+		n = n*10 + int(c-'0')
+	}
+	return n
+}
+
+// isExecutable reports whether path is a regular file with at least one
+// execute permission bit set.
+func isExecutable(path string) bool {
+	fi, err := os.Stat(path)
+	return err == nil && fi.Mode().IsRegular() && fi.Mode().Perm()&0o111 != 0
+}
+
+// runStream runs name with args and relays its stdout and stderr line by
+// line, each line prefixed with [label]. The command's exit status is ignored.
+func runStream(label, name string, args ...string) {
+	cmd := exec.Command(name, args...)
+	stdout := &prefixWriter{label: label, w: os.Stdout}
+	stderr := &prefixWriter{label: label, w: os.Stderr}
+	cmd.Stdout = stdout
+	cmd.Stderr = stderr
+	_ = cmd.Run()
+	// Run returns only after the output copiers have finished, so any
+	// unterminated final line can be emitted safely here.
+	stdout.flush()
+	stderr.flush()
+}
+
+// prefixWriter is an io.Writer that re-emits complete lines to w with a
+// "[label] " prefix. Bytes after the last newline are held in pending until
+// the rest of the line arrives, so a line split across two Write calls is
+// still printed as one line.
+type prefixWriter struct {
+	label   string
+	w       *os.File
+	pending string
+}
+
+// Write buffers b and emits every complete line accumulated so far. It always
+// reports the whole of b as consumed.
+func (p *prefixWriter) Write(b []byte) (int, error) {
+	p.pending += string(b)
+	for {
+		i := strings.IndexByte(p.pending, '\n')
+		if i < 0 {
+			break
+		}
+		p.emit(p.pending[:i])
+		p.pending = p.pending[i+1:]
+	}
+	return len(b), nil
+}
+
+// flush emits any buffered partial line and clears the buffer.
+func (p *prefixWriter) flush() {
+	p.emit(p.pending)
+	p.pending = ""
+}
+
+// emit prints one prefixed line; empty lines are dropped.
+func (p *prefixWriter) emit(line string) {
+	if line == "" {
+		return
+	}
+	fmt.Fprintf(p.w, "[%s] %s\n", p.label, line)
+}
+
+// logf writes one printf-formatted line to stdout behind a "[wrapper] "
+// prefix, mirroring the bash script's [wrapper] tag. No timestamp is added.
+func logf(format string, args ...any) {
+	// Fprintln joins its operands with a single space and appends the
+	// newline, giving "[wrapper] <message>\n" in one write.
+	msg := fmt.Sprintf(format, args...)
+	fmt.Fprintln(os.Stdout, "[wrapper]", msg)
+}
+
+// since returns the time elapsed since t, cut down to whole milliseconds.
+func since(t time.Time) time.Duration {
+	elapsed := time.Since(t)
+	return elapsed - elapsed%time.Millisecond
+}
+
+// fatalf logs the message through logf and terminates the process with exit
+// status 1.
+func fatalf(format string, args ...any) {
+	logf(format, args...)
+	os.Exit(1)
+}
diff --git a/shared/envoy/init-envoy.sh b/shared/envoy/init-envoy.sh index c27a3f25..cedbdd2c 100644 --- a/shared/envoy/init-envoy.sh +++ b/shared/envoy/init-envoy.sh @@ -73,37 +73,7 @@ sed -e "s|{INST_NAME}|$inst_esc|g" \ echo "[envoy-init] Starting Envoy via supervisord" supervisorctl -c /etc/supervisor/supervisord.conf start envoy -# Wait for Envoy port to be open -echo "[envoy-init] Waiting for Envoy port to open..." -port_open=false -for i in {1..50}; do - if nc -z 127.0.0.1 "3128" 2>/dev/null; then - echo "[envoy-init] Envoy port confirmed open" - port_open=true - break - fi - sleep 0.2 -done - -if [[ "$port_open" != "true" ]]; then - echo "[envoy-init] ERROR: Envoy port 3128 failed to open after 10 seconds" - exit 1 -fi - -# Test proxy functionality -echo "[envoy-init] Testing proxy functionality..." -proxy_working=false -for i in {1..50}; do - if curl -s -f -x https://127.0.0.1:3128 --max-time 2 https://public-ping-bucket-kernel.s3.us-east-1.amazonaws.com/index.html >/dev/null 2>&1; then - echo "[envoy-init] Confirmed a request is proxied" - proxy_working=true - break - fi - echo "[envoy-init] Check failed, trying again..." - sleep 0.2 -done - -if [[ "$proxy_working" != "true" ]]; then - echo "[envoy-init] ERROR: Envoy proxy test failed after 10 seconds" - exit 1 -fi +# Readiness (port 3128 reachable) is now probed by the Go wrapper's +# waitAllReady alongside CDP/chromedriver, so this script returns as soon +# as the start request has been issued. Removing the in-script poll lets +# init-envoy.sh run concurrently with Phase A bring-up.
diff --git a/shared/envoy/supervisor-envoy.conf b/shared/envoy/supervisor-envoy.conf index 4da59010..ae18726f 100644 --- a/shared/envoy/supervisor-envoy.conf +++ b/shared/envoy/supervisor-envoy.conf @@ -2,6 +2,6 @@ command=envoy -c /etc/envoy/bootstrap.yaml --log-level warn --drain-time-s 1 --drain-strategy immediate autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/envoy redirect_stderr=true