From 982fe638e5730e44544c596c88a015183c88b006 Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Thu, 7 May 2026 21:31:36 +0000 Subject: [PATCH 1/6] cloudhypervisor: route serial through a hypeman-owned unix socket MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cloud Hypervisor's serial.mode=File opens the log file with plain O_WRONLY|O_CREAT and never reopens on signal. When the file is externally truncated (e.g. by rotateLogIfNeeded's copytruncate), CH's next write lands at its stale fd offset, leaving a sparse hole of NUL bytes from byte 0 to that offset. Downstream log readers chunk those NULs and choke (they JSON-encode at ~6x expansion, so a 64KiB chunk becomes a ~384KiB record and small batches blow past the receiver's 1MiB body limit). CH's ConsoleConfig has no append flag, so we can't ask CH to use O_APPEND directly. Switch to mode=Socket: hypeman binds a unix socket in the instance directory, CH connects to it as a client, and a small goroutine in the cloudhypervisor package copies bytes from the socket into the log file opened with O_APPEND. Because hypeman now owns the writer fd, copytruncate is safe — O_APPEND atomically seeks to EOF on every write, so post-truncate writes correctly resume at byte 0. Snapshot fork rewrites are updated to migrate legacy File-mode serial config to Socket on fork. Adds: - TestSerialReader_CopiesBytesToLog — basic byte path - TestSerialReader_NoSparseHoleAfterCopytruncate — regression - integration TestSystemdMode/SerialLogSurvivesCopytruncate — boots a real CH VM, copies+truncates app.log mid-run, asserts the post-rotation file is non-sparse and starts with non-NUL bytes --- integration/systemd_test.go | 62 ++++++++++ lib/hypervisor/cloudhypervisor/config.go | 26 ++++- .../cloudhypervisor/fork_snapshot.go | 7 +- .../cloudhypervisor/fork_snapshot_test.go | 7 +- lib/hypervisor/cloudhypervisor/process.go | 19 +++ lib/hypervisor/cloudhypervisor/serial.go | 102 ++++++++++++++++ lib/hypervisor/cloudhypervisor/serial_test.go | 110 ++++++++++++++++++ 7 files changed, 327 insertions(+), 6 deletions(-) create mode 100644 lib/hypervisor/cloudhypervisor/serial.go create mode 100644 lib/hypervisor/cloudhypervisor/serial_test.go diff --git a/integration/systemd_test.go b/integration/systemd_test.go index 4c552db0..22b8393b 100644 --- a/integration/systemd_test.go +++ b/integration/systemd_test.go @@ -3,8 +3,10 @@ package integration import ( "bytes" "context" + "io" "os" "strings" + "syscall" "testing" "time" @@ -162,6 +164,66 @@ func TestSystemdMode(t *testing.T) { t.Logf("Agent logs (last 5 lines):\n%s", output) }) + // Regression: serial log must survive copytruncate without leaving a + // sparse NUL hole. Pre-fix, Cloud Hypervisor held a non-O_APPEND fd on + // app.log, so post-truncate writes landed at the stale offset and the + // file became multi-GB sparse with NUL bytes from byte 0. After the + // fix, hypeman owns the writer fd with O_APPEND. + t.Run("SerialLogSurvivesCopytruncate", func(t *testing.T) { + if inst.HypervisorType != hypervisor.TypeCloudHypervisor { + t.Skipf("regression test is CH-specific; instance uses %s", inst.HypervisorType) + } + appLog := p.InstanceAppLog(inst.Id) + + require.Eventually(t, func() bool { + st, err := os.Stat(appLog) + return err == nil && st.Size() > 1024 + }, 30*time.Second, 200*time.Millisecond, "expected serial output to accumulate before rotation") + + src, err := os.Open(appLog) + require.NoError(t, err) + dst, err := os.Create(appLog + ".1") + require.NoError(t, err) + _, err = io.Copy(dst, src) + _ = src.Close() + _ = dst.Close() + require.NoError(t, err) + require.NoError(t, os.Truncate(appLog, 0)) + + // Drive more serial output post-truncate. + _, _, err = execInInstance(ctx, inst, "sh", "-c", "for i in 1 2 3; do echo post-rotate-marker-$i > /dev/kmsg; done") + require.NoError(t, err) + + require.Eventually(t, func() bool { + st, err := os.Stat(appLog) + return err == nil && st.Size() > 0 + }, 10*time.Second, 200*time.Millisecond) + + st, err := os.Stat(appLog) + require.NoError(t, err) + sys := st.Sys().(*syscall.Stat_t) + allocated := int64(sys.Blocks) * 512 + apparent := st.Size() + // Allow one block of slack since stat blocks granularity is 512B. + assert.LessOrEqualf(t, apparent, allocated+4096, + "post-rotation app.log is sparse: apparent=%d allocated=%d (sparse_bytes=%d)", + apparent, allocated, apparent-allocated) + + head := make([]byte, 64) + f, err := os.Open(appLog) + require.NoError(t, err) + n, _ := f.Read(head) + _ = f.Close() + nulCount := 0 + for _, b := range head[:n] { + if b == 0 { + nulCount++ + } + } + assert.Lessf(t, nulCount, n/2, + "post-rotation app.log starts with too many NULs (%d/%d) — likely a sparse hole", nulCount, n) + }) + t.Log("All systemd mode tests passed!") } diff --git a/lib/hypervisor/cloudhypervisor/config.go b/lib/hypervisor/cloudhypervisor/config.go index d728036b..7eb24727 100644 --- a/lib/hypervisor/cloudhypervisor/config.go +++ b/lib/hypervisor/cloudhypervisor/config.go @@ -1,10 +1,28 @@ package cloudhypervisor import ( + "path/filepath" + "github.com/kernel/hypeman/lib/hypervisor" "github.com/kernel/hypeman/lib/vmm" ) +// serialSocketPath returns the unix socket path that Cloud Hypervisor +// connects to for serial output. The socket lives next to the serial log +// file in the instance directory. +// +// We route serial through a hypeman-owned socket reader (see serial.go) +// rather than letting CH open the file directly, because CH's File-mode +// serial opens without O_APPEND. Combined with copytruncate-style log +// rotation that leaves CH's fd offset stale, the next write lands past +// EOF and creates a sparse hole of NUL bytes from byte 0 onward. +func serialSocketPath(logPath string) string { + if logPath == "" { + return "" + } + return filepath.Join(filepath.Dir(logPath), "serial.sock") +} + // ToVMConfig converts hypervisor.VMConfig to Cloud Hypervisor's vmm.VmConfig. func ToVMConfig(cfg hypervisor.VMConfig) vmm.VmConfig { // Payload configuration (kernel + initramfs) @@ -66,10 +84,12 @@ func ToVMConfig(cfg hypervisor.VMConfig) vmm.VmConfig { disks = append(disks, disk) } - // Serial console configuration + // Serial console configuration. We route serial through a unix socket + // that hypeman listens on (see startSerialReader) instead of letting + // CH write to the file directly — see serialSocketPath for rationale. serial := vmm.ConsoleConfig{ - Mode: vmm.ConsoleConfigMode("File"), - File: ptr(cfg.SerialLogPath), + Mode: vmm.ConsoleConfigModeSocket, + Socket: ptr(serialSocketPath(cfg.SerialLogPath)), } // Console off (we use serial) diff --git a/lib/hypervisor/cloudhypervisor/fork_snapshot.go b/lib/hypervisor/cloudhypervisor/fork_snapshot.go index 831c600e..b11e6e62 100644 --- a/lib/hypervisor/cloudhypervisor/fork_snapshot.go +++ b/lib/hypervisor/cloudhypervisor/fork_snapshot.go @@ -88,7 +88,12 @@ func updateSerialConfig(config map[string]any, logPath string) { if !ok || serial == nil { return } - serial["file"] = logPath + // Forks always use the socket-based serial reader (see config.go), so + // rewrite to the new shape regardless of the source snapshot's mode. + // This also migrates legacy File-mode snapshots to Socket on fork. + delete(serial, "file") + serial["mode"] = "Socket" + serial["socket"] = serialSocketPath(logPath) } func updateNetworkConfig(config map[string]any, netCfg *hypervisor.ForkNetworkConfig) { diff --git a/lib/hypervisor/cloudhypervisor/fork_snapshot_test.go b/lib/hypervisor/cloudhypervisor/fork_snapshot_test.go index b25b1034..699f7e89 100644 --- a/lib/hypervisor/cloudhypervisor/fork_snapshot_test.go +++ b/lib/hypervisor/cloudhypervisor/fork_snapshot_test.go @@ -17,7 +17,7 @@ func TestRewriteSnapshotConfigForFork(t *testing.T) { orig := map[string]any{ "disks": []any{map[string]any{"path": "/src/guests/a/overlay.raw"}}, - "serial": map[string]any{"file": "/src/guests/a/logs/app.log"}, + "serial": map[string]any{"mode": "Socket", "socket": "/src/guests/a/logs/serial.sock"}, "vsock": map[string]any{"cid": float64(100), "socket": "/src/guests/a/vsock.sock"}, "metadata": map[string]any{ "note": "keep-/src/guests/a-as-substring", @@ -59,7 +59,10 @@ func TestRewriteSnapshotConfigForFork(t *testing.T) { assert.Equal(t, "/dst/guests/b/overlay.raw", disk0["path"]) serial := updated["serial"].(map[string]any) - assert.Equal(t, "/dst/guests/b/logs/app.log", serial["file"]) + assert.Equal(t, "Socket", serial["mode"]) + assert.Equal(t, "/dst/guests/b/logs/serial.sock", serial["socket"]) + _, hasFile := serial["file"] + assert.False(t, hasFile, "fork rewrite should drop legacy serial.file") vsock := updated["vsock"].(map[string]any) assert.Equal(t, float64(100), vsock["cid"]) diff --git a/lib/hypervisor/cloudhypervisor/process.go b/lib/hypervisor/cloudhypervisor/process.go index 0618de43..98837dc9 100644 --- a/lib/hypervisor/cloudhypervisor/process.go +++ b/lib/hypervisor/cloudhypervisor/process.go @@ -69,17 +69,26 @@ func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, s return 0, nil, fmt.Errorf("unsupported cloud-hypervisor version: %s", version) } + // 0. Start the serial reader before CH so the unix socket is bound by + // the time CH boots and tries to connect. + sr, err := startSerialReader(ctx, serialSocketPath(config.SerialLogPath), config.SerialLogPath) + if err != nil { + return 0, nil, fmt.Errorf("start serial reader: %w", err) + } + // 1. Start the Cloud Hypervisor process processCtx, processSpan := hypervisor.StartProcessSpan(ctx, hypervisor.TypeCloudHypervisor) pid, err := vmm.StartProcess(processCtx, p, chVersion, socketPath) hypervisor.FinishTraceSpan(processSpan, err) if err != nil { + sr.Close() return 0, nil, fmt.Errorf("start process: %w", err) } // Setup cleanup to kill the process if subsequent steps fail cu := cleanup.Make(func() { syscall.Kill(pid, syscall.SIGKILL) + sr.Close() }) defer cu.Clean() @@ -129,12 +138,21 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, return 0, nil, fmt.Errorf("unsupported cloud-hypervisor version: %s", version) } + // 0. Start the serial reader before CH. The serial log path lives at + // a fixed offset from the CH API socket within the instance directory. + logPath := filepath.Join(filepath.Dir(socketPath), "logs", "app.log") + sr, err := startSerialReader(ctx, serialSocketPath(logPath), logPath) + if err != nil { + return 0, nil, fmt.Errorf("start serial reader: %w", err) + } + // 1. Start the Cloud Hypervisor process processStartTime := time.Now() processCtx, processSpan := hypervisor.StartProcessSpan(ctx, hypervisor.TypeCloudHypervisor) pid, err := vmm.StartProcess(processCtx, p, chVersion, socketPath) hypervisor.FinishTraceSpan(processSpan, err) if err != nil { + sr.Close() return 0, nil, fmt.Errorf("start process: %w", err) } log.DebugContext(ctx, "CH process started", "pid", pid, "duration_ms", time.Since(processStartTime).Milliseconds()) @@ -142,6 +160,7 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, // Setup cleanup to kill the process if subsequent steps fail cu := cleanup.Make(func() { syscall.Kill(pid, syscall.SIGKILL) + sr.Close() }) defer cu.Clean() diff --git a/lib/hypervisor/cloudhypervisor/serial.go b/lib/hypervisor/cloudhypervisor/serial.go new file mode 100644 index 00000000..0e94c988 --- /dev/null +++ b/lib/hypervisor/cloudhypervisor/serial.go @@ -0,0 +1,102 @@ +package cloudhypervisor + +import ( + "context" + "errors" + "fmt" + "io" + "net" + "os" + "path/filepath" + "syscall" + + "github.com/kernel/hypeman/lib/logger" +) + +// serialReader owns the listener for the per-instance unix socket that +// Cloud Hypervisor connects to for serial output, and copies bytes from +// the connection into the serial log file with O_APPEND. Owning the +// writer fd lets copytruncate log rotation work safely: O_APPEND +// atomically seeks to EOF on every write, so writes after a truncate +// land at byte 0 instead of the writer's stale offset. +type serialReader struct { + ln net.Listener + socketPath string + logPath string + done chan struct{} +} + +// startSerialReader binds the unix socket at socketPath, removing any +// stale predecessor, and starts a goroutine that accepts CH's connection +// and pipes serial output into logPath. The listener is closed after the +// first accept (CH only ever opens one connection per VM). Callers must +// call Close on failure paths to release the listener if CH never +// connects. +func startSerialReader(ctx context.Context, socketPath, logPath string) (*serialReader, error) { + if socketPath == "" || logPath == "" { + return nil, errors.New("serial: socket and log path are required") + } + + if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil { + return nil, fmt.Errorf("serial: create log dir: %w", err) + } + + if err := os.Remove(socketPath); err != nil && !os.IsNotExist(err) { + return nil, fmt.Errorf("serial: remove stale socket: %w", err) + } + + ln, err := net.Listen("unix", socketPath) + if err != nil { + return nil, fmt.Errorf("serial: listen %s: %w", socketPath, err) + } + + sr := &serialReader{ + ln: ln, + socketPath: socketPath, + logPath: logPath, + done: make(chan struct{}), + } + go sr.run(ctx) + return sr, nil +} + +func (s *serialReader) run(ctx context.Context) { + defer close(s.done) + defer os.Remove(s.socketPath) + + conn, err := s.ln.Accept() + // CH only opens one connection per VM, so close the listener now. + _ = s.ln.Close() + if err != nil { + // Closed before CH connected (e.g. boot failed) — nothing to do. + return + } + defer conn.Close() + + f, err := os.OpenFile(s.logPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + if err != nil { + logger.FromContext(ctx).ErrorContext(ctx, "serial: open log", + "path", s.logPath, "err", err) + return + } + defer f.Close() + + if _, err := io.Copy(f, conn); err != nil && + !errors.Is(err, io.EOF) && + !errors.Is(err, net.ErrClosed) && + !errors.Is(err, syscall.EPIPE) { + logger.FromContext(ctx).WarnContext(ctx, "serial: copy ended with error", + "path", s.logPath, "err", err) + } +} + +// Close releases the listener if it is still open. Safe to call multiple +// times. Once CH has connected and closed the socket the goroutine exits +// on its own; Close is a best-effort signal for the failure path where +// CH never boots. +func (s *serialReader) Close() { + if s == nil { + return + } + _ = s.ln.Close() +} diff --git a/lib/hypervisor/cloudhypervisor/serial_test.go b/lib/hypervisor/cloudhypervisor/serial_test.go new file mode 100644 index 00000000..c24b1cb6 --- /dev/null +++ b/lib/hypervisor/cloudhypervisor/serial_test.go @@ -0,0 +1,110 @@ +package cloudhypervisor + +import ( + "context" + "io" + "net" + "os" + "path/filepath" + "syscall" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSerialReader_CopiesBytesToLog(t *testing.T) { + tmp := t.TempDir() + logPath := filepath.Join(tmp, "logs", "app.log") + sockPath := serialSocketPath(logPath) + + sr, err := startSerialReader(context.Background(), sockPath, logPath) + require.NoError(t, err) + t.Cleanup(sr.Close) + + conn, err := net.Dial("unix", sockPath) + require.NoError(t, err) + + payload := []byte("hello from cloud hypervisor\n") + _, err = conn.Write(payload) + require.NoError(t, err) + require.NoError(t, conn.Close()) + + require.Eventually(t, func() bool { + data, err := os.ReadFile(logPath) + return err == nil && len(data) == len(payload) + }, 2*time.Second, 10*time.Millisecond, "expected reader to flush bytes to log") + + data, err := os.ReadFile(logPath) + require.NoError(t, err) + assert.Equal(t, payload, data) +} + +// TestSerialReader_NoSparseHoleAfterCopytruncate is the regression test +// for the bug that motivated this fix: copytruncate against a file whose +// writer holds a non-O_APPEND fd creates a sparse hole of NUL bytes from +// byte 0 to the writer's stale offset. Hypeman now owns the writer fd +// with O_APPEND, so post-truncate writes correctly resume at byte 0. +func TestSerialReader_NoSparseHoleAfterCopytruncate(t *testing.T) { + tmp := t.TempDir() + logPath := filepath.Join(tmp, "logs", "app.log") + sockPath := serialSocketPath(logPath) + + sr, err := startSerialReader(context.Background(), sockPath, logPath) + require.NoError(t, err) + t.Cleanup(sr.Close) + + conn, err := net.Dial("unix", sockPath) + require.NoError(t, err) + t.Cleanup(func() { _ = conn.Close() }) + + pre := []byte("pre-rotate line\n") + _, err = conn.Write(pre) + require.NoError(t, err) + + require.Eventually(t, func() bool { + st, err := os.Stat(logPath) + return err == nil && st.Size() == int64(len(pre)) + }, 2*time.Second, 10*time.Millisecond) + + // Mimic rotateLogIfNeeded: copy then truncate the file out from under + // the writer. + require.NoError(t, copyFile(logPath, logPath+".1")) + require.NoError(t, os.Truncate(logPath, 0)) + + post := []byte("post-rotate line\n") + _, err = conn.Write(post) + require.NoError(t, err) + + require.Eventually(t, func() bool { + st, err := os.Stat(logPath) + return err == nil && st.Size() == int64(len(post)) + }, 2*time.Second, 10*time.Millisecond) + + st, err := os.Stat(logPath) + require.NoError(t, err) + sys := st.Sys().(*syscall.Stat_t) + allocated := int64(sys.Blocks) * 512 + apparent := st.Size() + assert.LessOrEqual(t, apparent, allocated, "post-truncate file is sparse: apparent=%d allocated=%d", apparent, allocated) + + data, err := os.ReadFile(logPath) + require.NoError(t, err) + assert.Equal(t, post, data, "post-truncate writes should land at byte 0, not at the writer's stale offset") +} + +func copyFile(src, dst string) error { + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + out, err := os.Create(dst) + if err != nil { + return err + } + defer out.Close() + _, err = io.Copy(out, in) + return err +} From 13d48cb4b08a6ca553cc187ef1b267542794eb20 Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Thu, 7 May 2026 21:57:14 +0000 Subject: [PATCH 2/6] cloudhypervisor: connect to CH-bound serial socket as a client Cloud Hypervisor's mode=Socket calls UnixListener::bind() inside vm.create. The previous implementation made hypeman bind the socket first, which caused EADDRINUSE on CH side. Flip the design: hypeman opens the log with O_APPEND up front, then dials the socket with retry once CH binds it during boot. Also use a short /tmp temp dir in unit tests so socket paths stay under the platform sun_path limit (104 bytes on macOS). --- lib/hypervisor/cloudhypervisor/serial.go | 121 ++++++++++++------ lib/hypervisor/cloudhypervisor/serial_test.go | 30 ++++- 2 files changed, 109 insertions(+), 42 deletions(-) diff --git a/lib/hypervisor/cloudhypervisor/serial.go b/lib/hypervisor/cloudhypervisor/serial.go index 0e94c988..3c533e1b 100644 --- a/lib/hypervisor/cloudhypervisor/serial.go +++ b/lib/hypervisor/cloudhypervisor/serial.go @@ -5,33 +5,43 @@ import ( "errors" "fmt" "io" + "log/slog" "net" "os" "path/filepath" + "sync" "syscall" + "time" "github.com/kernel/hypeman/lib/logger" ) -// serialReader owns the listener for the per-instance unix socket that -// Cloud Hypervisor connects to for serial output, and copies bytes from -// the connection into the serial log file with O_APPEND. Owning the -// writer fd lets copytruncate log rotation work safely: O_APPEND +// serialReader connects to the per-instance unix socket that Cloud +// Hypervisor binds for serial output (mode=Socket), and copies bytes +// from that connection into the serial log file with O_APPEND. Owning +// the writer fd lets copytruncate log rotation work safely: O_APPEND // atomically seeks to EOF on every write, so writes after a truncate // land at byte 0 instead of the writer's stale offset. +// +// CH is the server here — it calls UnixListener::bind(socket) during +// VM creation. Hypeman is the client and dials with retry, since the +// reader is started before CH has had a chance to bind. type serialReader struct { - ln net.Listener socketPath string logPath string - done chan struct{} + logFile *os.File + + cancel context.CancelFunc + done chan struct{} + + mu sync.Mutex + conn net.Conn } -// startSerialReader binds the unix socket at socketPath, removing any -// stale predecessor, and starts a goroutine that accepts CH's connection -// and pipes serial output into logPath. The listener is closed after the -// first accept (CH only ever opens one connection per VM). Callers must -// call Close on failure paths to release the listener if CH never -// connects. +// startSerialReader removes any stale socket left over from a prior +// run, opens the log file with O_APPEND, and spawns a goroutine that +// dials the socket once CH binds it and pipes serial output into the +// log. Callers must call Close on failure paths and on shutdown. func startSerialReader(ctx context.Context, socketPath, logPath string) (*serialReader, error) { if socketPath == "" || logPath == "" { return nil, errors.New("serial: socket and log path are required") @@ -41,62 +51,97 @@ func startSerialReader(ctx context.Context, socketPath, logPath string) (*serial return nil, fmt.Errorf("serial: create log dir: %w", err) } + // CH refuses to start if the socket path already exists (it calls + // bind() on it). Wipe any leftover from a prior run. if err := os.Remove(socketPath); err != nil && !os.IsNotExist(err) { return nil, fmt.Errorf("serial: remove stale socket: %w", err) } - ln, err := net.Listen("unix", socketPath) + f, err := os.OpenFile(logPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) if err != nil { - return nil, fmt.Errorf("serial: listen %s: %w", socketPath, err) + return nil, fmt.Errorf("serial: open log: %w", err) } + runCtx, cancel := context.WithCancel(context.Background()) sr := &serialReader{ - ln: ln, socketPath: socketPath, logPath: logPath, + logFile: f, + cancel: cancel, done: make(chan struct{}), } - go sr.run(ctx) + go sr.run(runCtx, logger.FromContext(ctx)) return sr, nil } -func (s *serialReader) run(ctx context.Context) { +func (s *serialReader) run(ctx context.Context, log *slog.Logger) { defer close(s.done) - defer os.Remove(s.socketPath) - - conn, err := s.ln.Accept() - // CH only opens one connection per VM, so close the listener now. - _ = s.ln.Close() - if err != nil { - // Closed before CH connected (e.g. boot failed) — nothing to do. - return - } - defer conn.Close() + defer s.logFile.Close() - f, err := os.OpenFile(s.logPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + conn, err := dialUnixWithRetry(ctx, s.socketPath, 60*time.Second) if err != nil { - logger.FromContext(ctx).ErrorContext(ctx, "serial: open log", - "path", s.logPath, "err", err) + // Caller closed us before CH bound the socket, or CH never + // did. Either way, nothing more to do. return } - defer f.Close() + s.mu.Lock() + s.conn = conn + s.mu.Unlock() - if _, err := io.Copy(f, conn); err != nil && + if _, err := io.Copy(s.logFile, conn); err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, net.ErrClosed) && !errors.Is(err, syscall.EPIPE) { - logger.FromContext(ctx).WarnContext(ctx, "serial: copy ended with error", + log.WarnContext(ctx, "serial: copy ended with error", "path", s.logPath, "err", err) } + _ = conn.Close() +} + +// dialUnixWithRetry polls for the CH listener to come up. The socket +// path is created by CH inside vm.create, which happens after the +// reader is started, so we must wait. Returns the first successful +// connection or the last dial error after timeout. +func dialUnixWithRetry(ctx context.Context, path string, timeout time.Duration) (net.Conn, error) { + deadline := time.Now().Add(timeout) + var lastErr error + for { + if err := ctx.Err(); err != nil { + if lastErr != nil { + return nil, lastErr + } + return nil, err + } + conn, err := net.Dial("unix", path) + if err == nil { + return conn, nil + } + lastErr = err + if time.Now().After(deadline) { + return nil, fmt.Errorf("serial: dial %s: %w", path, lastErr) + } + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(50 * time.Millisecond): + } + } } -// Close releases the listener if it is still open. Safe to call multiple -// times. Once CH has connected and closed the socket the goroutine exits -// on its own; Close is a best-effort signal for the failure path where -// CH never boots. +// Close stops the reader. Safe to call multiple times. func (s *serialReader) Close() { if s == nil { return } - _ = s.ln.Close() + s.cancel() + s.mu.Lock() + if s.conn != nil { + _ = s.conn.Close() + } + s.mu.Unlock() + select { + case <-s.done: + case <-time.After(2 * time.Second): + } + _ = os.Remove(s.socketPath) } diff --git a/lib/hypervisor/cloudhypervisor/serial_test.go b/lib/hypervisor/cloudhypervisor/serial_test.go index c24b1cb6..466595c4 100644 --- a/lib/hypervisor/cloudhypervisor/serial_test.go +++ b/lib/hypervisor/cloudhypervisor/serial_test.go @@ -14,16 +14,34 @@ import ( "github.com/stretchr/testify/require" ) +// shortTempDir returns a temp dir with a short prefix so unix socket +// paths stay under the platform sun_path limit (108 bytes on Linux, +// 104 on macOS). t.TempDir() under /var/folders on macOS overflows. +func shortTempDir(t *testing.T) string { + t.Helper() + dir, err := os.MkdirTemp("/tmp", "ch") + require.NoError(t, err) + t.Cleanup(func() { _ = os.RemoveAll(dir) }) + return dir +} + func TestSerialReader_CopiesBytesToLog(t *testing.T) { - tmp := t.TempDir() + tmp := shortTempDir(t) logPath := filepath.Join(tmp, "logs", "app.log") sockPath := serialSocketPath(logPath) + // Reader starts first (mirrors production order: hypeman starts the + // reader, then CH binds the socket during vm.create). The reader + // dials with retry until the listener comes up. sr, err := startSerialReader(context.Background(), sockPath, logPath) require.NoError(t, err) t.Cleanup(sr.Close) - conn, err := net.Dial("unix", sockPath) + ln, err := net.Listen("unix", sockPath) + require.NoError(t, err) + t.Cleanup(func() { _ = ln.Close() }) + + conn, err := ln.Accept() require.NoError(t, err) payload := []byte("hello from cloud hypervisor\n") @@ -47,7 +65,7 @@ func TestSerialReader_CopiesBytesToLog(t *testing.T) { // byte 0 to the writer's stale offset. Hypeman now owns the writer fd // with O_APPEND, so post-truncate writes correctly resume at byte 0. func TestSerialReader_NoSparseHoleAfterCopytruncate(t *testing.T) { - tmp := t.TempDir() + tmp := shortTempDir(t) logPath := filepath.Join(tmp, "logs", "app.log") sockPath := serialSocketPath(logPath) @@ -55,7 +73,11 @@ func TestSerialReader_NoSparseHoleAfterCopytruncate(t *testing.T) { require.NoError(t, err) t.Cleanup(sr.Close) - conn, err := net.Dial("unix", sockPath) + ln, err := net.Listen("unix", sockPath) + require.NoError(t, err) + t.Cleanup(func() { _ = ln.Close() }) + + conn, err := ln.Accept() require.NoError(t, err) t.Cleanup(func() { _ = conn.Close() }) From 0646edcd9a02d01c3c0d804cde634db94d91e3ad Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Thu, 7 May 2026 22:03:07 +0000 Subject: [PATCH 3/6] cloudhypervisor: move serial socket up one level to fit sun_path Long test temp paths plus /logs/serial.sock pushed the unix socket path over Linux's 108-byte sun_path limit, so CH's UnixListener::bind returned EINVAL ("path must be shorter than SUN_LEN"). Place the socket next to ch.sock at the instance dir instead of under logs/. --- lib/hypervisor/cloudhypervisor/config.go | 8 +++++--- lib/hypervisor/cloudhypervisor/fork_snapshot_test.go | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/lib/hypervisor/cloudhypervisor/config.go b/lib/hypervisor/cloudhypervisor/config.go index 7eb24727..0c5a0ae5 100644 --- a/lib/hypervisor/cloudhypervisor/config.go +++ b/lib/hypervisor/cloudhypervisor/config.go @@ -8,8 +8,10 @@ import ( ) // serialSocketPath returns the unix socket path that Cloud Hypervisor -// connects to for serial output. The socket lives next to the serial log -// file in the instance directory. +// binds for serial output. The socket lives at the instance directory +// level, next to ch.sock and vsock.sock — not under logs/ — so the +// total path stays under the 108-byte sun_path limit on Linux (104 on +// macOS) when long test temp prefixes are involved. // // We route serial through a hypeman-owned socket reader (see serial.go) // rather than letting CH open the file directly, because CH's File-mode @@ -20,7 +22,7 @@ func serialSocketPath(logPath string) string { if logPath == "" { return "" } - return filepath.Join(filepath.Dir(logPath), "serial.sock") + return filepath.Join(filepath.Dir(filepath.Dir(logPath)), "serial.sock") } // ToVMConfig converts hypervisor.VMConfig to Cloud Hypervisor's vmm.VmConfig. diff --git a/lib/hypervisor/cloudhypervisor/fork_snapshot_test.go b/lib/hypervisor/cloudhypervisor/fork_snapshot_test.go index 699f7e89..75a0c406 100644 --- a/lib/hypervisor/cloudhypervisor/fork_snapshot_test.go +++ b/lib/hypervisor/cloudhypervisor/fork_snapshot_test.go @@ -17,7 +17,7 @@ func TestRewriteSnapshotConfigForFork(t *testing.T) { orig := map[string]any{ "disks": []any{map[string]any{"path": "/src/guests/a/overlay.raw"}}, - "serial": map[string]any{"mode": "Socket", "socket": "/src/guests/a/logs/serial.sock"}, + "serial": map[string]any{"mode": "Socket", "socket": "/src/guests/a/serial.sock"}, "vsock": map[string]any{"cid": float64(100), "socket": "/src/guests/a/vsock.sock"}, "metadata": map[string]any{ "note": "keep-/src/guests/a-as-substring", @@ -60,7 +60,7 @@ func TestRewriteSnapshotConfigForFork(t *testing.T) { serial := updated["serial"].(map[string]any) assert.Equal(t, "Socket", serial["mode"]) - assert.Equal(t, "/dst/guests/b/logs/serial.sock", serial["socket"]) + assert.Equal(t, "/dst/guests/b/serial.sock", serial["socket"]) _, hasFile := serial["file"] assert.False(t, hasFile, "fork rewrite should drop legacy serial.file") From 1f1155e80ef3b2871bce67349283f77594f5e785 Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Thu, 7 May 2026 22:26:31 +0000 Subject: [PATCH 4/6] cloudhypervisor: rewrite serial config on restore as well Pre-fix snapshots embed serial.mode=File, so a plain restore on an upgraded hypeman would still write directly to app.log without O_APPEND and reproduce the copytruncate sparse-hole bug. Mirror the fork-time rewrite in RestoreVM so legacy snapshots are migrated to mode=Socket pointing at the per-instance reader. Idempotent for post-fix snapshots already on Socket. --- .../cloudhypervisor/fork_snapshot.go | 25 ++++++++ .../cloudhypervisor/fork_snapshot_test.go | 59 +++++++++++++++++++ lib/hypervisor/cloudhypervisor/process.go | 8 +++ 3 files changed, 92 insertions(+) diff --git a/lib/hypervisor/cloudhypervisor/fork_snapshot.go b/lib/hypervisor/cloudhypervisor/fork_snapshot.go index b11e6e62..7db73ced 100644 --- a/lib/hypervisor/cloudhypervisor/fork_snapshot.go +++ b/lib/hypervisor/cloudhypervisor/fork_snapshot.go @@ -96,6 +96,31 @@ func updateSerialConfig(config map[string]any, logPath string) { serial["socket"] = serialSocketPath(logPath) } +// rewriteSerialConfigForRestore migrates the on-disk snapshot config so +// CH binds the socket-mode reader on restore. Pre-fix snapshots embed +// serial.mode=File, which keeps the original copytruncate sparse-hole +// bug alive after restore. New snapshots are already mode=Socket, so +// this is a cheap no-op for them. +func rewriteSerialConfigForRestore(configPath, logPath string) error { + data, err := os.ReadFile(configPath) + if err != nil { + return fmt.Errorf("read snapshot config: %w", err) + } + var config map[string]any + if err := json.Unmarshal(data, &config); err != nil { + return fmt.Errorf("unmarshal snapshot config: %w", err) + } + updateSerialConfig(config, logPath) + updated, err := json.MarshalIndent(config, "", " ") + if err != nil { + return fmt.Errorf("marshal snapshot config: %w", err) + } + if err := os.WriteFile(configPath, updated, 0644); err != nil { + return fmt.Errorf("write snapshot config: %w", err) + } + return nil +} + func updateNetworkConfig(config map[string]any, netCfg *hypervisor.ForkNetworkConfig) { nets, ok := config["net"].([]any) if !ok { diff --git a/lib/hypervisor/cloudhypervisor/fork_snapshot_test.go b/lib/hypervisor/cloudhypervisor/fork_snapshot_test.go index 75a0c406..4f024b99 100644 --- a/lib/hypervisor/cloudhypervisor/fork_snapshot_test.go +++ b/lib/hypervisor/cloudhypervisor/fork_snapshot_test.go @@ -77,3 +77,62 @@ func TestRewriteSnapshotConfigForFork(t *testing.T) { metadata := updated["metadata"].(map[string]any) assert.Equal(t, "keep-/src/guests/a-as-substring", metadata["note"]) } + +func TestRewriteSerialConfigForRestore(t *testing.T) { + t.Run("FileToSocket", func(t *testing.T) { + path := writeSnapshotConfig(t, map[string]any{ + "serial": map[string]any{"mode": "File", "file": "/old/logs/app.log"}, + }) + require.NoError(t, rewriteSerialConfigForRestore(path, "/inst/logs/app.log")) + + serial := readSerialConfig(t, path) + assert.Equal(t, "Socket", serial["mode"]) + assert.Equal(t, "/inst/serial.sock", serial["socket"]) + _, hasFile := serial["file"] + assert.False(t, hasFile, "legacy file field must be removed") + }) + + t.Run("AlreadySocketIsIdempotent", func(t *testing.T) { + path := writeSnapshotConfig(t, map[string]any{ + "serial": map[string]any{"mode": "Socket", "socket": "/inst/serial.sock"}, + }) + require.NoError(t, rewriteSerialConfigForRestore(path, "/inst/logs/app.log")) + + serial := readSerialConfig(t, path) + assert.Equal(t, "Socket", serial["mode"]) + assert.Equal(t, "/inst/serial.sock", serial["socket"]) + }) + + t.Run("NoSerialBlockIsNoOp", func(t *testing.T) { + path := writeSnapshotConfig(t, map[string]any{ + "disks": []any{map[string]any{"path": "/x"}}, + }) + require.NoError(t, rewriteSerialConfigForRestore(path, "/inst/logs/app.log")) + + var cfg map[string]any + data, err := os.ReadFile(path) + require.NoError(t, err) + require.NoError(t, json.Unmarshal(data, &cfg)) + _, hasSerial := cfg["serial"] + assert.False(t, hasSerial, "should not synthesize a serial block") + }) +} + +func writeSnapshotConfig(t *testing.T, cfg map[string]any) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "config.json") + data, err := json.Marshal(cfg) + require.NoError(t, err) + require.NoError(t, os.WriteFile(path, data, 0644)) + return path +} + +func readSerialConfig(t *testing.T, path string) map[string]any { + t.Helper() + data, err := os.ReadFile(path) + require.NoError(t, err) + var cfg map[string]any + require.NoError(t, json.Unmarshal(data, &cfg)) + return cfg["serial"].(map[string]any) +} diff --git a/lib/hypervisor/cloudhypervisor/process.go b/lib/hypervisor/cloudhypervisor/process.go index 98837dc9..89962fb3 100644 --- a/lib/hypervisor/cloudhypervisor/process.go +++ b/lib/hypervisor/cloudhypervisor/process.go @@ -146,6 +146,14 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, return 0, nil, fmt.Errorf("start serial reader: %w", err) } + // Migrate legacy serial.mode=File snapshots to Socket so CH binds the + // reader's socket on restore. New snapshots are already Socket; the + // rewrite is idempotent. + if err := rewriteSerialConfigForRestore(filepath.Join(snapshotPath, "config.json"), logPath); err != nil { + sr.Close() + return 0, nil, fmt.Errorf("rewrite snapshot serial config: %w", err) + } + // 1. Start the Cloud Hypervisor process processStartTime := time.Now() processCtx, processSpan := hypervisor.StartProcessSpan(ctx, hypervisor.TypeCloudHypervisor) From 2755e50656433890c40cb7085514625a80bb79eb Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Fri, 8 May 2026 00:06:05 +0000 Subject: [PATCH 5/6] cloudhypervisor: own the serial reader so Shutdown stops it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bugbot flagged that on the success path of StartVM/RestoreVM the serialReader was constructed but never retained anywhere — so its Close() could not be called from Shutdown, and the unix socket file persisted on disk after the VM went away (CH unlinks on Drop, but nothing guarantees CH actually exits cleanly). Hand the reader to the CloudHypervisor client and call Close() in Shutdown. Also defer os.Remove(socketPath) inside the run goroutine so the socket is unlinked on goroutine exit even if Close() is never called (e.g. CH crash). Co-Authored-By: Claude Opus 4.7 --- lib/hypervisor/cloudhypervisor/cloudhypervisor.go | 10 +++++++++- lib/hypervisor/cloudhypervisor/process.go | 10 ++++++++-- lib/hypervisor/cloudhypervisor/serial.go | 5 +++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/lib/hypervisor/cloudhypervisor/cloudhypervisor.go b/lib/hypervisor/cloudhypervisor/cloudhypervisor.go index 95e0792a..d9c8c30c 100644 --- a/lib/hypervisor/cloudhypervisor/cloudhypervisor.go +++ b/lib/hypervisor/cloudhypervisor/cloudhypervisor.go @@ -15,6 +15,11 @@ import ( type CloudHypervisor struct { client *vmm.VMM socketPath string + // serial is set on the Starter path (StartVM/RestoreVM) so Shutdown + // can stop the reader explicitly. When the client is constructed via + // the reconnect factory (New), there is no reader to own — the + // goroutine from the original process exited with that process. + serial *serialReader } var balloonTargetCache hypervisor.BalloonTargetCache @@ -73,10 +78,13 @@ func (c *CloudHypervisor) DeleteVM(ctx context.Context) error { // Shutdown stops the VMM process gracefully. func (c *CloudHypervisor) Shutdown(ctx context.Context) error { resp, err := c.client.ShutdownVMMWithResponse(ctx) + // Stop the serial reader regardless of API outcome — once Shutdown + // has been requested the VM is going away and the reader has no + // further work. + c.serial.Close() if err != nil { return fmt.Errorf("shutdown vmm: %w", err) } - // ShutdownVMM may return various codes, 204 is success if resp.StatusCode() != 204 { return fmt.Errorf("shutdown vmm failed with status %d", resp.StatusCode()) } diff --git a/lib/hypervisor/cloudhypervisor/process.go b/lib/hypervisor/cloudhypervisor/process.go index 89962fb3..bd609c4f 100644 --- a/lib/hypervisor/cloudhypervisor/process.go +++ b/lib/hypervisor/cloudhypervisor/process.go @@ -121,7 +121,10 @@ func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, s return 0, nil, fmt.Errorf("boot vm failed with status %d: %s", bootResp.StatusCode(), string(bootResp.Body)) } - // Success - release cleanup to prevent killing the process + // Success - release cleanup to prevent killing the process. Hand + // ownership of the serial reader to the client so Shutdown can + // stop it. + hv.serial = sr cu.Release() return pid, hv, nil } @@ -194,7 +197,10 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, } log.DebugContext(ctx, "CH restore API complete", "duration_ms", time.Since(restoreAPIStart).Milliseconds()) - // Success - release cleanup to prevent killing the process + // Success - release cleanup to prevent killing the process. Hand + // ownership of the serial reader to the client so Shutdown can + // stop it. + hv.serial = sr cu.Release() log.DebugContext(ctx, "CH restore complete", "pid", pid, "total_duration_ms", time.Since(startTime).Milliseconds()) return pid, hv, nil diff --git a/lib/hypervisor/cloudhypervisor/serial.go b/lib/hypervisor/cloudhypervisor/serial.go index 3c533e1b..58e2f2fd 100644 --- a/lib/hypervisor/cloudhypervisor/serial.go +++ b/lib/hypervisor/cloudhypervisor/serial.go @@ -77,6 +77,11 @@ func startSerialReader(ctx context.Context, socketPath, logPath string) (*serial func (s *serialReader) run(ctx context.Context, log *slog.Logger) { defer close(s.done) defer s.logFile.Close() + // Drop the socket file when the goroutine exits so it doesn't persist + // on disk after the VM is gone, regardless of whether Close was + // called explicitly. CH normally unlinks the path itself on Drop, + // but this is belt-and-suspenders for the leak-on-success case. + defer func() { _ = os.Remove(s.socketPath) }() conn, err := dialUnixWithRetry(ctx, s.socketPath, 60*time.Second) if err != nil { From 02bb2de116b01e7aa3ed7f6102f9948d12fe46e8 Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Fri, 8 May 2026 00:18:06 +0000 Subject: [PATCH 6/6] cloudhypervisor: close race between dial completion and Close() Two related fixes in serialReader: 1. Race in run/Close. Between dialUnixWithRetry returning a conn and the goroutine assigning it under s.mu, Close() could acquire the lock, see s.conn == nil, and time out without closing the dialed conn. The goroutine would then publish the conn and io.Copy would block forever on a connection nobody can reach. Fix: re-check ctx.Err() under the lock and close the conn ourselves if Close already fired. 2. Use net.Dialer.DialContext instead of net.Dial so a single dial attempt is interruptible. The retry loop already checks ctx between attempts, but a single Dial syscall is not. In practice ENOENT returns instantly, but DialContext is the correct form. Co-Authored-By: Claude Opus 4.7 --- lib/hypervisor/cloudhypervisor/serial.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/hypervisor/cloudhypervisor/serial.go b/lib/hypervisor/cloudhypervisor/serial.go index 58e2f2fd..9fa3c7e2 100644 --- a/lib/hypervisor/cloudhypervisor/serial.go +++ b/lib/hypervisor/cloudhypervisor/serial.go @@ -89,7 +89,16 @@ func (s *serialReader) run(ctx context.Context, log *slog.Logger) { // did. Either way, nothing more to do. return } + // Re-check ctx under the lock before publishing the conn. If Close + // fired between dial returning and us taking the lock, it would have + // observed s.conn == nil and given up — without this check we'd + // publish the conn afterward and io.Copy would block forever. s.mu.Lock() + if ctx.Err() != nil { + s.mu.Unlock() + _ = conn.Close() + return + } s.conn = conn s.mu.Unlock() @@ -109,6 +118,7 @@ func (s *serialReader) run(ctx context.Context, log *slog.Logger) { // connection or the last dial error after timeout. func dialUnixWithRetry(ctx context.Context, path string, timeout time.Duration) (net.Conn, error) { deadline := time.Now().Add(timeout) + var dialer net.Dialer var lastErr error for { if err := ctx.Err(); err != nil { @@ -117,7 +127,7 @@ func dialUnixWithRetry(ctx context.Context, path string, timeout time.Duration) } return nil, err } - conn, err := net.Dial("unix", path) + conn, err := dialer.DialContext(ctx, "unix", path) if err == nil { return conn, nil }