From fe7f854d6d7d6f959c41b6752277478b72cd2cbd Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Mon, 23 Feb 2026 21:19:09 +0100 Subject: [PATCH 1/2] Add integration test for deferred vsock connection stability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exercises the dialAgent() → gRPC RPC path with deliberate delays between creating the connection and making the first RPC call. This reproduces a crash where NIO hits a precondition failure (EBADF) in fcntl(F_SETNOSIGPIPE) because the VZVirtioSocketConnection was closed before the gRPC client created the NIO channel. --- Sources/Integration/ContainerTests.swift | 64 ++++++++++++++++++++++++ Sources/Integration/Suite.swift | 1 + 2 files changed, 65 insertions(+) diff --git a/Sources/Integration/ContainerTests.swift b/Sources/Integration/ContainerTests.swift index cf6af0dd..36a41494 100644 --- a/Sources/Integration/ContainerTests.swift +++ b/Sources/Integration/ContainerTests.swift @@ -3969,6 +3969,70 @@ extension IntegrationSuite { } } + /// Exercises the dialAgent() → gRPC RPC path that previously crashed with + /// EBADF when the VZVirtioSocketConnection was closed before the gRPC + /// client made its first call. + /// + /// Each exec() call creates a new vsock connection via dialAgent(). The + /// gRPC ClientConnection defers NIO channel creation until the first RPC + /// (createProcess). A delay between exec() and start() widens the window + /// where the fd must remain valid — if the VZVirtioSocketConnection is + /// closed prematurely, the fd may be invalidated by the time NIO tries + /// fcntl(F_SETNOSIGPIPE), causing a precondition failure. + func testExecDeferredConnectionStability() async throws { + let id = "test-exec-deferred-connection-stability" + + let bs = try await bootstrap(id) + let container = try LinuxContainer(id, rootfs: bs.rootfs, vmm: bs.vmm) { config in + config.process.arguments = ["/bin/sleep", "1000"] + config.bootLog = bs.bootLog + } + + do { + try await container.create() + try await container.start() + + // Run multiple sequential exec calls with delays between creating the + // gRPC connection (exec) and making the first RPC (start). This is the + // pattern that triggered the EBADF crash: the fd was dup'd, the + // VZVirtioSocketConnection was closed, and by the time NIO tried to + // create the channel the fd was invalid. + for i in 0..<10 { + let buffer = BufferWriter() + let exec = try await container.exec("deferred-\(i)") { config in + config.arguments = ["/bin/echo", "exec-\(i)"] + config.stdout = buffer + } + + // Delay between exec() (which calls dialAgent/creates gRPC connection) + // and start() (which triggers the first RPC/NIO channel creation). + try await Task.sleep(for: .milliseconds(100)) + + try await exec.start() + let status = try await exec.wait() + try await exec.delete() + + guard status.exitCode == 0 else { + throw IntegrationError.assert(msg: "exec deferred-\(i) status \(status) != 0") + } + + guard let output = String(data: buffer.data, encoding: .utf8) else { + throw IntegrationError.assert(msg: "failed to read output from deferred-\(i)") + } + guard output.trimmingCharacters(in: .whitespacesAndNewlines) == "exec-\(i)" else { + throw IntegrationError.assert(msg: "deferred-\(i) output mismatch: \(output)") + } + } + + try await container.kill(SIGKILL) + try await container.wait() + try await container.stop() + } catch { + try? await container.stop() + throw error + } + } + @available(macOS 26.0, *) func testNetworkingDisabled() async throws { let id = "test-networking-disabled" diff --git a/Sources/Integration/Suite.swift b/Sources/Integration/Suite.swift index 02f26354..8b07747d 100644 --- a/Sources/Integration/Suite.swift +++ b/Sources/Integration/Suite.swift @@ -369,6 +369,7 @@ struct IntegrationSuite: AsyncParsableCommand { Test("container useInit zombie reaping", testUseInitZombieReaping), Test("container useInit with terminal", testUseInitWithTerminal), Test("container useInit with stdin", testUseInitWithStdin), + Test("exec deferred connection stability", testExecDeferredConnectionStability), // Pods Test("pod single container", testPodSingleContainer), From 8c047dcf1eb127329a42135a78bf81bc464edfcf Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Mon, 23 Feb 2026 21:19:24 +0100 Subject: [PATCH 2/2] Fix EBADF crash by keeping VZVirtioSocketConnection alive When dialAgent() creates a gRPC connection via vsock, it dup's the file descriptor and immediately closes the VZVirtioSocketConnection. The Virtualization framework tears down the vsock endpoint when the connection is closed, which can invalidate the dup'd descriptor. Since gRPC defers NIO channel creation until the first RPC, the fd may be invalid by then, causing a precondition failure in NIO's fcntl(F_SETNOSIGPIPE). The fix introduces VsockTransport, a Sendable wrapper that retains the VZVirtioSocketConnection until Vminitd.close() explicitly shuts it down after the gRPC channel. A new dupFileDescriptor() method dups without closing, and dialAgent() passes the connection as transport. --- .../VZVirtualMachine+Helpers.swift | 26 ++- .../VZVirtualMachineInstance.swift | 11 +- Sources/Containerization/Vminitd.swift | 17 ++ Sources/Containerization/VsockListener.swift | 7 + Sources/Containerization/VsockTransport.swift | 55 ++++++ Sources/Integration/ContainerTests.swift | 6 + .../VsockTransportTests.swift | 156 ++++++++++++++++++ 7 files changed, 269 insertions(+), 9 deletions(-) create mode 100644 Sources/Containerization/VsockTransport.swift create mode 100644 Tests/ContainerizationTests/VsockTransportTests.swift diff --git a/Sources/Containerization/VZVirtualMachine+Helpers.swift b/Sources/Containerization/VZVirtualMachine+Helpers.swift index 2cbadb1c..816dad49 100644 --- a/Sources/Containerization/VZVirtualMachine+Helpers.swift +++ b/Sources/Containerization/VZVirtualMachine+Helpers.swift @@ -122,13 +122,15 @@ extension VZVirtualMachine { } extension VZVirtualMachine { - func waitForAgent(queue: DispatchQueue) async throws -> FileHandle { + func waitForAgent(queue: DispatchQueue) async throws -> (FileHandle, VsockTransport) { let agentConnectionRetryCount: Int = 200 let agentConnectionSleepDuration: Duration = .milliseconds(20) for _ in 0...agentConnectionRetryCount { do { - return try await self.connect(queue: queue, port: Vminitd.port).dupHandle() + let conn = try await self.connect(queue: queue, port: Vminitd.port) + let handle = try conn.dupFileDescriptor() + return (handle, VsockTransport(conn)) } catch { try await Task.sleep(for: agentConnectionSleepDuration) continue @@ -139,6 +141,12 @@ extension VZVirtualMachine { } extension VZVirtioSocketConnection { + /// Duplicates the file descriptor and immediately closes the connection. + /// + /// Only safe when the returned fd is used synchronously before any + /// suspension point. For deferred use (e.g., gRPC/NIO), use + /// ``dupFileDescriptor()`` and keep the connection alive via + /// ``VsockTransport``. func dupHandle() throws -> FileHandle { let fd = dup(self.fileDescriptor) if fd == -1 { @@ -147,6 +155,20 @@ extension VZVirtioSocketConnection { self.close() return FileHandle(fileDescriptor: fd, closeOnDealloc: false) } + + /// Duplicates the connection's file descriptor without closing the connection. + /// + /// The caller must keep the `VZVirtioSocketConnection` alive until the dup'd + /// descriptor is no longer needed. The Virtualization framework tears down the + /// vsock endpoint when the connection is closed, which invalidates dup'd + /// descriptors. + func dupFileDescriptor() throws -> FileHandle { + let fd = dup(self.fileDescriptor) + if fd == -1 { + throw POSIXError.fromErrno() + } + return FileHandle(fileDescriptor: fd, closeOnDealloc: false) + } } #endif diff --git a/Sources/Containerization/VZVirtualMachineInstance.swift b/Sources/Containerization/VZVirtualMachineInstance.swift index 9e429c5a..fe6d93b5 100644 --- a/Sources/Containerization/VZVirtualMachineInstance.swift +++ b/Sources/Containerization/VZVirtualMachineInstance.swift @@ -125,10 +125,8 @@ extension VZVirtualMachineInstance: VirtualMachineInstance { try await self.vm.start(queue: self.queue) - let agent = Vminitd( - connection: try await self.vm.waitForAgent(queue: self.queue), - group: self.group - ) + let (handle, transport) = try await self.vm.waitForAgent(queue: self.queue) + let agent = Vminitd(connection: handle, transport: transport, group: self.group) do { if self.config.rosetta { @@ -189,9 +187,8 @@ extension VZVirtualMachineInstance: VirtualMachineInstance { queue: queue, port: Vminitd.port ) - let handle = try conn.dupHandle() - let agent = Vminitd(connection: handle, group: self.group) - return agent + let handle = try conn.dupFileDescriptor() + return Vminitd(connection: handle, transport: VsockTransport(conn), group: self.group) } catch { if let err = error as? ContainerizationError { throw err diff --git a/Sources/Containerization/Vminitd.swift b/Sources/Containerization/Vminitd.swift index 2a177d66..62eec79c 100644 --- a/Sources/Containerization/Vminitd.swift +++ b/Sources/Containerization/Vminitd.swift @@ -33,16 +33,33 @@ public struct Vminitd: Sendable { let client: Client + /// Retains the underlying vsock connection to keep the file descriptor + /// valid for the gRPC client's lifetime. The Virtualization framework + /// tears down the vsock endpoint when the connection is closed, which + /// invalidates dup'd descriptors. Must remain open until the gRPC + /// channel is shut down. + private let transport: VsockTransport? + public init(client: Client) { self.client = client + self.transport = nil } public init(connection: FileHandle, group: EventLoopGroup) { self.client = .init(connection: connection, group: group) + self.transport = nil + } + + init(connection: FileHandle, transport: VsockTransport, group: EventLoopGroup) { + self.client = .init(connection: connection, group: group) + self.transport = transport } /// Close the connection to the guest agent. public func close() async throws { + // Shut down the gRPC channel first (NIO closes the dup'd fd), + // then close the vsock endpoint so the guest sees EOF immediately. + defer { transport?.close() } try await client.close() } } diff --git a/Sources/Containerization/VsockListener.swift b/Sources/Containerization/VsockListener.swift index 7a7b36fa..a7b44eb6 100644 --- a/Sources/Containerization/VsockListener.swift +++ b/Sources/Containerization/VsockListener.swift @@ -52,6 +52,13 @@ public final class VsockListener: NSObject, Sendable, AsyncSequence { #if os(macOS) extension VsockListener: VZVirtioSocketListenerDelegate { + /// Accepts a new vsock connection by dup'ing its fd and closing the original. + /// + /// The dup'd fd is yielded into the `AsyncStream` for immediate consumption. + /// Callers must use the `FileHandle` before any suspension point — the + /// Virtualization framework tears down the vsock endpoint when the connection + /// is closed, which can invalidate dup'd descriptors if the underlying kernel + /// object is reclaimed. For deferred use (e.g., gRPC/NIO), see `VsockTransport`. public func listener( _: VZVirtioSocketListener, shouldAcceptNewConnection conn: VZVirtioSocketConnection, from _: VZVirtioSocketDevice diff --git a/Sources/Containerization/VsockTransport.swift b/Sources/Containerization/VsockTransport.swift new file mode 100644 index 00000000..78520485 --- /dev/null +++ b/Sources/Containerization/VsockTransport.swift @@ -0,0 +1,55 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2025-2026 Apple Inc. and the Containerization project authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//===----------------------------------------------------------------------===// + +#if os(macOS) +import Foundation +import Virtualization + +/// Manages the lifecycle of a VZVirtioSocketConnection for use as a gRPC transport. +/// +/// When a vsock connection's file descriptor is dup'd and handed to gRPC/NIO, +/// the original VZVirtioSocketConnection must remain open. The Virtualization +/// framework tears down the host-to-guest vsock mapping when the connection is +/// closed, which invalidates dup'd descriptors. This wrapper keeps the +/// connection alive and provides explicit close semantics. +/// +/// Uses `@unchecked Sendable` because VZVirtioSocketConnection is not Sendable, +/// which also prevents using Mutex (its init requires a `sending` parameter that +/// conflicts with the non-Sendable connection at async call sites). +final class VsockTransport: @unchecked Sendable { + private var connection: VZVirtioSocketConnection? + private let lock = NSLock() + + init(_ connection: VZVirtioSocketConnection) { + self.connection = connection + } + + /// Closes the underlying vsock connection, tearing down the host-side endpoint. + func close() { + lock.lock() + defer { lock.unlock() } + connection?.close() + connection = nil + } + + deinit { + // No lock needed: deinit runs only after all strong references are + // released, so no concurrent close() call is possible. + connection?.close() + } +} + +#endif diff --git a/Sources/Integration/ContainerTests.swift b/Sources/Integration/ContainerTests.swift index 36a41494..4882b861 100644 --- a/Sources/Integration/ContainerTests.swift +++ b/Sources/Integration/ContainerTests.swift @@ -3979,6 +3979,12 @@ extension IntegrationSuite { /// where the fd must remain valid — if the VZVirtioSocketConnection is /// closed prematurely, the fd may be invalidated by the time NIO tries /// fcntl(F_SETNOSIGPIPE), causing a precondition failure. + /// + /// The same VsockTransport fix also applies to the waitForAgent() startup + /// path (where the first RPC is setTime via TimeSyncer). That path is + /// implicitly exercised by every integration test that boots a container, + /// but isn't stress-tested with an artificial delay here because the timing + /// depends on VM boot and Rosetta setup, which aren't controllable. func testExecDeferredConnectionStability() async throws { let id = "test-exec-deferred-connection-stability" diff --git a/Tests/ContainerizationTests/VsockTransportTests.swift b/Tests/ContainerizationTests/VsockTransportTests.swift new file mode 100644 index 00000000..7f2ffaef --- /dev/null +++ b/Tests/ContainerizationTests/VsockTransportTests.swift @@ -0,0 +1,156 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2025-2026 Apple Inc. and the Containerization project authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//===----------------------------------------------------------------------===// + +#if os(macOS) + +import Darwin +import Foundation +import Testing + +@testable import Containerization + +/// Tests for the VsockTransport fd lifecycle fix. +/// +/// The Virtualization framework tears down the vsock endpoint when a +/// VZVirtioSocketConnection is closed, invalidating dup'd descriptors. +/// The fix keeps the connection alive via VsockTransport until the gRPC +/// channel is shut down. +/// +/// These tests use Unix socket pairs to verify: +/// 1. A dup'd fd is fully functional when the original is kept alive. +/// 2. The specific fcntl call that triggers the NIO crash (F_SETNOSIGPIPE) +/// works on the dup'd fd. +/// 3. The correct teardown order (close dup'd fd first, then original) +/// preserves the connection for the peer until the original is closed. +@Suite("VsockTransport tests") +struct VsockTransportTests { + + /// Creates a connected Unix socket pair. Returns (fd0, fd1). + private func makeSocketPair() throws -> (Int32, Int32) { + var fds: [Int32] = [0, 0] + let result = socketpair(AF_UNIX, SOCK_STREAM, 0, &fds) + try #require(result == 0, "socketpair should succeed") + return (fds[0], fds[1]) + } + + // MARK: - fd lifecycle tests + + /// Verifies that F_SETNOSIGPIPE (the exact fcntl call where NIO crashes) + /// succeeds on a dup'd fd when the original is kept alive. + @Test func dupdDescriptorSupportsFcntlWhenOriginalAlive() throws { + let (fd0, fd1) = try makeSocketPair() + defer { + close(fd0) + close(fd1) + } + + let dupdFd = dup(fd0) + try #require(dupdFd != -1) + defer { close(dupdFd) } + + // This is the exact operation that triggers the NIO EBADF crash + // when the underlying vsock endpoint has been torn down. + let result = fcntl(dupdFd, F_SETNOSIGPIPE, 1) + #expect(result == 0, "F_SETNOSIGPIPE should succeed on dup'd fd when original is alive") + } + + /// Verifies that a dup'd fd can read data written by the peer when the + /// original fd is kept alive. + @Test func dupdDescriptorCanReadWhenOriginalAlive() throws { + let (fd0, fd1) = try makeSocketPair() + defer { + close(fd0) + close(fd1) + } + + let dupdFd = dup(fd0) + try #require(dupdFd != -1) + defer { close(dupdFd) } + + // Peer writes data. + let message: [UInt8] = [1, 2, 3] + let writeResult = message.withUnsafeBufferPointer { buf in + write(fd1, buf.baseAddress, buf.count) + } + try #require(writeResult == 3) + + // Dup'd fd can read because the original keeps the connection alive. + var readBuf = [UInt8](repeating: 0, count: 3) + let readResult = readBuf.withUnsafeMutableBufferPointer { buf in + read(dupdFd, buf.baseAddress, buf.count) + } + #expect(readResult == 3) + #expect(readBuf == [1, 2, 3]) + } + + /// Verifies the correct teardown order: closing the dup'd fd first (gRPC + /// channel shutdown) does not break the connection for the peer, because + /// the original fd (transport) is still alive. + @Test func peerCanWriteAfterDupdFdClosedWhileOriginalAlive() throws { + let (fd0, fd1) = try makeSocketPair() + defer { + close(fd0) + close(fd1) + } + + let dupdFd = dup(fd0) + try #require(dupdFd != -1) + + // Close the dup'd fd (simulates gRPC channel shutdown). + close(dupdFd) + + // The peer can still write because the original fd keeps the + // connection alive. This matters for orderly shutdown: the guest + // doesn't see an unexpected EOF while the host is still tearing + // down the gRPC channel. + let message: [UInt8] = [42] + let writeResult = message.withUnsafeBufferPointer { buf in + write(fd1, buf.baseAddress, buf.count) + } + #expect(writeResult == 1, "Peer can still write after dup'd fd is closed") + + // Read from the original to confirm data arrived. + var readBuf = [UInt8](repeating: 0, count: 1) + let readResult = readBuf.withUnsafeMutableBufferPointer { buf in + read(fd0, buf.baseAddress, buf.count) + } + #expect(readResult == 1) + #expect(readBuf == [42]) + } + + /// Verifies that after both the dup'd fd and the original are closed, + /// the peer sees EOF (read returns 0). + @Test func peerSeesEOFAfterBothDescriptorsClosed() throws { + let (fd0, fd1) = try makeSocketPair() + defer { close(fd1) } + + let dupdFd = dup(fd0) + try #require(dupdFd != -1) + + // Close dup'd fd first (gRPC shutdown), then original (transport.close()). + close(dupdFd) + close(fd0) + + // Peer should see EOF. + var readBuf = [UInt8](repeating: 0, count: 1) + let readResult = readBuf.withUnsafeMutableBufferPointer { buf in + read(fd1, buf.baseAddress, buf.count) + } + #expect(readResult == 0, "Peer should see EOF after both descriptors are closed") + } +} + +#endif