From 4fa84f90dc5358ff14a483143657c06461d76c27 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Thu, 7 May 2026 11:13:41 -0500 Subject: [PATCH] rutabaga_gfx/cross_domain: fix audio glitches by dropping write lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous write() held item_state.lock() across the write_volatile call that performs the actual syscall, serializing every cross-domain CMD_WRITE behind any other operation on the items table — including add_item from process_receive, which fires for every new fd received via SCM_RIGHTS as guests open additional channels. For per-period eventfd wakeups (e.g. PipeWire audio streams signaling host playback every audio period), this means the write completes only after any in-flight item_state operation finishes. Under stream-create churn — many guest applications opening streams concurrently, each delivering new fds via SCM_RIGHTS that hit add_item under the same lock — the wait can exceed the audio period budget and produce missed-deadline glitches at the host's audio output. This change shrinks the critical section to a brief fd dup() under the lock, performs the syscall lock-free, and only re-acquires the lock if hang_up indicates the item should be removed. In the common case (hang_up == 0, e.g. repeated eventfd wakeups for an active stream) the table is no longer touched per write, eliminating both the contention and the previous "remove + conditional re-insert" churn. Behavioral changes vs the old code: - Common case (hang_up == 0): item stays in the table; we hand out a dup'd fd for the write. Net behavior identical, lock hold time bounded by dup(). - hang_up == 1: item removed in a separate phase after the write, instead of "removed unconditionally then re-inserted on hang_up == 0". Same observed end state. - Concurrent writes to the same id (no longer serialized): the kernel guarantees atomicity for eventfd writes (8 bytes) and pipe writes <= PIPE_BUF, which are the only two CrossDomainItem variants this branch handles. Each caller dup's its own fd and writes through it independently. Verified with a synthetic reproducer: a sustained 1 kHz sine playing through a guest PipeWire stream while ~200 short-lived guest streams open and close concurrently (each issuing SCM_RIGHTS for new eventfds, contending with the sustained stream's per-period writes for item_state). Capturing the host sink monitor and counting sample-to-sample deltas exceeding a clean-sine threshold, the stress workload produced ~10 distinct glitch bursts in an 8-second capture before this change, and zero across five consecutive runs after. Signed-off-by: Adam Ford --- src/rutabaga_gfx/src/cross_domain/mod.rs | 77 +++++++++++------------- 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/src/rutabaga_gfx/src/cross_domain/mod.rs b/src/rutabaga_gfx/src/cross_domain/mod.rs index 81d7b71c7..72ffd4742 100644 --- a/src/rutabaga_gfx/src/cross_domain/mod.rs +++ b/src/rutabaga_gfx/src/cross_domain/mod.rs @@ -965,56 +965,47 @@ impl CrossDomainContext { } fn write(&self, cmd_write: &CrossDomainReadWrite, opaque_data: &[u8]) -> RutabagaResult<()> { - let mut items = self.item_state.lock().unwrap(); - - // Most of the time, hang-up and writing will be paired. In lieu of this, remove the - // item rather than getting a reference. In case of an error, there's not much to do - // besides reporting it. - let item = items - .table - .remove(&cmd_write.identifier) - .ok_or(RutabagaError::InvalidCrossDomainItemId)?; - let len: usize = cmd_write.opaque_data_size.try_into()?; - match item { - CrossDomainItem::WaylandWritePipe(file) => { - if len != 0 { - write_volatile(&file, opaque_data)?; - } - if cmd_write.hang_up == 0 { - items.table.insert( - cmd_write.identifier, - CrossDomainItem::WaylandWritePipe(file), - ); + // Phase 1 (under lock): look up the item and dup() its fd. We release + // the lock before the actual write syscall so unrelated operations on + // item_state aren't serialized behind it. The previous design held + // the lock across write_volatile, which was the dominant contention + // source for per-period eventfd writes from active audio streams + // competing with concurrent stream-create churn (process_receive's + // add_item path also takes this lock). Cloning the fd is a cheap + // dup() syscall; the actual write — which can be longer for pipe + // writes, and rate-limited for eventfds — happens lock-free. + let fd = { + let items = self.item_state.lock().unwrap(); + let item = items + .table + .get(&cmd_write.identifier) + .ok_or(RutabagaError::InvalidCrossDomainItemId)?; + match item { + CrossDomainItem::WaylandWritePipe(file) | CrossDomainItem::Eventfd(file) => { + file.try_clone()? } - - Ok(()) + _ => return Err(RutabagaError::InvalidCrossDomainItemType), } - // Handle writes to Eventfd items. PipeWire (and other clients - // that pass eventfds via SCM_RIGHTS) uses these for per-period - // wakeups: an 8-byte write to the eventfd's counter signals - // the host. Without this arm, the first such write hits the - // catch-all below, returning InvalidCrossDomainItemType after - // the unconditional remove() above has already dropped the - // item — leaving every subsequent write to the same id - // failing with InvalidCrossDomainItemId. Mirrors the - // WaylandWritePipe re-insert semantics on hang_up == 0. - CrossDomainItem::Eventfd(file) => { - if len != 0 { - write_volatile(&file, opaque_data)?; - } + }; - if cmd_write.hang_up == 0 { - items - .table - .insert(cmd_write.identifier, CrossDomainItem::Eventfd(file)); - } + // Phase 2 (lock-free): do the actual write. + if len != 0 { + write_volatile(&fd, opaque_data)?; + } - Ok(()) - } - _ => Err(RutabagaError::InvalidCrossDomainItemType), + // Phase 3 (under lock): on hang_up, drop the item from the table. + // Without hang_up the item stays — common for Eventfd repeated wakeups + // and for any pipe being held open for further writes. This matches + // the previous "remove + conditional re-insert" behavior, but in the + // common (hang_up == 0) case avoids touching the table at all. + if cmd_write.hang_up != 0 { + let mut items = self.item_state.lock().unwrap(); + items.table.remove(&cmd_write.identifier); } + + Ok(()) } fn process_cmd_send(