From 817b1a63c554ba34e7161b26bb26916ca5804b94 Mon Sep 17 00:00:00 2001 From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com> Date: Mon, 15 Jun 2026 16:04:58 +0200 Subject: [PATCH] Recreate audio source and republish track on sample-rate change Commit 1 restarts capture on a device change but the native source's rate stays fixed at construction, so a device whose rate differs from the original silently has every frame dropped. Add RtcAudioSource.Reconfigure(sampleRate, channels): it disposes the old native handle, rebuilds the source at the new format, and raises a new FormatChanged event. The native source stays alive via the track's reference until the track is dropped, so disposing the handle before the old track is unpublished is safe. The FFI exposes no in-place source reconfigure, so a fresh source (and re-bound track) is required. MicrophoneSource detects the format change via ResolveDeviceFormat in its config-changed handler and calls Reconfigure inside the restart while capture is paused (no AudioRead callbacks in flight). MeetManager subscribes to FormatChanged and republishes the local audio track against the source's new handle. Co-Authored-By: Claude Opus 4.8 (1M context) --- Runtime/Scripts/MicrophoneSource.cs | 29 +++++++---- Runtime/Scripts/RtcAudioSource.cs | 56 +++++++++++++++++++-- Samples~/Meet/Assets/Runtime/MeetManager.cs | 41 +++++++++++++++ 3 files changed, 112 insertions(+), 14 deletions(-) diff --git a/Runtime/Scripts/MicrophoneSource.cs b/Runtime/Scripts/MicrophoneSource.cs index 6b30900c..25626da4 100644 --- a/Runtime/Scripts/MicrophoneSource.cs +++ b/Runtime/Scripts/MicrophoneSource.cs @@ -238,24 +238,27 @@ private void OnAudioConfigurationChanged(bool deviceWasChanged) if (!_started) return; - // The native source's rate is fixed at construction and RtcAudioSource drops frames - // whose rate doesn't match it. 
If the device change moved Unity's DSP output rate, - // restarting capture alone won't recover audio — warn so the silence is diagnosable. - // Full recovery (recreating the native source at the new rate) is handled separately. - var outputSampleRate = (uint)AudioSettings.outputSampleRate; - if (outputSampleRate != _expectedSampleRate) + // The native source rejects frames whose rate/channels don't match how it was + // created. If the device change moved Unity's output format, the source must be + // recreated at the new format (and its track re-bound) — otherwise restarting capture + // alone won't recover audio. RtcAudioSource.Reconfigure handles the recreation; we + // run it inside the restart while capture is paused. + var (newRate, newChannels) = ResolveDeviceFormat(); + bool formatChanged = newRate != _expectedSampleRate || newChannels != _expectedChannels; + + if (formatChanged) { - Utils.Warning($"MicrophoneSource: audio device change moved the DSP output rate to {outputSampleRate}Hz, but the native source is fixed at {_expectedSampleRate}Hz. Captured frames will be dropped until the track is recreated at the new rate."); + Utils.Debug($"MicrophoneSource: DSP format changed to {newRate}/{newChannels}, recreating native source and restarting capture"); + MonoBehaviourContext.RunCoroutine(RestartMicrophone(newRate, newChannels)); } - - if (deviceWasChanged) + else if (deviceWasChanged) { Utils.Debug("MicrophoneSource: audio device changed, restarting capture on the current default device"); MonoBehaviourContext.RunCoroutine(RestartMicrophone()); } } - private IEnumerator RestartMicrophone() + private IEnumerator RestartMicrophone(uint reconfigureRate = 0, uint reconfigureChannels = 0) { // The device-change event can fire several times around a single hardware swap; // ignore re-entrant restarts so overlapping Stop/Start coroutines don't race. 
@@ -265,6 +268,12 @@ private IEnumerator RestartMicrophone() yield return StopMicrophone(); + // With capture stopped (no AudioRead callbacks in flight), it's safe to recreate the + // native source at the new format. This raises FormatChanged so the owning track is + // re-bound to the new handle. + if (reconfigureRate > 0 && reconfigureChannels > 0) + Reconfigure(reconfigureRate, reconfigureChannels); + // Wait for iOS audio session to be ready before attempting to restart. // On iOS, after app resumes from background, the audio session needs time to // recover from interruption. Poll for readiness instead of using arbitrary delay. diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index 9a80b99e..f9e22b98 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -49,12 +49,22 @@ private sealed class PendingAudioFrame private readonly RtcAudioSourceType _sourceType; public RtcAudioSourceType SourceType => _sourceType; private readonly int _debugId = Interlocked.Increment(ref nextDebugId); - internal readonly uint _expectedSampleRate; - internal readonly uint _expectedChannels; - internal readonly FfiHandle Handle; + // The format the native source is configured for. Mutable because Reconfigure() can + // recreate the source at a new format when the audio device's rate/channels change. + internal uint _expectedSampleRate; + internal uint _expectedChannels; + + internal FfiHandle Handle; protected AudioSourceInfo _info; + /// <summary> + /// Raised after the native audio source has been recreated at a new format (see + /// <see cref="Reconfigure"/>). The source's <see cref="Handle"/> changes, so any track + /// bound to the previous handle must be recreated against the new one. + /// </summary> + public event Action FormatChanged; + // CaptureAudioFrame is asynchronous: the native side can continue reading from the PCM // pointer after request.Send() returns and encode it later on another queue. 
Because of // that, a single reusable NativeArray is unsafe here; the next AudioRead callback can @@ -94,6 +104,14 @@ protected RtcAudioSource(RtcAudioSourceType audioSourceType, uint sampleRate, ui (_expectedSampleRate, _expectedChannels) = ResolveDeviceFormat(); } + CreateNativeSource(); + } + + // Creates the native FFI audio source for the current _expectedSampleRate/_expectedChannels + // and stores its handle. Called once from the constructor and again from Reconfigure() when + // the format changes. + private void CreateNativeSource() + { using var request = FFIBridge.Instance.NewRequest(); var newAudioSource = request.request; newAudioSource.Type = AudioSourceType.AudioSourceNative; @@ -111,11 +129,41 @@ protected RtcAudioSource(RtcAudioSourceType audioSourceType, uint sampleRate, ui Utils.Debug($"{DebugTag} created handle={Handle.DangerousGetHandle()} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}"); } + /// <summary> + /// Recreates the native audio source at a new format. The Rust FFI source does not + /// resample and rejects frames whose rate/channels differ from how it was created, so when + /// the capture device moves Unity's output format we must build a fresh source. + /// </summary> + /// <remarks> + /// Must be called while capture is paused (no AudioRead callbacks in flight), + /// because it disposes and replaces <see cref="Handle"/>. Raises <see cref="FormatChanged"/> + /// on success so the owner can re-bind any track to the new handle. + /// </remarks> + /// <returns>True if the source was recreated; false if the format was unchanged or invalid.</returns> 
+ public bool Reconfigure(uint sampleRate, uint channels) + { + if (_disposed) return false; + if (sampleRate == 0 || channels == 0) return false; + if (sampleRate == _expectedSampleRate && channels == _expectedChannels) return false; + + Utils.Debug($"{DebugTag} reconfigure {_expectedSampleRate}/{_expectedChannels} -> {sampleRate}/{channels}"); + + // The native source stays alive as long as a track references it, so disposing our + // handle here is safe even before the old track is unpublished. + Handle?.Dispose(); + _expectedSampleRate = sampleRate; + _expectedChannels = channels; + CreateNativeSource(); + + FormatChanged?.Invoke(); + return true; + } + // Reads Unity's actual output audio configuration. The capture path delivers buffers at the // DSP output rate/channel count (see AudioProbe), so this is the format the native source // must match. Falls back to the platform defaults when Unity cannot report a configuration // (e.g. batch mode without an audio device). - private (uint sampleRate, uint channels) ResolveDeviceFormat() + protected (uint sampleRate, uint channels) ResolveDeviceFormat() { var config = UnityEngine.AudioSettings.GetConfiguration(); var sampleRate = (uint)config.sampleRate; diff --git a/Samples~/Meet/Assets/Runtime/MeetManager.cs b/Samples~/Meet/Assets/Runtime/MeetManager.cs index c024b973..241e47aa 100644 --- a/Samples~/Meet/Assets/Runtime/MeetManager.cs +++ b/Samples~/Meet/Assets/Runtime/MeetManager.cs @@ -477,14 +477,53 @@ private IEnumerator PublishLocalMicrophone() _microphoneActive = true; _audioObjects[LocalAudioTrackName] = audioObject; _localRtcAudioSource = rtcSource; + // When the capture device changes to one with a different sample rate, the source + // recreates its native handle; re-bind the published track to the new handle. 
+ rtcSource.FormatChanged += OnLocalMicrophoneFormatChanged; rtcSource.Start(); if (_participantTiles.TryGetValue(_localId, out var tile)) tile.SetMicMuted(false); } + // Raised (on the main thread) after the local microphone source recreated its native handle + // at a new format. The old track is bound to the now-disposed handle, so republish. + private void OnLocalMicrophoneFormatChanged() + { + StartCoroutine(RepublishLocalMicrophone()); + } + + private IEnumerator RepublishLocalMicrophone() + { + if (_localRtcAudioSource == null || _room == null) yield break; + + if (_localAudioTrack != null) + { + _room.LocalParticipant.UnpublishTrack(_localAudioTrack, false); + _localAudioTrack = null; + } + + _localAudioTrack = LocalAudioTrack.CreateAudioTrack(LocalAudioTrackName, _localRtcAudioSource, _room); + + var options = new TrackPublishOptions + { + AudioEncoding = new AudioEncoding { MaxBitrate = 64000 }, + Source = TrackSource.SourceMicrophone + }; + + var publish = _room.LocalParticipant.PublishTrack(_localAudioTrack, options); + yield return publish; + + if (publish.IsError) + Debug.LogError("Failed to republish local microphone after format change"); + else + Debug.Log("Republished local microphone track after audio format change"); + } + private void UnpublishLocalMicrophone() { + if (_localRtcAudioSource != null) + _localRtcAudioSource.FormatChanged -= OnLocalMicrophoneFormatChanged; DisposeSource(ref _localRtcAudioSource); if (_audioObjects.TryGetValue(LocalAudioTrackName, out var obj)) @@ -562,6 +601,8 @@ private static void DisposeSource(ref T source) where T : class, System.IDisp private void CleanUpAllTracks() { + if (_localRtcAudioSource != null) + _localRtcAudioSource.FormatChanged -= OnLocalMicrophoneFormatChanged; DisposeSource(ref _localRtcAudioSource); DisposeSource(ref _localRtcVideoSource);