Skip to content

Commit 2756bcc

Browse files
committed
feat: preserve volume when overriding speed in Prosody configuration
Signed-off-by: James Ding <jamesding365@gmail.com>
1 parent: e31b4c9 | commit: 2756bcc

3 files changed

Lines changed: 32 additions & 10 deletions

File tree

src/fishaudio/resources/tts.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,7 @@ def convert(
147147
request.latency = latency
148148

149149
if speed is not None:
150-
request.prosody = Prosody(speed=speed)
150+
request.prosody = Prosody.from_speed_override(speed, base=config.prosody)
151151

152152
payload = request.model_dump(exclude_none=True)
153153

@@ -263,7 +263,9 @@ def text_generator():
263263
tts_request.latency = latency
264264

265265
if speed is not None:
266-
tts_request.prosody = Prosody(speed=speed)
266+
tts_request.prosody = Prosody.from_speed_override(
267+
speed, base=config.prosody
268+
)
267269

268270
executor = ThreadPoolExecutor(max_workers=max_workers)
269271

@@ -394,7 +396,7 @@ async def convert(
394396
request.latency = latency
395397

396398
if speed is not None:
397-
request.prosody = Prosody(speed=speed)
399+
request.prosody = Prosody.from_speed_override(speed, base=config.prosody)
398400

399401
payload = request.model_dump(exclude_none=True)
400402

@@ -508,7 +510,9 @@ async def text_generator():
508510
tts_request.latency = latency
509511

510512
if speed is not None:
511-
tts_request.prosody = Prosody(speed=speed)
513+
tts_request.prosody = Prosody.from_speed_override(
514+
speed, base=config.prosody
515+
)
512516

513517
ws: AsyncWebSocketSession
514518
async with aconnect_ws(

src/fishaudio/types/tts.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,24 @@ class Prosody(BaseModel):
2020
speed: float = 1.0
2121
volume: float = 0.0
2222

23+
@classmethod
24+
def from_speed_override(
25+
cls, speed: float, base: Optional["Prosody"] = None
26+
) -> "Prosody":
27+
"""
28+
Create Prosody with speed override, preserving volume from base.
29+
30+
Args:
31+
speed: Speed value to use
32+
base: Base prosody to preserve volume from (if any)
33+
34+
Returns:
35+
New Prosody instance with overridden speed
36+
"""
37+
if base:
38+
return cls(speed=speed, volume=base.volume)
39+
return cls(speed=speed)
40+
2341

2442
class TTSConfig(BaseModel):
2543
"""

tests/unit/test_tts.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -359,19 +359,19 @@ def test_convert_parameter_format_overrides_config(
359359
def test_convert_parameter_speed_overrides_config_prosody(
360360
self, tts_client, mock_client_wrapper
361361
):
362-
"""Test that parameter speed overrides config.prosody."""
362+
"""Test that parameter speed overrides config.prosody speed but preserves volume."""
363363
mock_response = Mock()
364364
mock_response.iter_bytes.return_value = iter([b"audio"])
365365
mock_client_wrapper.request.return_value = mock_response
366366

367367
config = TTSConfig(prosody=Prosody(speed=2.0, volume=0.5))
368368
list(tts_client.convert(text="Hello", speed=1.5, config=config))
369369

370-
# Verify parameter speed takes precedence
370+
# Verify parameter speed takes precedence but volume is preserved
371371
call_args = mock_client_wrapper.request.call_args
372372
payload = ormsgpack.unpackb(call_args[1]["content"])
373373
assert payload["prosody"]["speed"] == 1.5
374-
# Note: volume from config.prosody is lost when speed parameter is used
374+
assert payload["prosody"]["volume"] == 0.5 # Preserved from config!
375375

376376
def test_convert_combined_convenience_parameters(
377377
self, tts_client, mock_client_wrapper
@@ -721,7 +721,7 @@ async def async_iter_bytes():
721721
async def test_convert_parameter_speed_overrides_config_prosody(
722722
self, async_tts_client, async_mock_client_wrapper
723723
):
724-
"""Test that parameter speed overrides config.prosody (async)."""
724+
"""Test that parameter speed overrides config.prosody speed but preserves volume (async)."""
725725
mock_response = Mock()
726726

727727
async def async_iter_bytes():
@@ -737,11 +737,11 @@ async def async_iter_bytes():
737737
):
738738
audio_chunks.append(chunk)
739739

740-
# Verify parameter speed takes precedence
740+
# Verify parameter speed takes precedence but volume is preserved
741741
call_args = async_mock_client_wrapper.request.call_args
742742
payload = ormsgpack.unpackb(call_args[1]["content"])
743743
assert payload["prosody"]["speed"] == 1.5
744-
# Note: volume from config.prosody is lost when speed parameter is used
744+
assert payload["prosody"]["volume"] == 0.5 # Preserved from config!
745745

746746
@pytest.mark.asyncio
747747
async def test_convert_combined_convenience_parameters(

0 commit comments

Comments
 (0)