Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions livekit-agents/livekit/agents/tts/stream_pacer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import asyncio
import time
from collections import deque
from dataclasses import dataclass

from .. import utils
Expand Down Expand Up @@ -53,7 +54,7 @@ def __init__(

self._closing = False
self._input_ended = False
self._sentences: list[str] = []
self._sentences: deque[str] = deque()
self._wakeup_event = asyncio.Event()
self._wakeup_timer: asyncio.TimerHandle | None = None

Expand Down Expand Up @@ -135,11 +136,14 @@ async def _send_task(self) -> None:
generation_stopped and remaining_audio <= self._options.min_remaining_audio
):
batch: list[str] = []
batch_text_len = 0
while self._sentences:
batch.append(self._sentences.pop(0))
sentence = self._sentences.popleft()
batch.append(sentence)
batch_text_len += len(sentence)
if (
first_sentence # send first sentence immediately
or sum(len(s) for s in batch) >= self._options.max_text_length
or batch_text_len >= self._options.max_text_length
):
break

Expand Down
12 changes: 4 additions & 8 deletions livekit-agents/livekit/agents/voice/audio_recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,8 +805,7 @@ async def _commit_user_turn() -> None:

transcript = self._audio_transcript
self._audio_interim_transcript = ""
chat_ctx = self._hooks.retrieve_chat_ctx().copy()
self._run_eou_detection(chat_ctx, skip_reply=skip_reply)
self._run_eou_detection(self._hooks.retrieve_chat_ctx(), skip_reply=skip_reply)
self._user_turn_committed = True
if not fut.done():
fut.set_result(transcript)
Expand Down Expand Up @@ -949,8 +948,7 @@ async def _on_stt_event(self, ev: stt.SpeechEvent) -> None:
)

if not self._speaking:
chat_ctx = self._hooks.retrieve_chat_ctx().copy()
self._run_eou_detection(chat_ctx)
self._run_eou_detection(self._hooks.retrieve_chat_ctx())

elif ev.type == stt.SpeechEventType.PREFLIGHT_TRANSCRIPT:
self._hooks.on_interim_transcript(
Expand Down Expand Up @@ -1036,8 +1034,7 @@ async def _on_stt_event(self, ev: stt.SpeechEvent) -> None:
# vad disabled or missed a speech, use stt timestamp
self._last_speaking_time = stt_last_speaking_time

chat_ctx = self._hooks.retrieve_chat_ctx().copy()
self._run_eou_detection(chat_ctx)
self._run_eou_detection(self._hooks.retrieve_chat_ctx())

elif ev.type == stt.SpeechEventType.START_OF_SPEECH and self._turn_detection_mode == "stt":
# If the plugin provided a server onset timestamp, use it;
Expand Down Expand Up @@ -1093,8 +1090,7 @@ async def _on_vad_event(self, ev: vad.VADEvent) -> None:
if self._vad_base_turn_detection or (
self._turn_detection_mode == "stt" and self._user_turn_committed
):
chat_ctx = self._hooks.retrieve_chat_ctx().copy()
self._run_eou_detection(chat_ctx)
self._run_eou_detection(self._hooks.retrieve_chat_ctx())

if self._session.amd is not None:
self._session.amd._on_user_speech_ended(ev.silence_duration)
Expand Down
Loading