From bbc6f46590b7fcec2a634cdafccf78f4ae14228a Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Thu, 11 Jun 2026 10:33:41 +0200 Subject: [PATCH] fix for split UTF-8 characters, based on genai solution --- src/llm/ovms_text_streamer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/llm/ovms_text_streamer.cpp b/src/llm/ovms_text_streamer.cpp index 68c6c98dfa..bd65e9cef9 100644 --- a/src/llm/ovms_text_streamer.cpp +++ b/src/llm/ovms_text_streamer.cpp @@ -70,13 +70,15 @@ ov::genai::StreamingStatus OVMSTextStreamer::write(int64_t token) { // 2. Incomplete UTF-8: decoded length did not advance — last bytes are a // partial multibyte sequence. Mark this slot as -1 so the delay check // skips it (matching TextStreamer's own handling). - const size_t n = m_decoded_lengths.size(); - if (n >= 2 && m_decoded_lengths[n - 1] == m_decoded_lengths[n - 2]) { + const size_t text_size = text.size(); + char replacement[] = "\xef\xbf\xbd"; + if (text_size >= 3 && text.compare(text_size - 3, 3, replacement) == 0) { m_decoded_lengths.back() = -1; return ov::genai::StreamingStatus::RUNNING; } // 3. Delay buffer: need at least DELAY_N_TOKENS entries before flushing. + const size_t n = m_decoded_lengths.size(); if (n < DELAY_N_TOKENS) { return ov::genai::StreamingStatus::RUNNING; }