Skip to content

Commit 79e239a

Browse files
authored
fix: handle list format content from OpenAI-compatible APIs (#5128)
* fix: handle list format content from OpenAI-compatible APIs

  Some LLM providers (e.g., GLM-4.5V via SiliconFlow) return content in
  list[dict] format like [{'type': 'text', 'text': '...'}] instead of a
  plain string, which caused the raw list representation to be displayed
  to users.

  Changes:
  - Add _normalize_content() helper to extract text from various content formats
  - Use json.loads instead of ast.literal_eval for safer parsing
  - Add a size-limit check (8 KB) before attempting JSON parsing
  - Only convert lists that match the OpenAI content-part schema (have 'type': 'text')
    to avoid collapsing legitimate list-literal replies like ['foo', 'bar']
  - Add a strip parameter to preserve whitespace in streaming chunks
  - Clean up orphan </think> tags that may leak from some models

  Fixes #5124

* fix: improve content normalization safety

  - Try json.loads first, then fall back to ast.literal_eval for single-quoted
    Python literals, to avoid corrupting apostrophes (e.g., "don't")
  - Coerce text values to str to handle null or non-string text fields
1 parent 8abaf10 commit 79e239a

1 file changed

Lines changed: 85 additions & 3 deletions

File tree

astrbot/core/provider/sources/openai_source.py

Lines changed: 85 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,8 @@ async def _query_stream(
323323
llm_response.reasoning_content = reasoning
324324
_y = True
325325
if delta.content:
326-
completion_text = delta.content
326+
# Don't strip streaming chunks to preserve spaces between words
327+
completion_text = self._normalize_content(delta.content, strip=False)
327328
llm_response.result_chain = MessageChain(
328329
chain=[Comp.Plain(completion_text)],
329330
)
@@ -371,6 +372,86 @@ def _extract_usage(self, usage: CompletionUsage) -> TokenUsage:
371372
output=completion_tokens,
372373
)
373374

375+
@staticmethod
376+
def _normalize_content(raw_content: Any, strip: bool = True) -> str:
377+
"""Normalize content from various formats to plain string.
378+
379+
Some LLM providers return content as list[dict] format
380+
like [{'type': 'text', 'text': '...'}] instead of
381+
plain string. This method handles both formats.
382+
383+
Args:
384+
raw_content: The raw content from LLM response, can be str, list, or other.
385+
strip: Whether to strip whitespace from the result. Set to False for
386+
streaming chunks to preserve spaces between words.
387+
388+
Returns:
389+
Normalized plain text string.
390+
"""
391+
if isinstance(raw_content, list):
392+
# Check if this looks like OpenAI content-part format
393+
# Only process if at least one item has {'type': 'text', 'text': ...} structure
394+
has_content_part = any(
395+
isinstance(part, dict) and part.get("type") == "text"
396+
for part in raw_content
397+
)
398+
if has_content_part:
399+
text_parts = []
400+
for part in raw_content:
401+
if isinstance(part, dict) and part.get("type") == "text":
402+
text_val = part.get("text", "")
403+
# Coerce to str in case text is null or non-string
404+
text_parts.append(str(text_val) if text_val is not None else "")
405+
return "".join(text_parts)
406+
# Not content-part format, return string representation
407+
return str(raw_content)
408+
409+
if isinstance(raw_content, str):
410+
content = raw_content.strip() if strip else raw_content
411+
# Check if the string is a JSON-encoded list (e.g., "[{'type': 'text', ...}]")
412+
# This can happen when streaming concatenates content that was originally list format
413+
# Only check if it looks like a complete JSON array (requires strip for check)
414+
check_content = raw_content.strip()
415+
if (
416+
check_content.startswith("[")
417+
and check_content.endswith("]")
418+
and len(check_content) < 8192
419+
):
420+
try:
421+
# First try standard JSON parsing
422+
parsed = json.loads(check_content)
423+
except json.JSONDecodeError:
424+
# If that fails, try parsing as Python literal (handles single quotes)
425+
# This is safer than blind replace("'", '"') which corrupts apostrophes
426+
try:
427+
import ast
428+
429+
parsed = ast.literal_eval(check_content)
430+
except (ValueError, SyntaxError):
431+
parsed = None
432+
433+
if isinstance(parsed, list):
434+
# Only convert if it matches OpenAI content-part schema
435+
# i.e., at least one item has {'type': 'text', 'text': ...}
436+
has_content_part = any(
437+
isinstance(part, dict) and part.get("type") == "text"
438+
for part in parsed
439+
)
440+
if has_content_part:
441+
text_parts = []
442+
for part in parsed:
443+
if isinstance(part, dict) and part.get("type") == "text":
444+
text_val = part.get("text", "")
445+
# Coerce to str in case text is null or non-string
446+
text_parts.append(
447+
str(text_val) if text_val is not None else ""
448+
)
449+
if text_parts:
450+
return "".join(text_parts)
451+
return content
452+
453+
return str(raw_content)
454+
374455
async def _parse_openai_completion(
375456
self, completion: ChatCompletion, tools: ToolSet | None
376457
) -> LLMResponse:
@@ -383,8 +464,7 @@ async def _parse_openai_completion(
383464

384465
# parse the text completion
385466
if choice.message.content is not None:
386-
# text completion
387-
completion_text = str(choice.message.content).strip()
467+
completion_text = self._normalize_content(choice.message.content)
388468
# specially, some providers may set <think> tags around reasoning content in the completion text,
389469
# we use regex to remove them, and store then in reasoning_content field
390470
reasoning_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
@@ -394,6 +474,8 @@ async def _parse_openai_completion(
394474
[match.strip() for match in matches],
395475
)
396476
completion_text = reasoning_pattern.sub("", completion_text).strip()
477+
# Also clean up orphan </think> tags that may leak from some models
478+
completion_text = re.sub(r"</think>\s*$", "", completion_text).strip()
397479
llm_response.result_chain = MessageChain().message(completion_text)
398480

399481
# parse the reasoning content if any

0 commit comments

Comments
 (0)