From 8e5223e7c7786b6a925a914d0cf9c6dc94a20e4a Mon Sep 17 00:00:00 2001 From: NayukiChiba Date: Sun, 15 Feb 2026 22:40:52 +0800 Subject: [PATCH 1/2] fix: handle list format content from OpenAI-compatible APIs Some LLM providers (e.g., GLM-4.5V via SiliconFlow) return content as list[dict] format like [{'type': 'text', 'text': '...'}] instead of plain string. This causes the raw list representation to be displayed to users. Changes: - Add _normalize_content() helper to extract text from various content formats - Use json.loads instead of ast.literal_eval for safer parsing - Add size limit check (8KB) before attempting JSON parsing - Only convert lists that match OpenAI content-part schema (has 'type': 'text') to avoid collapsing legitimate list-literal replies like ['foo', 'bar'] - Add strip parameter to preserve whitespace in streaming chunks - Clean up orphan </think> tags that may leak from some models Fixes #5124 --- .../core/provider/sources/openai_source.py | 77 ++++++++++++++++++- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index 328da2573..45bd69217 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ b/astrbot/core/provider/sources/openai_source.py @@ -323,7 +323,8 @@ async def _query_stream( llm_response.reasoning_content = reasoning _y = True if delta.content: - completion_text = delta.content + # Don't strip streaming chunks to preserve spaces between words + completion_text = self._normalize_content(delta.content, strip=False) llm_response.result_chain = MessageChain( chain=[Comp.Plain(completion_text)], ) @@ -371,6 +372,75 @@ def _extract_usage(self, usage: CompletionUsage) -> TokenUsage: output=completion_tokens, ) + @staticmethod + def _normalize_content(raw_content: Any, strip: bool = True) -> str: + """Normalize content from various formats to plain string. 
+ + Some LLM providers return content as list[dict] format + like [{'type': 'text', 'text': '...'}] instead of + plain string. This method handles both formats. + + Args: + raw_content: The raw content from LLM response, can be str, list, or other. + strip: Whether to strip whitespace from the result. Set to False for + streaming chunks to preserve spaces between words. + + Returns: + Normalized plain text string. + """ + if isinstance(raw_content, list): + # Check if this looks like OpenAI content-part format + # Only process if at least one item has {'type': 'text', 'text': ...} structure + has_content_part = any( + isinstance(part, dict) and part.get("type") == "text" + for part in raw_content + ) + if has_content_part: + text_parts = [] + for part in raw_content: + if isinstance(part, dict) and part.get("type") == "text": + text_parts.append(part.get("text", "")) + return "".join(text_parts) + # Not content-part format, return string representation + return str(raw_content) + + if isinstance(raw_content, str): + content = raw_content.strip() if strip else raw_content + # Check if the string is a JSON-encoded list (e.g., "[{'type': 'text', ...}]") + # This can happen when streaming concatenates content that was originally list format + # Only check if it looks like a complete JSON array (requires strip for check) + check_content = raw_content.strip() + if ( + check_content.startswith("[") + and check_content.endswith("]") + and len(check_content) < 8192 + ): + try: + parsed = json.loads(check_content.replace("'", '"')) + if isinstance(parsed, list): + # Only convert if it matches OpenAI content-part schema + # i.e., at least one item has {'type': 'text', 'text': ...} + has_content_part = any( + isinstance(part, dict) and part.get("type") == "text" + for part in parsed + ) + if has_content_part: + text_parts = [] + for part in parsed: + if ( + isinstance(part, dict) + and part.get("type") == "text" + ): + text_parts.append(part.get("text", "")) + if text_parts: + 
return "".join(text_parts) + except (json.JSONDecodeError, TypeError): + # Not a valid JSON, keep original string + pass + return content + + return str(raw_content) + async def _parse_openai_completion( self, completion: ChatCompletion, tools: ToolSet | None ) -> LLMResponse: @@ -383,8 +453,7 @@ async def _parse_openai_completion( # parse the text completion if choice.message.content is not None: - # text completion - completion_text = str(choice.message.content).strip() + completion_text = self._normalize_content(choice.message.content) # specially, some providers may set <think> tags around reasoning content in the completion text, # we use regex to remove them, and store then in reasoning_content field reasoning_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL) @@ -394,6 +463,8 @@ async def _parse_openai_completion( [match.strip() for match in matches], ) completion_text = reasoning_pattern.sub("", completion_text).strip() + # Also clean up orphan </think> tags that may leak from some models + completion_text = re.sub(r"</think>\s*$", "", completion_text).strip() llm_response.result_chain = MessageChain().message(completion_text) # parse the reasoning content if any From 8af47d0987c5eec12c064676c597cd76cccc2dd9 Mon Sep 17 00:00:00 2001 From: NayukiChiba Date: Sun, 15 Feb 2026 23:10:20 +0800 Subject: [PATCH 2/2] fix: improve content normalization safety - Try json.loads first, fallback to ast.literal_eval for single-quoted Python literals to avoid corrupting apostrophes (e.g., "don't") - Coerce text values to str to handle null or non-string text fields --- .../core/provider/sources/openai_source.py | 55 +++++++++++-------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index 45bd69217..5378385e5 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ b/astrbot/core/provider/sources/openai_source.py @@ -399,7 +399,9 @@ def _normalize_content(raw_content: Any, strip: bool = 
True) -> str: text_parts = [] for part in raw_content: if isinstance(part, dict) and part.get("type") == "text": - text_parts.append(part.get("text", "")) + text_val = part.get("text", "") + # Coerce to str in case text is null or non-string + text_parts.append(str(text_val) if text_val is not None else "") return "".join(text_parts) # Not content-part format, return string representation return str(raw_content) @@ -416,27 +418,36 @@ def _normalize_content(raw_content: Any, strip: bool = True) -> str: and len(check_content) < 8192 ): try: - parsed = json.loads(check_content.replace("'", '"')) - if isinstance(parsed, list): - # Only convert if it matches OpenAI content-part schema - # i.e., at least one item has {'type': 'text', 'text': ...} - has_content_part = any( - isinstance(part, dict) and part.get("type") == "text" - for part in parsed - ) - if has_content_part: - text_parts = [] - for part in parsed: - if ( - isinstance(part, dict) - and part.get("type") == "text" - ): - text_parts.append(part.get("text", "")) - if text_parts: - return "".join(text_parts) - except (json.JSONDecodeError, TypeError): - # Not a valid JSON, keep original string - pass + # First try standard JSON parsing + parsed = json.loads(check_content) + except json.JSONDecodeError: + # If that fails, try parsing as Python literal (handles single quotes) + # This is safer than blind replace("'", '"') which corrupts apostrophes + try: + import ast + + parsed = ast.literal_eval(check_content) + except (ValueError, SyntaxError): + parsed = None + + if isinstance(parsed, list): + # Only convert if it matches OpenAI content-part schema + # i.e., at least one item has {'type': 'text', 'text': ...} + has_content_part = any( + isinstance(part, dict) and part.get("type") == "text" + for part in parsed + ) + if has_content_part: + text_parts = [] + for part in parsed: + if isinstance(part, dict) and part.get("type") == "text": + text_val = part.get("text", "") + # Coerce to str in case text is null or 
non-string + text_parts.append( + str(text_val) if text_val is not None else "" + ) + if text_parts: + return "".join(text_parts) return content return str(raw_content)