From 8e5223e7c7786b6a925a914d0cf9c6dc94a20e4a Mon Sep 17 00:00:00 2001
From: NayukiChiba <MekoNayuki@outlook.com>
Date: Sun, 15 Feb 2026 22:40:52 +0800
Subject: [PATCH 1/2] fix: handle list format content from OpenAI-compatible
 APIs

Some LLM providers (e.g., GLM-4.5V via SiliconFlow) return content in
list[dict] format, such as [{'type': 'text', 'text': '...'}], instead of
a plain string. This causes the raw list representation to be displayed
to users.

Changes:
- Add _normalize_content() helper to extract text from various content formats
- Use json.loads instead of ast.literal_eval for safer parsing
- Add a size limit check (under 8192 characters) before attempting JSON parsing
- Only convert lists that match OpenAI content-part schema (has 'type': 'text')
  to avoid collapsing legitimate list-literal replies like ['foo', 'bar']
- Add strip parameter to preserve whitespace in streaming chunks
- Clean up a trailing orphan </think> tag that may leak from some models

Fixes #5124
---
 .../core/provider/sources/openai_source.py    | 77 ++++++++++++++++++-
 1 file changed, 74 insertions(+), 3 deletions(-)
diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py
index 328da2573..45bd69217 100644
--- a/astrbot/core/provider/sources/openai_source.py
+++ b/astrbot/core/provider/sources/openai_source.py
@@ -323,7 +323,8 @@ async def _query_stream(
                 llm_response.reasoning_content = reasoning
                 _y = True
             if delta.content:
-                completion_text = delta.content
+                # Don't strip streaming chunks to preserve spaces between words
+                completion_text = self._normalize_content(delta.content, strip=False)
                 llm_response.result_chain = MessageChain(
                     chain=[Comp.Plain(completion_text)],
                 )
@@ -371,6 +372,75 @@ def _extract_usage(self, usage: CompletionUsage) -> TokenUsage:
             output=completion_tokens,
         )
 
+    @staticmethod
+    def _normalize_content(raw_content: Any, strip: bool = True) -> str:
+        """Normalize content from various formats to plain string.
+
+        Some LLM providers return content as list[dict] format
+        like [{'type': 'text', 'text': '...'}] instead of
+        plain string. This method handles both formats.
+
+        Args:
+            raw_content: The raw content from LLM response, can be str, list, or other.
+            strip: Whether to strip whitespace from the result. Set to False for
+                   streaming chunks to preserve spaces between words.
+
+        Returns:
+            Normalized plain text string.
+        """
+        if isinstance(raw_content, list):
+            # Check if this looks like OpenAI content-part format
+            # Only process if at least one item has {'type': 'text', 'text': ...} structure
+            has_content_part = any(
+                isinstance(part, dict) and part.get("type") == "text"
+                for part in raw_content
+            )
+            if has_content_part:
+                text_parts = []
+                for part in raw_content:
+                    if isinstance(part, dict) and part.get("type") == "text":
+                        text_parts.append(part.get("text", ""))
+                return "".join(text_parts)
+            # Not content-part format, return string representation
+            return str(raw_content)
+
+        if isinstance(raw_content, str):
+            content = raw_content.strip() if strip else raw_content
+            # Check if the string is a JSON-encoded list (e.g., "[{'type': 'text', ...}]")
+            # This can happen when streaming concatenates content that was originally list format
+            # Only check if it looks like a complete JSON array (requires strip for check)
+            check_content = raw_content.strip()
+            if (
+                check_content.startswith("[")
+                and check_content.endswith("]")
+                and len(check_content) < 8192
+            ):
+                try:
+                    parsed = json.loads(check_content.replace("'", '"'))
+                    if isinstance(parsed, list):
+                        # Only convert if it matches OpenAI content-part schema
+                        # i.e., at least one item has {'type': 'text', 'text': ...}
+                        has_content_part = any(
+                            isinstance(part, dict) and part.get("type") == "text"
+                            for part in parsed
+                        )
+                        if has_content_part:
+                            text_parts = []
+                            for part in parsed:
+                                if (
+                                    isinstance(part, dict)
+                                    and part.get("type") == "text"
+                                ):
+                                    text_parts.append(part.get("text", ""))
+                            if text_parts:
+                                return "".join(text_parts)
+                except (json.JSONDecodeError, TypeError):
+                    # Not a valid JSON, keep original string
+                    pass
+            return content
+
+        return str(raw_content)
+
     async def _parse_openai_completion(
         self, completion: ChatCompletion, tools: ToolSet | None
     ) -> LLMResponse:
@@ -383,8 +453,7 @@ async def _parse_openai_completion(
 
         # parse the text completion
         if choice.message.content is not None:
-            # text completion
-            completion_text = str(choice.message.content).strip()
+            completion_text = self._normalize_content(choice.message.content)
             # specially, some providers may set <think> tags around reasoning content in the completion text,
             # we use regex to remove them, and store then in reasoning_content field
             reasoning_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
@@ -394,6 +463,8 @@ async def _parse_openai_completion(
                     [match.strip() for match in matches],
                 )
                 completion_text = reasoning_pattern.sub("", completion_text).strip()
+            # Also clean up orphan </think> tags that may leak from some models
+            completion_text = re.sub(r"</think>\s*$", "", completion_text).strip()
             llm_response.result_chain = MessageChain().message(completion_text)
 
         # parse the reasoning content if any

From 8af47d0987c5eec12c064676c597cd76cccc2dd9 Mon Sep 17 00:00:00 2001
From: NayukiChiba <MekoNayuki@outlook.com>
Date: Sun, 15 Feb 2026 23:10:20 +0800
Subject: [PATCH 2/2] fix: improve content normalization safety

- Try json.loads first, then fall back to ast.literal_eval for single-quoted
  Python literals to avoid corrupting apostrophes (e.g., "don't")
- Coerce text values to str to handle null or non-string text fields
---
 .../core/provider/sources/openai_source.py    | 55 +++++++++++--------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py
index 45bd69217..5378385e5 100644
--- a/astrbot/core/provider/sources/openai_source.py
+++ b/astrbot/core/provider/sources/openai_source.py
@@ -399,7 +399,9 @@ def _normalize_content(raw_content: Any, strip: bool = True) -> str:
                 text_parts = []
                 for part in raw_content:
                     if isinstance(part, dict) and part.get("type") == "text":
-                        text_parts.append(part.get("text", ""))
+                        text_val = part.get("text", "")
+                        # Coerce to str in case text is null or non-string
+                        text_parts.append(str(text_val) if text_val is not None else "")
                 return "".join(text_parts)
             # Not content-part format, return string representation
             return str(raw_content)
@@ -416,27 +418,36 @@ def _normalize_content(raw_content: Any, strip: bool = True) -> str:
                 and len(check_content) < 8192
             ):
                 try:
-                    parsed = json.loads(check_content.replace("'", '"'))
-                    if isinstance(parsed, list):
-                        # Only convert if it matches OpenAI content-part schema
-                        # i.e., at least one item has {'type': 'text', 'text': ...}
-                        has_content_part = any(
-                            isinstance(part, dict) and part.get("type") == "text"
-                            for part in parsed
-                        )
-                        if has_content_part:
-                            text_parts = []
-                            for part in parsed:
-                                if (
-                                    isinstance(part, dict)
-                                    and part.get("type") == "text"
-                                ):
-                                    text_parts.append(part.get("text", ""))
-                            if text_parts:
-                                return "".join(text_parts)
-                except (json.JSONDecodeError, TypeError):
-                    # Not a valid JSON, keep original string
-                    pass
+                    # First try standard JSON parsing
+                    parsed = json.loads(check_content)
+                except json.JSONDecodeError:
+                    # If that fails, try parsing as Python literal (handles single quotes)
+                    # This is safer than blind replace("'", '"') which corrupts apostrophes
+                    try:
+                        import ast
+
+                        parsed = ast.literal_eval(check_content)
+                    except (ValueError, SyntaxError):
+                        parsed = None
+
+                if isinstance(parsed, list):
+                    # Only convert if it matches OpenAI content-part schema
+                    # i.e., at least one item has {'type': 'text', 'text': ...}
+                    has_content_part = any(
+                        isinstance(part, dict) and part.get("type") == "text"
+                        for part in parsed
+                    )
+                    if has_content_part:
+                        text_parts = []
+                        for part in parsed:
+                            if isinstance(part, dict) and part.get("type") == "text":
+                                text_val = part.get("text", "")
+                                # Coerce to str in case text is null or non-string
+                                text_parts.append(
+                                    str(text_val) if text_val is not None else ""
+                                )
+                        if text_parts:
+                            return "".join(text_parts)
             return content
 
         return str(raw_content)