Skip to content

Commit dfea718

Browse files
committed
fix: handle list format content from OpenAI-compatible APIs
Some LLM providers return content as list[dict] format like [{'type': 'text', 'text': '...'}] instead of plain string. This causes the raw list representation to be displayed to users. Changes: - Add _normalize_content() helper to extract text from various content formats - Use json.loads instead of ast.literal_eval for safer parsing - Add size limit check (8KB) before attempting JSON parsing - Clean up orphan </think> tags that may leak from some models Fixes #5124
1 parent 549cbb8 commit dfea718

1 file changed

Lines changed: 55 additions & 3 deletions

File tree

astrbot/core/provider/sources/openai_source.py

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ async def _query_stream(
323323
llm_response.reasoning_content = reasoning
324324
_y = True
325325
if delta.content:
326-
completion_text = delta.content
326+
completion_text = self._normalize_content(delta.content)
327327
llm_response.result_chain = MessageChain(
328328
chain=[Comp.Plain(completion_text)],
329329
)
@@ -371,6 +371,57 @@ def _extract_usage(self, usage: CompletionUsage) -> TokenUsage:
371371
output=completion_tokens,
372372
)
373373

374+
@staticmethod
375+
def _normalize_content(raw_content: Any) -> str:
376+
"""Normalize content from various formats to plain string.
377+
378+
Some LLM providers return content as list[dict] format
379+
like [{'type': 'text', 'text': '...'}] instead of
380+
plain string. This method handles both formats.
381+
382+
Args:
383+
raw_content: The raw content from LLM response, can be str, list, or other.
384+
385+
Returns:
386+
Normalized plain text string.
387+
"""
388+
if isinstance(raw_content, list):
389+
# Extract text from list of content parts
390+
text_parts = []
391+
for part in raw_content:
392+
if isinstance(part, dict) and part.get("type") == "text":
393+
text_parts.append(part.get("text", ""))
394+
elif isinstance(part, str):
395+
text_parts.append(part)
396+
return "".join(text_parts)
397+
398+
if isinstance(raw_content, str):
399+
content = raw_content.strip()
400+
# Check if the string is a JSON-encoded list (e.g., "[{'type': 'text', ...}]")
401+
# This can happen when streaming concatenates content that was originally list format
402+
if (
403+
content.startswith("[")
404+
and content.endswith("]")
405+
and len(content) < 8192
406+
):
407+
try:
408+
parsed = json.loads(content.replace("'", '"'))
409+
if isinstance(parsed, list):
410+
text_parts = []
411+
for part in parsed:
412+
if isinstance(part, dict) and part.get("type") == "text":
413+
text_parts.append(part.get("text", ""))
414+
elif isinstance(part, str):
415+
text_parts.append(part)
416+
if text_parts:
417+
return "".join(text_parts)
418+
except (json.JSONDecodeError, TypeError):
419+
# Not a valid JSON, keep original string
420+
pass
421+
return content
422+
423+
return str(raw_content)
424+
374425
async def _parse_openai_completion(
375426
self, completion: ChatCompletion, tools: ToolSet | None
376427
) -> LLMResponse:
@@ -383,8 +434,7 @@ async def _parse_openai_completion(
383434

384435
# parse the text completion
385436
if choice.message.content is not None:
386-
# text completion
387-
completion_text = str(choice.message.content).strip()
437+
completion_text = self._normalize_content(choice.message.content)
388438
# specially, some providers may set <think> tags around reasoning content in the completion text,
389439
# we use regex to remove them, and store them in the reasoning_content field
390440
reasoning_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
@@ -394,6 +444,8 @@ async def _parse_openai_completion(
394444
[match.strip() for match in matches],
395445
)
396446
completion_text = reasoning_pattern.sub("", completion_text).strip()
447+
# Also clean up orphan </think> tags that may leak from some models
448+
completion_text = re.sub(r"</think>\s*$", "", completion_text).strip()
397449
llm_response.result_chain = MessageChain().message(completion_text)
398450

399451
# parse the reasoning content if any

0 commit comments

Comments
 (0)