AstrBotDevs · xunxiing · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026
diff --git a/astrbot/core/agent/context/compressor.py b/astrbot/core/agent/context/compressor.py
@@ -193,38 +193,80 @@ def should_compress(
         usage_rate = current_tokens / max_tokens
         return usage_rate > self.compression_threshold
 
+    def _supports_native_compact(self) -> bool:
+        """Return True only if the provider's capability probe says so."""
+        probe = getattr(self.provider, "supports_native_compact", None)
+        try:
+            return callable(probe) and bool(probe())
+        except Exception as e:  # a failing probe must not break compression
+            logger.debug(f"supports_native_compact() probe failed: {e}")
+            return False
+
+    async def _try_native_compact(
+        self,
+        system_messages: list[Message],
+        messages_to_summarize: list[Message],
+        recent_messages: list[Message],
+    ) -> list[Message] | None:
+        """Compress old history through the provider's ``compact_context`` hook.
+
+        Returns the rebuilt message list (system + compacted + recent), or
+        ``None`` when the hook is missing, raises, or yields nothing, so the
+        caller can fall back to LLM summary compression.
+        """
+        native_compact = getattr(self.provider, "compact_context", None)
+        if not callable(native_compact):
+            return None
+
+        try:
+            compacted = await native_compact(messages_to_summarize)
+        except Exception as e:
+            logger.warning(
+                f"Native compact failed, fallback to summary compression: {e}"
+            )
+            return None
+
+        # An empty/None result counts as failure rather than dropping history.
+        return [*system_messages, *compacted, *recent_messages] if compacted else None
+
     async def __call__(self, messages: list[Message]) -> list[Message]:
         """Use LLM to generate a summary of the conversation history.
 
         Process:
         1. Divide messages: keep the system message and the latest N messages.
-        2. Send the old messages + the instruction message to the LLM.
-        3. Reconstruct the message list: [system message, summary message, latest messages].
+        2. Prefer native compact when provider supports it.
+        3. Fallback to LLM summary and reconstruct message list.
         """
         if len(messages) <= self.keep_recent + 1:
             return messages
 
         system_messages, messages_to_summarize, recent_messages = split_history(
             messages, self.keep_recent
         )
-
         if not messages_to_summarize:
             return messages
 
-        # build payload
+        native_compact_supported = self._supports_native_compact()
+
+        if native_compact_supported:
+            compacted = await self._try_native_compact(
+                system_messages,
+                messages_to_summarize,
+                recent_messages,
+            )
+            if compacted is not None:
+                return compacted
         instruction_message = Message(role="user", content=self.instruction_text)
         llm_payload = messages_to_summarize + [instruction_message]
 
-        # generate summary
         try:
             response = await self.provider.text_chat(contexts=llm_payload)
             summary_content = response.completion_text
         except Exception as e:
             logger.error(f"Failed to generate summary: {e}")
             return messages
 
-        # build result
-        result = []
+        result: list[Message] = []
         result.extend(system_messages)
 
         result.append(

diff --git a/astrbot/core/agent/runners/tool_loop_agent_runner.py b/astrbot/core/agent/runners/tool_loop_agent_runner.py
@@ -659,24 +659,24 @@ async def _handle_function_tools(
                     ),
                 )
 
-        # yield the last tool call result
-        if tool_call_result_blocks:
-            last_tcr_content = str(tool_call_result_blocks[-1].content)
-            yield _HandleFunctionToolsResult.from_message_chain(
-                MessageChain(
-                    type="tool_call_result",
-                    chain=[
-                        Json(
-                            data={
-                                "id": func_tool_id,
-                                "ts": time.time(),
-                                "result": last_tcr_content,
-                            }
-                        )
-                    ],
+            # yield the tool call result
+            if tool_call_result_blocks:
+                last_tcr_content = str(tool_call_result_blocks[-1].content)
+                yield _HandleFunctionToolsResult.from_message_chain(
+                    MessageChain(
+                        type="tool_call_result",
+                        chain=[
+                            Json(
+                                data={
+                                    "id": func_tool_id,
+                                    "ts": time.time(),
+                                    "result": last_tcr_content,
+                                }
+                            )
+                        ],
+                    )
                 )
-            )
-            logger.info(f"Tool `{func_tool_name}` Result: {last_tcr_content}")
+                logger.info(f"Tool `{func_tool_name}` Result: {last_tcr_content}")
 
         # 处理函数调用响应
         if tool_call_result_blocks:

diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py
@@ -91,6 +91,8 @@ class MainAgentBuildConfig:
     """The number of most recent turns to keep during llm_compress strategy."""
     llm_compress_provider_id: str = ""
     """The provider ID for the LLM used in context compression."""
+    llm_compress_use_compact_api: bool = True
+    """Whether to prefer provider native compact API when available."""
     max_context_length: int = -1
     """The maximum number of turns to keep in context. -1 means no limit.
     This enforce max turns before compression"""
@@ -742,17 +744,22 @@ async def _handle_webchat(
     if not user_prompt or not chatui_session_id or not session or session.display_name:
         return
 
-    llm_resp = await prov.text_chat(
-        system_prompt=(
-            "You are a conversation title generator. "
-            "Generate a concise title in the same language as the user’s input, "
-            "no more than 10 words, capturing only the core topic."
-            "If the input is a greeting, small talk, or has no clear topic, "
-            "(e.g., “hi”, “hello”, “haha”), return <None>. "
-            "Output only the title itself or <None>, with no explanations."
-        ),
-        prompt=f"Generate a concise title for the following user query:\n{user_prompt}",
-    )
+    try:
+        llm_resp = await prov.text_chat(
+            system_prompt=(
+                "You are a conversation title generator. "
+                "Generate a concise title in the same language as the user's input, "
+                "no more than 10 words, capturing only the core topic."
+                "If the input is a greeting, small talk, or has no clear topic, "
+                '(e.g., "hi", "hello", "haha"), return <None>. '
+                "Output only the title itself or <None>, with no explanations."
+            ),
+            prompt=f"Generate a concise title for the following user query:\n{user_prompt}",
+        )
+    except Exception as e:
+        logger.warning("Failed to generate chatui title: %s", e)
+        return
+
     if llm_resp and llm_resp.completion_text:
         title = llm_resp.completion_text.strip()
         if not title or "<None>" in title:
@@ -807,26 +814,33 @@ def _proactive_cron_job_tools(req: ProviderRequest) -> None:
 
 
 def _get_compress_provider(
-    config: MainAgentBuildConfig, plugin_context: Context
+    config: MainAgentBuildConfig,
+    plugin_context: Context,
+    active_provider: Provider | None,  # NOTE(review): unused in this body; confirm intent
 ) -> Provider | None:
-    if not config.llm_compress_provider_id:
-        return None
     if config.context_limit_reached_strategy != "llm_compress":
         return None
-    provider = plugin_context.get_provider_by_id(config.llm_compress_provider_id)
-    if provider is None:
+
+    if not config.llm_compress_provider_id:
+        return None
+
+    selected_provider = plugin_context.get_provider_by_id(
+        config.llm_compress_provider_id
+    )
+    if selected_provider is None:
         logger.warning(
             "未找到指定的上下文压缩模型 %s，将跳过压缩。",
             config.llm_compress_provider_id,
         )
         return None
-    if not isinstance(provider, Provider):
+    if not isinstance(selected_provider, Provider):
         logger.warning(
             "指定的上下文压缩模型 %s 不是对话模型，将跳过压缩。",
             config.llm_compress_provider_id,
         )
         return None
-    return provider
+
+    return selected_provider
 
 
 async def build_main_agent(
@@ -970,7 +984,7 @@ async def build_main_agent(
         streaming=config.streaming_response,
         llm_compress_instruction=config.llm_compress_instruction,
         llm_compress_keep_recent=config.llm_compress_keep_recent,
-        llm_compress_provider=_get_compress_provider(config, plugin_context),
+        llm_compress_provider=_get_compress_provider(config, plugin_context, provider),
         truncate_turns=config.dequeue_context_length,
         enforce_max_turns=config.max_context_length,
         tool_schema_mode=config.tool_schema_mode,

diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
@@ -94,6 +94,7 @@
         ),
         "llm_compress_keep_recent": 6,
         "llm_compress_provider_id": "",
+        "llm_compress_use_compact_api": True,
         "max_context_length": -1,
         "dequeue_context_length": 1,
         "streaming_response": False,
@@ -929,6 +930,19 @@ class ChatProviderTemplate(TypedDict):
                         "proxy": "",
                         "custom_headers": {},
                     },
+                    "OpenAI Responses": {
+                        "id": "openai_responses",
+                        "provider": "openai",
+                        "type": "openai_responses",
+                        "provider_type": "chat_completion",
+                        "enable": True,
+                        "key": [],
+                        "api_base": "https://api.openai.com/v1",
+                        "timeout": 120,
+                        "proxy": "",
+                        "custom_headers": {},
+                        "custom_extra_body": {},
+                    },
                     "Google Gemini": {
                         "id": "google_gemini",
                         "provider": "google",
@@ -2828,6 +2842,15 @@ class ChatProviderTemplate(TypedDict):
                             "provider_settings.agent_runner_type": "local",
                         },
                     },
+                    "provider_settings.llm_compress_use_compact_api": {
+                        "description": "Prefer compact API when available",
+                        "type": "bool",
+                        "hint": "When enabled, local runner first tries provider native compact API and falls back to LLM summary compression.",
+                        "condition": {
+                            "provider_settings.context_limit_reached_strategy": "llm_compress",
+                            "provider_settings.agent_runner_type": "local",
+                        },
+                    },
                 },
                 "condition": {
                     "provider_settings.agent_runner_type": "local",

diff --git a/astrbot/core/pipeline/process_stage/method/agent_sub_stages/internal.py b/astrbot/core/pipeline/process_stage/method/agent_sub_stages/internal.py
@@ -79,6 +79,9 @@ async def initialize(self, ctx: PipelineContext) -> None:
         self.llm_compress_provider_id: str = settings.get(
             "llm_compress_provider_id", ""
         )
+        self.llm_compress_use_compact_api: bool = settings.get(
+            "llm_compress_use_compact_api", True
+        )
         self.max_context_length = settings["max_context_length"]  # int
         self.dequeue_context_length: int = min(
             max(1, settings["dequeue_context_length"]),
@@ -113,6 +116,7 @@ async def initialize(self, ctx: PipelineContext) -> None:
             llm_compress_instruction=self.llm_compress_instruction,
             llm_compress_keep_recent=self.llm_compress_keep_recent,
             llm_compress_provider_id=self.llm_compress_provider_id,
+            llm_compress_use_compact_api=self.llm_compress_use_compact_api,
             max_context_length=self.max_context_length,
             dequeue_context_length=self.dequeue_context_length,
             llm_safety_mode=self.llm_safety_mode,

diff --git a/astrbot/core/provider/entities.py b/astrbot/core/provider/entities.py
@@ -9,6 +9,7 @@
 from anthropic.types import Message as AnthropicMessage
 from google.genai.types import GenerateContentResponse
 from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.responses.response import Response as OpenAIResponse
 
 import astrbot.core.message.components as Comp
 from astrbot import logger
@@ -276,7 +277,11 @@ class LLMResponse:
     """The signature of the reasoning content, if any."""
 
     raw_completion: (
-        ChatCompletion | GenerateContentResponse | AnthropicMessage | None
+        ChatCompletion
+        | GenerateContentResponse
+        | AnthropicMessage
+        | OpenAIResponse
+        | None
     ) = None
     """The raw completion response from the LLM provider."""
 
@@ -305,6 +310,7 @@ def __init__(
         raw_completion: ChatCompletion
         | GenerateContentResponse
         | AnthropicMessage
+        | OpenAIResponse
         | None = None,
         is_chunk: bool = False,
         id: str | None = None,

diff --git a/astrbot/core/provider/manager.py b/astrbot/core/provider/manager.py
@@ -291,6 +291,10 @@ def dynamic_import_provider(self, type: str) -> None:
                 from .sources.openai_source import (
                     ProviderOpenAIOfficial as ProviderOpenAIOfficial,
                 )
+            case "openai_responses":
+                from .sources.openai_responses_source import (
+                    ProviderOpenAIResponses as ProviderOpenAIResponses,
+                )
             case "zhipu_chat_completion":
                 from .sources.zhipu_source import ProviderZhipu as ProviderZhipu
             case "groq_chat_completion":