From 9b7cbb5849878ca0a7e9787ea430f4b244cb3e30 Mon Sep 17 00:00:00 2001
From: eutopia <940758055@qq.com>
Date: Wed, 27 May 2026 16:29:37 +0800
Subject: [PATCH] fix(llm): merge extra system messages for vllm compatibility

---
 backend/app/services/llm/caller.py    | 21 ++++++++++++++--
 backend/tests/test_finish_protocol.py | 35 +++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/backend/app/services/llm/caller.py b/backend/app/services/llm/caller.py
index 898dc23bd..6c0925c91 100644
--- a/backend/app/services/llm/caller.py
+++ b/backend/app/services/llm/caller.py
@@ -452,9 +452,26 @@ async def call_llm(
     tools_for_llm = await get_agent_tools_for_llm(agent_id) if agent_id else AGENT_TOOLS
     allowed_tool_names = _allowed_tool_names(tools_for_llm)
 
-    # Convert messages to LLMMessage format
-    api_messages = [LLMMessage(role="system", content=static_prompt, dynamic_content=dynamic_prompt)]
+    # Convert messages to LLMMessage format. Some OpenAI-compatible local
+    # providers only allow one leading system message, so fold non-empty
+    # caller-supplied system instructions into the base agent context.
+    conversation_messages: list[dict] = []
+    extra_system_parts: list[str] = []
     for msg in messages:
+        if msg.get("role") == "system":
+            content = msg.get("content")
+            if content and not isinstance(content, str):
+                content = json.dumps(content, ensure_ascii=False)
+            if content:
+                extra_system_parts.append(content)
+            continue
+        conversation_messages.append(msg)
+    if extra_system_parts:
+        extra_system = "\n\n".join(extra_system_parts)
+        dynamic_prompt = f"{dynamic_prompt}\n\n{extra_system}" if dynamic_prompt else extra_system
+
+    api_messages = [LLMMessage(role="system", content=static_prompt, dynamic_content=dynamic_prompt)]
+    for msg in conversation_messages:
         api_messages.append(LLMMessage(
             role=msg.get("role", "user"),
             content=msg.get("content"),
diff --git a/backend/tests/test_finish_protocol.py b/backend/tests/test_finish_protocol.py
index e36b6dd0d..fe770dcdd 100644
--- a/backend/tests/test_finish_protocol.py
+++ b/backend/tests/test_finish_protocol.py
@@ -266,6 +266,41 @@ async def test_skip_tools_still_exposes_finish(monkeypatch):
     assert tool_names == ["finish"]
 
 
+@pytest.mark.asyncio
+async def test_call_llm_merges_extra_system_messages(monkeypatch):
+    from app.services.llm import caller
+
+    fake_client = FakeStreamClient([_finish_response("Hello.")])
+
+    monkeypatch.setattr(caller, "_get_agent_config", lambda _agent_id: _async_return((1, None)))
+    monkeypatch.setattr(caller, "_get_user_name", lambda _user_id: _async_return("Ray"))
+    monkeypatch.setattr(
+        "app.services.agent_context.build_agent_context",
+        lambda *_args, **_kwargs: _async_return(("static", "dynamic")),
+    )
+    monkeypatch.setattr(caller, "create_llm_client", lambda **_kwargs: fake_client)
+    monkeypatch.setattr(caller, "record_token_usage", lambda *_args, **_kwargs: _async_return(None))
+
+    result = await caller.call_llm(
+        _model(),
+        [
+            {"role": "system", "content": "onboarding instructions"},
+            {"role": "user", "content": "Please begin the onboarding."},
+        ],
+        "Agent",
+        "",
+        agent_id=uuid.uuid4(),
+        user_id=uuid.uuid4(),
+        skip_tools=True,
+    )
+
+    assert result == "Hello."
+    first_round_messages = fake_client.messages_seen[0]
+    assert [msg.role for msg in first_round_messages] == ["system", "user"]
+    assert first_round_messages[0].content == "static"
+    assert first_round_messages[0].dynamic_content == "dynamic\n\nonboarding instructions"
+
+
 @pytest.mark.asyncio
 async def test_execute_tool_finish_is_noop_control_signal(monkeypatch):
     from app.services import agent_tools