fix(backends): raise error when OpenAI backend receives content=None #1062
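The symptom this PR addresses: a thinking-mode model served through an OpenAI-compatible endpoint can finish normally while spending every generated token on reasoning, leaving `content=None`; previously the backend surfaced that as an empty string. A minimal repro sketch against the raw endpoint, assuming a local vLLM server launched with a reasoning parser (the base URL and model name are illustrative):

```python
from openai import OpenAI

# Illustrative endpoint and model: any vLLM server launched with
# --reasoning-parser for a Qwen3-style thinking model shows the same shape.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

resp = client.chat.completions.create(
    model="Qwen/Qwen3-8B",
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
)

choice = resp.choices[0]
print(choice.finish_reason)          # "stop": generation ended normally
print(choice.message.content)        # None: all tokens went to reasoning
print(resp.usage.completion_tokens)  # > 0
# vLLM exposes the reasoning text on a non-standard message field:
print(getattr(choice.message, "reasoning_content", None))
```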
The backend change adds a guard to `post_processing` in the OpenAI backend:

```diff
@@ -1119,6 +1119,39 @@ async def post_processing(
         mot.generation.model = self._model_id
         mot.generation.provider = "openai"

+        # content=None with stop+tokens means thinking-only mode; surface it rather than returning "".
+        finish_reason = choice_response.get("finish_reason")
+        completion_tokens = usage.get("completion_tokens", 0) if usage else 0
+        if (
+            not mot._underlying_value
+            and finish_reason == "stop"
+            and completion_tokens > 0
+            and not mot.tool_calls
+        ):
+            thinking_note = (
+                f" Reasoning content ({len(mot._thinking)} chars) is in mot._thinking."
+                if mot._thinking
+                else ""
+            )
+            err = RuntimeError(
+                "OpenAI backend received an empty response (content=None) with "
+                f"finish_reason=stop and completion_tokens={completion_tokens}. "
+                "This typically indicates a thinking-mode model (e.g. Qwen3 via vLLM "
+                "with --reasoning-parser) that emitted only reasoning tokens."
+                + thinking_note
+                + " For vLLM/Qwen3, disable thinking via model_options, e.g.: "
+                'model_options={"extra_body": {"chat_template_kwargs": '
+                '{"enable_thinking": False}}}.'
+                " For other providers, consult your runtime's documentation."
+            )
+            span = mot._meta.pop("_telemetry_span", None)
+            if span is not None:
+                from ..telemetry import end_backend_span, set_span_error
+
+                set_span_error(span, err)
+                end_backend_span(span)
+            raise err
+
         # Record telemetry now that response is available
         span = mot._meta.get("_telemetry_span")
         if span is not None:
```

**Contributor** (on the `thinking_note` line): Nit: the error message points users at
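Caller-side, the remedy the error message names is a `model_options` payload that disables the thinking phase. A sketch: only the payload itself comes from the error text, and how `model_options` reaches the backend (here via the constructor, mirroring the tests' `_make_backend` helper) is illustrative:

```python
from mellea.backends.openai import OpenAIBackend

# Verbatim from the new error message: asks vLLM's Qwen3 chat template to
# skip the thinking phase entirely. extra_body is forwarded as-is to the
# underlying OpenAI client request.
no_thinking = {
    "extra_body": {"chat_template_kwargs": {"enable_thinking": False}}
}

# Illustrative construction; the exact kwargs depend on your setup.
backend = OpenAIBackend(model_options=no_thinking)
```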
The test module's docstring and imports are updated to cover the new checks:

```diff
@@ -1,13 +1,16 @@
 """Unit tests for OpenAI backend pure-logic helpers — no API calls required.

 Covers filter_openai_client_kwargs, filter_chat_completions_kwargs,
-_simplify_and_merge, and _make_backend_specific_and_remove.
+_simplify_and_merge, _make_backend_specific_and_remove, and post_processing
+error detection for empty thinking-mode responses.
 """

 import pytest

 from mellea.backends import ModelOption
 from mellea.backends.openai import OpenAIBackend
+from mellea.core import ModelOutputThunk
+from mellea.stdlib.components import Message


 def _make_backend(model_options: dict | None = None) -> OpenAIBackend:
```
The added tests construct `ModelOutputThunk` fixtures by hand and exercise the guard on both the non-streaming and streaming paths:

```diff
@@ -168,5 +171,124 @@ def test_make_backend_specific_unknown_mellea_keys_removed(backend):
     assert ModelOption.SYSTEM_PROMPT not in result


+# --- post_processing: empty thinking-mode response detection ---
+
+
+def _build_mot_for_empty_content_check(
+    finish_reason: str = "stop",
+    content: str | None = None,
+    completion_tokens: int = 9,
+    tool_calls: list[dict] | None = None,
+) -> ModelOutputThunk:
+    """Construct a ModelOutputThunk in the state post_processing expects after processing()."""
+    mot = ModelOutputThunk(value=None)
+    mot._action = Message("user", "What is 2 + 2?")
+    mot._model_options = {}
+    mot._underlying_value = content if content is not None else ""
+    choice = {
+        "finish_reason": finish_reason,
+        "index": 0,
+        "message": {"content": content, "role": "assistant", "tool_calls": tool_calls},
+    }
+    full_response = {
+        "id": "chatcmpl-test",
+        "object": "chat.completion",
+        "choices": [choice],
+        "usage": {
+            "prompt_tokens": 10,
+            "completion_tokens": completion_tokens,
+            "total_tokens": 10 + completion_tokens,
+        },
+    }
+    mot._meta["oai_chat_response"] = full_response
+    mot._meta["oai_chat_response_choice"] = choice
+    return mot
+
+
+async def test_post_processing_raises_on_empty_content_with_tokens(backend):
+    """Thinking model with content=None, finish_reason=stop, non-zero tokens -> RuntimeError."""
+    mot = _build_mot_for_empty_content_check()
+    with pytest.raises(RuntimeError, match="enable_thinking"):
+        await backend.post_processing(
+            mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
+        )
+
+
+async def test_post_processing_raises_on_empty_string_content(backend):
+    """content='' is treated the same as None when finish_reason=stop and tokens>0."""
+    mot = _build_mot_for_empty_content_check(content="")
+    with pytest.raises(RuntimeError, match="empty response"):
+        await backend.post_processing(
+            mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
+        )
+
+
+async def test_post_processing_accepts_empty_content_with_zero_tokens(backend):
+    """Empty content with zero completion_tokens is not a thinking-mode failure."""
+    mot = _build_mot_for_empty_content_check(completion_tokens=0)
+    # Should not raise.
+    await backend.post_processing(
+        mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
+    )
+
+
+async def test_post_processing_accepts_empty_content_with_length_finish(backend):
+    """finish_reason=length (truncated) is a different failure mode, not raised here."""
+    mot = _build_mot_for_empty_content_check(finish_reason="length")
+    await backend.post_processing(
+        mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
+    )
+
+
+async def test_post_processing_accepts_non_empty_content(backend):
+    """Normal response with content is unaffected."""
+    mot = _build_mot_for_empty_content_check(content="The answer is 4.")
+    await backend.post_processing(
+        mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
+    )
+    assert mot._underlying_value == "The answer is 4."
+
+
+async def test_post_processing_streaming_raises_on_empty_content(backend):
+    """Streaming path: oai_chat_response is a choice-shaped dict (chat_completion_delta_merge output); guard still fires."""
+    mot = ModelOutputThunk(value=None)
+    mot._action = Message("user", "What is 2 + 2?")
+    mot._model_options = {}
+    mot._underlying_value = ""
+    # Streaming: oai_chat_response is the merged choice dict — finish_reason at the top level.
+    mot._meta["oai_chat_response"] = {
+        "finish_reason": "stop",
+        "index": 0,
+        "logprobs": None,
+        "stop_reason": None,
+        "message": {
+            "content": None,
+            "reasoning_content": "2+2=4",
+            "role": "assistant",
+            "tool_calls": [],
+        },
+    }
+    mot._meta["oai_streaming_usage"] = {
+        "prompt_tokens": 10,
+        "completion_tokens": 9,
+        "total_tokens": 19,
+    }
+    # oai_chat_response_choice intentionally absent — this is the streaming code path.
+    with pytest.raises(RuntimeError, match="enable_thinking"):
+        await backend.post_processing(
+            mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
+        )
+
+
+async def test_post_processing_skips_when_tool_calls_present(backend):
+    """Empty content with active tool calls must not raise — tool calls legitimately have no text."""
+    mot = _build_mot_for_empty_content_check()
+    mot.tool_calls = {"get_weather": {"name": "get_weather", "arguments": "{}"}}  # type: ignore[assignment]
+    # Should not raise.
+    await backend.post_processing(
+        mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
+    )
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
```

**Contributor** (on the `_build_mot_for_empty_content_check` signature): Optional: the helper hand-assembles
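Taken together, the fixtures encode where the guard has to look for `finish_reason` and usage on each path. A sketch of that lookup, under the assumptions the tests make (the real `post_processing` may structure this differently; `_finish_reason_and_usage` is a hypothetical name):

```python
def _finish_reason_and_usage(mot) -> tuple[str | None, dict | None]:
    """Locate finish_reason and usage for both response shapes in the tests.

    Non-streaming: _meta["oai_chat_response_choice"] holds the choice and
    _meta["oai_chat_response"]["usage"] holds usage. Streaming: the merged
    choice-shaped dict *is* _meta["oai_chat_response"], and usage (if the
    caller requested it) lands in _meta["oai_streaming_usage"].
    """
    choice = mot._meta.get("oai_chat_response_choice")
    if choice is not None:
        # Non-streaming path: the full chat.completion response is also stored.
        usage = mot._meta["oai_chat_response"].get("usage")
    else:
        # Streaming path: chat_completion_delta_merge output at the top level.
        choice = mot._meta["oai_chat_response"]
        usage = mot._meta.get("oai_streaming_usage")
    return choice.get("finish_reason"), usage
```

For the non-streaming fixture this yields `("stop", {..., "completion_tokens": 9, ...})`; for the streaming fixture the same values come out of the merged dict and `oai_streaming_usage`.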
**Review comment:** When usage is unavailable (e.g. streaming without `stream_options={"include_usage": True}`, or an OpenAI-compatible provider that omits it), `completion_tokens` falls back to 0 and the guard silently passes: the same empty-string symptom the fix is meant to surface. Not a regression, but worth a comment here or a line in the error so the next person hitting it knows why the guard didn't fire for them.
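One way to act on that note, sketched as a standalone variant of the guard (hypothetical helper, not part of the PR; `logging` stands in for whatever logger the module already uses):

```python
import logging

logger = logging.getLogger(__name__)


def check_empty_response(
    text: str,
    finish_reason: str | None,
    usage: dict | None,
    tool_calls,
) -> None:
    """Hypothetical variant of the PR's guard that reports missing usage.

    Without usage data, completion_tokens cannot distinguish a thinking-only
    response from a genuinely empty one, so warn instead of silently passing.
    """
    if text or finish_reason != "stop" or tool_calls:
        return
    if usage is None:
        logger.warning(
            "Empty response with finish_reason=stop but no usage data; "
            "if this is a thinking-mode model, request usage (e.g. "
            'stream_options={"include_usage": True}) so the guard can fire.'
        )
        return
    if usage.get("completion_tokens", 0) > 0:
        # The PR's full RuntimeError message would go here.
        raise RuntimeError("empty thinking-mode response")
```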