33 changes: 33 additions & 0 deletions mellea/backends/openai.py
@@ -1119,6 +1119,39 @@ async def post_processing(
mot.generation.model = self._model_id
mot.generation.provider = "openai"

# content=None with stop+tokens means thinking-only mode; surface it rather than returning "".
finish_reason = choice_response.get("finish_reason")
completion_tokens = usage.get("completion_tokens", 0) if usage else 0
Contributor:

When usage is unavailable (e.g. streaming without stream_options={"include_usage": True}, or an OpenAI-compatible provider that omits it), completion_tokens falls back to 0 and the guard silently passes — same empty-string symptom the fix is meant to surface. Not a regression, but worth a comment here or a line in the error so the next person hitting it knows why the guard didn't fire for them.
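
A minimal client-side sketch of the situation this comment describes, using the stock openai client (the model name is illustrative): when streaming, usage is only populated if explicitly requested, so without stream_options the guard's completion_tokens check falls back to 0.

# Sketch: request usage on streaming responses so completion_tokens is
# populated rather than silently defaulting to 0 in the guard below.
from openai import OpenAI

client = OpenAI()
stream = client.chat.completions.create(
    model="gpt-4o-mini",  # illustrative model name
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in stream:
    # With include_usage, the final chunk has empty choices and carries usage.
    if chunk.usage is not None:
        print(chunk.usage.completion_tokens)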

if (
    not mot._underlying_value
    and finish_reason == "stop"
    and completion_tokens > 0
    and not mot.tool_calls
):
    thinking_note = (
        f" Reasoning content ({len(mot._thinking)} chars) is in mot._thinking."
Contributor:

Nit: the error message points users at mot._thinking, which is a private attribute today. #909 item 4 is considering whether to promote it to public — happy to leave this as-is and let the message track whatever shakes out of that issue, but worth flagging.

        if mot._thinking
        else ""
    )
    err = RuntimeError(
        "OpenAI backend received an empty response (content=None) with "
        f"finish_reason=stop and completion_tokens={completion_tokens}. "
        "This typically indicates a thinking-mode model (e.g. Qwen3 via vLLM "
        "with --reasoning-parser) that emitted only reasoning tokens."
        + thinking_note
        + " For vLLM/Qwen3, disable thinking via model_options, e.g.: "
        'model_options={"extra_body": {"chat_template_kwargs": '
        '{"enable_thinking": False}}}.'
        " For other providers, consult your runtime's documentation."
    )
    span = mot._meta.pop("_telemetry_span", None)
    if span is not None:
        from ..telemetry import end_backend_span, set_span_error

        set_span_error(span, err)
        end_backend_span(span)
    raise err

# Record telemetry now that response is available
span = mot._meta.get("_telemetry_span")
if span is not None:
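As a caller-side reference, a hedged sketch of the workaround the error message suggests. The OpenAIBackend constructor arguments here are assumptions for illustration (the tests below construct backends through a _make_backend helper); only the model_options payload is quoted from the error text.

# Hypothetical sketch: constructor kwargs are assumed, not taken from this diff.
# The model_options payload is the one quoted in the RuntimeError above.
backend = OpenAIBackend(
    model_id="Qwen/Qwen3-8B",  # assumed model id for illustration
    model_options={
        "extra_body": {"chat_template_kwargs": {"enable_thinking": False}}
    },
)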
124 changes: 123 additions & 1 deletion test/backends/test_openai_unit.py
@@ -1,13 +1,16 @@
"""Unit tests for OpenAI backend pure-logic helpers — no API calls required.

Covers filter_openai_client_kwargs, filter_chat_completions_kwargs,
_simplify_and_merge, _make_backend_specific_and_remove, and post_processing
error detection for empty thinking-mode responses.
"""

import pytest

from mellea.backends import ModelOption
from mellea.backends.openai import OpenAIBackend
from mellea.core import ModelOutputThunk
from mellea.stdlib.components import Message


def _make_backend(model_options: dict | None = None) -> OpenAIBackend:
@@ -168,5 +171,124 @@ def test_make_backend_specific_unknown_mellea_keys_removed(backend):
    assert ModelOption.SYSTEM_PROMPT not in result


# --- post_processing: empty thinking-mode response detection ---


def _build_mot_for_empty_content_check(
    finish_reason: str = "stop",
    content: str | None = None,
    completion_tokens: int = 9,
    tool_calls: list[dict] | None = None,
Contributor:

Optional: the helper hand-assembles mot._meta["oai_chat_response"] and oai_chat_response_choice. Driving a real ChatCompletion fixture through processing() instead would catch future drift in those meta keys. Fine as a unit test; mentioning in case you want the broader coverage. (A hedged sketch follows the helper below.)

) -> ModelOutputThunk:
    """Construct a ModelOutputThunk in the state post_processing expects after processing()."""
    mot = ModelOutputThunk(value=None)
    mot._action = Message("user", "What is 2 + 2?")
    mot._model_options = {}
    mot._underlying_value = content if content is not None else ""
    choice = {
        "finish_reason": finish_reason,
        "index": 0,
        "message": {"content": content, "role": "assistant", "tool_calls": tool_calls},
    }
    full_response = {
        "id": "chatcmpl-test",
        "object": "chat.completion",
        "choices": [choice],
        "usage": {
            "prompt_tokens": 10,
            "completion_tokens": completion_tokens,
            "total_tokens": 10 + completion_tokens,
        },
    }
    mot._meta["oai_chat_response"] = full_response
    mot._meta["oai_chat_response_choice"] = choice
    return mot
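

Following up on the contributor comment above, a hedged sketch of building the same payload through openai's typed model, so a rename in those response fields fails loudly at validation time; whether processing() can be driven with it directly is an open assumption.

# Sketch only (not in this PR): openai's ChatCompletion is a pydantic model,
# so validating the fixture against it catches drift in field names.
from openai.types.chat import ChatCompletion


def _typed_fixture(completion_tokens: int = 9) -> tuple[dict, dict]:
    completion = ChatCompletion.model_validate(
        {
            "id": "chatcmpl-test",
            "object": "chat.completion",
            "created": 0,  # required by the typed model, absent from the hand-built dict
            "model": "test-model",  # likewise required here
            "choices": [
                {
                    "finish_reason": "stop",
                    "index": 0,
                    "message": {"content": None, "role": "assistant"},
                }
            ],
            "usage": {
                "prompt_tokens": 10,
                "completion_tokens": completion_tokens,
                "total_tokens": 10 + completion_tokens,
            },
        }
    )
    full_response = completion.model_dump()
    # Same shapes the helper above stores in mot._meta.
    return full_response, full_response["choices"][0]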


async def test_post_processing_raises_on_empty_content_with_tokens(backend):
    """Thinking model with content=None, finish_reason=stop, non-zero tokens -> RuntimeError."""
    mot = _build_mot_for_empty_content_check()
    with pytest.raises(RuntimeError, match="enable_thinking"):
        await backend.post_processing(
            mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
        )


async def test_post_processing_raises_on_empty_string_content(backend):
    """content='' is treated the same as None when finish_reason=stop and tokens>0."""
    mot = _build_mot_for_empty_content_check(content="")
    with pytest.raises(RuntimeError, match="empty response"):
        await backend.post_processing(
            mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
        )


async def test_post_processing_accepts_empty_content_with_zero_tokens(backend):
    """Empty content with zero completion_tokens is not a thinking-mode failure."""
    mot = _build_mot_for_empty_content_check(completion_tokens=0)
    # Should not raise.
    await backend.post_processing(
        mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
    )


async def test_post_processing_accepts_empty_content_with_length_finish(backend):
    """finish_reason=length (truncated) is a different failure mode, not raised here."""
    mot = _build_mot_for_empty_content_check(finish_reason="length")
    await backend.post_processing(
        mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
    )


async def test_post_processing_accepts_non_empty_content(backend):
    """Normal response with content is unaffected."""
    mot = _build_mot_for_empty_content_check(content="The answer is 4.")
    await backend.post_processing(
        mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
    )
    assert mot._underlying_value == "The answer is 4."


async def test_post_processing_streaming_raises_on_empty_content(backend):
    """Streaming path: oai_chat_response is a choice-shaped dict (chat_completion_delta_merge output); guard still fires."""
    mot = ModelOutputThunk(value=None)
    mot._action = Message("user", "What is 2 + 2?")
    mot._model_options = {}
    mot._underlying_value = ""
    # Streaming: oai_chat_response is the merged choice dict, with finish_reason at the top level.
    mot._meta["oai_chat_response"] = {
        "finish_reason": "stop",
        "index": 0,
        "logprobs": None,
        "stop_reason": None,
        "message": {
            "content": None,
            "reasoning_content": "2+2=4",
            "role": "assistant",
            "tool_calls": [],
        },
    }
    mot._meta["oai_streaming_usage"] = {
        "prompt_tokens": 10,
        "completion_tokens": 9,
        "total_tokens": 19,
    }
    # oai_chat_response_choice intentionally absent; this is the streaming code path.
    with pytest.raises(RuntimeError, match="enable_thinking"):
        await backend.post_processing(
            mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
        )


async def test_post_processing_skips_when_tool_calls_present(backend):
    """Empty content with active tool calls must not raise; tool calls legitimately have no text."""
    mot = _build_mot_for_empty_content_check()
    mot.tool_calls = {"get_weather": {"name": "get_weather", "arguments": "{}"}}  # type: ignore[assignment]
    # Should not raise.
    await backend.post_processing(
        mot=mot, tools={}, conversation=[], thinking=None, seed=None, _format=None
    )


if __name__ == "__main__":
    pytest.main([__file__, "-v"])