Commit b78f882

fix examples

Signed-off-by: Akihiko Kuroda <akihikokuroda2020@gmail.com>
1 parent 0a54269 commit b78f882

5 files changed
Lines changed: 35 additions & 13 deletions

docs/examples/streaming/README.md

Lines changed: 5 additions & 4 deletions
@@ -31,11 +31,11 @@ uv run --with mellea docs/examples/streaming/advanced_streaming.py
 ## Key Concepts

 ### Streaming Requires Async
-Streaming is only available with async functions (`ainstruct`, `aact`) using `await_result=False`:
+Streaming is only available with async functions (`ainstruct`, `aact`) using `await_result=False` and `strategy=None`:

 ```python
-# This works - async with await_result=False
-thunk = await m.ainstruct("Hello", await_result=False)
+# This works - async with await_result=False and strategy=None
+thunk = await m.ainstruct("Hello", await_result=False, strategy=None)
 last_length = 0
 while not thunk.is_computed():
     current_value = await thunk.astream()
@@ -54,9 +54,10 @@ result = m.instruct("Hello")  # Already computed, cannot stream
 - **`ComputedModelOutputThunk`**: Already computed, cannot be streamed

 ### Limitations
-- Cannot stream when using `SamplingStrategy` (validation requires complete output)
+- Cannot stream when using `SamplingStrategy` (validation requires complete output) - must set `strategy=None`
 - Cannot stream from synchronous functions (would cause deadlock)
 - Streaming requires an async context
+- Default `strategy=RejectionSamplingStrategy(loop_budget=2)` must be disabled for streaming

 ## See Also
 - [Tutorial Chapter 13: Streaming Model Outputs](../../tutorial.md#chapter-13-streaming-model-outputs)
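The stream-until-computed loop this README documents can be exercised without a model backend. `FakeThunk` below is a hypothetical mock of an uncomputed `ModelOutputThunk` (it is not part of mellea; only `is_computed()` and `astream()` are imitated, with `astream()` returning the accumulated value so far as the README describes):

```python
import asyncio


class FakeThunk:
    """Hypothetical stand-in for an uncomputed ModelOutputThunk (not mellea's real class)."""

    def __init__(self, chunks):
        self._chunks = list(chunks)
        self._value = ""

    def is_computed(self):
        # Computed once every chunk has been delivered
        return not self._chunks

    async def astream(self):
        # Returns the accumulated value so far, growing by one chunk per call
        if self._chunks:
            self._value += self._chunks.pop(0)
        return self._value


async def stream_all(thunk):
    # The loop from the README: poll astream() until the thunk is computed
    last_length = 0
    pieces = []
    while not thunk.is_computed():
        current_value = await thunk.astream()
        pieces.append(current_value[last_length:])
        last_length = len(current_value)
    return "".join(pieces)


print(asyncio.run(stream_all(FakeThunk(["Hel", "lo, ", "world"]))))  # Hello, world
```

The slicing with `last_length` is what turns the accumulated value into incremental output, since `astream()` returns the whole text generated so far rather than a delta.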

docs/examples/streaming/advanced_streaming.py

Lines changed: 10 additions & 2 deletions
@@ -91,14 +91,22 @@ async def compare_streaming_vs_blocking():
     # Streaming
     print("\n2. Streaming mode (await_result=False):")
     print("   Tokens appear as generated: ", end="", flush=True)
-    thunk = await m.ainstruct("Write a haiku about programming.", await_result=False)
+    thunk = await m.ainstruct(
+        "Write a haiku about programming.",
+        await_result=False,
+        strategy=None,  # Must disable strategy for streaming
+    )

+    # Stream until complete - call astream() at least once even if already computed
     last_length = 0
-    while not thunk.is_computed():
+    while True:
         current_value = await thunk.astream()
         new_content = current_value[last_length:]
         print(new_content, end="", flush=True)
         last_length = len(current_value)
+
+        if thunk.is_computed():
+            break
     print()
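The switch from `while not thunk.is_computed()` to `while True` matters when generation finishes before the first poll: the old loop condition is false immediately, so `astream()` is never called and nothing is printed. A minimal sketch with a hypothetical already-computed mock (not mellea's real class) shows the difference:

```python
import asyncio


class ComputedThunk:
    """Hypothetical thunk whose generation finished before we start polling."""

    def __init__(self, value):
        self._value = value

    def is_computed(self):
        return True

    async def astream(self):
        return self._value


async def drain_old(thunk):
    # Old loop: never runs if the thunk is already computed
    out, last = [], 0
    while not thunk.is_computed():
        v = await thunk.astream()
        out.append(v[last:])
        last = len(v)
    return "".join(out)


async def drain_new(thunk):
    # Fixed loop: always calls astream() at least once before checking
    out, last = [], 0
    while True:
        v = await thunk.astream()
        out.append(v[last:])
        last = len(v)
        if thunk.is_computed():
            break
    return "".join(out)


print(repr(asyncio.run(drain_old(ComputedThunk("haiku")))))  # ''
print(repr(asyncio.run(drain_new(ComputedThunk("haiku")))))  # 'haiku'
```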

docs/examples/streaming/basic_streaming.py

Lines changed: 3 additions & 1 deletion
@@ -18,7 +18,9 @@ async def stream_story():

     # Get uncomputed thunk for streaming
     thunk = await m.ainstruct(
-        "Write a short story about a robot learning to paint.", await_result=False
+        "cont up 1 through 100.",
+        await_result=False,
+        strategy=None,  # Must disable strategy for streaming
     )

     # Stream the output - astream() returns accumulated value so far
docs/examples/streaming/interactive_chat.py

Lines changed: 13 additions & 3 deletions
@@ -2,19 +2,23 @@

 This example shows how to build an interactive chat application where
 the AI's responses are streamed incrementally for a better user experience.
+
+Note: This example uses ChatContext which triggers a warning about async usage.
+The warning is expected but safe here because we await the result after streaming.
+For production use, consider using SimpleContext or handling the context updates manually.
 """

 # pytest: ollama, llm

 import asyncio

 import mellea
-from mellea.stdlib.context import ChatContext
+from mellea.stdlib.context import SimpleContext


 async def interactive_chat():
     """Run an interactive chat session with streaming responses."""
-    m = mellea.start_session(ctx=ChatContext())
+    m = mellea.start_session(ctx=SimpleContext())

     print("Chat with the AI (type 'quit' to exit)")
     print("-" * 50)
@@ -27,7 +31,11 @@ async def interactive_chat():
         print("AI: ", end="", flush=True)

         # Stream the response
-        thunk = await m.ainstruct(user_input, await_result=False)
+        thunk = await m.ainstruct(
+            user_input,
+            await_result=False,
+            strategy=None,  # Must disable strategy for streaming
+        )

         last_length = 0
         while not thunk.is_computed():
@@ -37,6 +45,8 @@ async def interactive_chat():
             print(new_content, end="", flush=True)
             last_length = len(current_value)

+        # Await the final result to update context properly
+        await thunk.avalue()
         print()  # New line after response
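The added `await thunk.avalue()` is the step that lets the session record the completed response after streaming finishes. A sketch with a hypothetical `ChatThunk` mock (not mellea's real class; its `avalue()` merely sets a flag standing in for the context update):

```python
import asyncio


class ChatThunk:
    """Hypothetical thunk: astream() drains chunks, avalue() finalizes."""

    def __init__(self, chunks):
        self._chunks = list(chunks)
        self._value = ""
        self.finalized = False

    def is_computed(self):
        return not self._chunks

    async def astream(self):
        if self._chunks:
            self._value += self._chunks.pop(0)
        return self._value

    async def avalue(self):
        # Stands in for the bookkeeping that records the reply in the context
        self.finalized = True
        return self._value


async def chat_turn(thunk):
    # Drain the stream for display ...
    while not thunk.is_computed():
        await thunk.astream()
    # ... then await the final value so the context update actually runs
    final = await thunk.avalue()
    return final, thunk.finalized


print(asyncio.run(chat_turn(ChatThunk(["Hi", " there"]))))  # ('Hi there', True)
```

Without the final `avalue()` call, the streamed text would be displayed but the `finalized` flag (the stand-in for the recorded chat turn) would stay `False`.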

docs/tutorial.md

Lines changed: 4 additions & 3 deletions
@@ -1434,7 +1434,7 @@ Mellea supports streaming model outputs, allowing you to process tokens as they

 ### Streaming with Async Functions

-To enable streaming, use the async versions of session functions (`ainstruct`, `aact`) with the `await_result=False` parameter:
+To enable streaming, use the async versions of session functions (`ainstruct`, `aact`) with the `await_result=False` parameter and `strategy=None`:

 ```python
 # file: https://github.com/generative-computing/mellea/blob/main/docs/examples/streaming/basic_streaming.py#L1-L35
@@ -1447,7 +1447,8 @@ async def stream_story():
     # Get uncomputed thunk for streaming
     thunk = await m.ainstruct(
         "Write a short story about a robot learning to paint.",
-        await_result=False
+        await_result=False,
+        strategy=None  # Must disable strategy for streaming
     )

     # Stream the output - astream() returns accumulated value so far
@@ -1496,7 +1497,7 @@ async for chunk in thunk.astream():  # Stream the generation

 Therefore, sync functions always await the result internally and return `ComputedModelOutputThunk`.

-**Streaming and sampling are incompatible**: When using `SamplingStrategy` or `return_sampling_results=True`, the function must await the complete result to perform validation. In these cases, the function always returns a computed result regardless of the `await_result` parameter.
+**Streaming and sampling are incompatible**: When using `SamplingStrategy` or `return_sampling_results=True`, the function must await the complete result to perform validation. In these cases, the function always returns a computed result regardless of the `await_result` parameter. To enable streaming, you must explicitly set `strategy=None` (the default is `RejectionSamplingStrategy(loop_budget=2)`).
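The dispatch rule the tutorial paragraph describes (a sampling strategy forces a computed result; only `await_result=False` combined with `strategy=None` yields something streamable) can be sketched as below. `ainstruct` here is a hypothetical stand-in, not mellea's real signature:

```python
import asyncio


async def ainstruct(prompt, await_result=True, strategy="rejection"):
    """Hypothetical sketch of the dispatch rule above (not mellea's real API)."""

    async def generate():
        return f"response to {prompt!r}"

    if strategy is not None or await_result:
        # A sampling strategy must validate the complete output,
        # so the call always returns fully computed text here.
        return await generate()
    # Only await_result=False AND strategy=None yields a streamable handle
    return asyncio.ensure_future(generate())


async def main():
    computed = await ainstruct("hi")  # default strategy -> always computed
    handle = await ainstruct("hi", await_result=False, strategy=None)
    streamable = isinstance(handle, asyncio.Task)
    await handle  # settle the pending task before the loop closes
    return isinstance(computed, str), streamable


print(asyncio.run(main()))  # (True, True)
```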

### Practical Example: Interactive Chat
