"""OpenAI model provider.
- Docs: https://platform.openai.com/docs/overview
"""
import base64
import json
import logging
import mimetypes
from collections.abc import AsyncGenerator, AsyncIterator
from contextlib import asynccontextmanager
from typing import Any, Protocol, TypedDict, TypeVar, cast
import openai
from openai.types.chat.parsed_chat_completion import ParsedChatCompletion
from pydantic import BaseModel
from typing_extensions import Unpack, override
from ..types.content import ContentBlock, Messages, SystemContentBlock
from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException
from ..types.streaming import StreamEvent
from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse
from ._validation import _has_location_source, validate_config_keys
from .model import Model
logger = logging.getLogger(__name__)
T = TypeVar("T", bound=BaseModel)
# Alternative context overflow error messages
# These are commonly returned by OpenAI-compatible endpoints wrapping other providers
# (e.g., Databricks serving Bedrock models)
_CONTEXT_OVERFLOW_MESSAGES = [
"Input is too long for requested model",
"input length and `max_tokens` exceed context limit",
"too many total text bytes",
]

class Client(Protocol):
    """Protocol defining the OpenAI-compatible interface for the underlying provider client."""

    @property
    # pragma: no cover
    def chat(self) -> Any:
        """Chat completions interface."""
        ...

class OpenAIModel(Model):
    """OpenAI model provider implementation."""

    client: Client

    class OpenAIConfig(TypedDict, total=False):
        """Configuration options for OpenAI models.

        Attributes:
            model_id: Model ID (e.g., "gpt-4o").
                For a complete list of supported models, see https://platform.openai.com/docs/models.
            params: Model parameters (e.g., max_tokens).
                For a complete list of supported parameters, see
                https://platform.openai.com/docs/api-reference/chat/create.
        """

        model_id: str
        params: dict[str, Any] | None
    def __init__(
        self,
        client: Client | None = None,
        client_args: dict[str, Any] | None = None,
        **model_config: Unpack[OpenAIConfig],
    ) -> None:
        """Initialize provider instance.

        Args:
            client: Pre-configured OpenAI-compatible client to reuse across requests.
                When provided, this client will be reused for all requests and will NOT be closed
                by the model. The caller is responsible for managing the client lifecycle.
                This is useful for:

                - Injecting custom client wrappers (e.g., GuardrailsAsyncOpenAI)
                - Reusing connection pools within a single event loop/worker
                - Centralizing observability, retries, and networking policy
                - Pointing to custom model gateways

                Note: The client should not be shared across different asyncio event loops.
            client_args: Arguments for the OpenAI client (legacy approach).
                For a complete list of supported arguments, see https://pypi.org/project/openai/.
                The ``http_client`` key accepts either an ``httpx.AsyncClient`` instance or a
                zero-argument callable that returns one. When a callable (factory) is provided,
                it is invoked on every request to produce a fresh client, avoiding the
                "closed client" error that occurs when the same instance is reused.
            **model_config: Configuration options for the OpenAI model.

        Raises:
            ValueError: If both `client` and `client_args` are provided.
        """
        validate_config_keys(model_config, self.OpenAIConfig)
        self.config = dict(model_config)

        # Validate that only one client configuration method is provided
        if client is not None and client_args is not None and len(client_args) > 0:
            raise ValueError("Only one of 'client' or 'client_args' should be provided, not both.")

        self._custom_client = client
        self.client_args = client_args or {}

        logger.debug("config=<%s> | initializing", self.config)
    @override
    def update_config(self, **model_config: Unpack[OpenAIConfig]) -> None:  # type: ignore[override]
        """Update the OpenAI model configuration with the provided arguments.

        Args:
            **model_config: Configuration overrides.
        """
        validate_config_keys(model_config, self.OpenAIConfig)
        self.config.update(model_config)

    @override
    def get_config(self) -> OpenAIConfig:
        """Get the OpenAI model configuration.

        Returns:
            The OpenAI model configuration.
        """
        return cast(OpenAIModel.OpenAIConfig, self.config)
    @classmethod
    def format_request_message_content(cls, content: ContentBlock, **kwargs: Any) -> dict[str, Any]:
        """Format an OpenAI compatible content block.

        Args:
            content: Message content.
            **kwargs: Additional keyword arguments for future extensibility.

        Returns:
            OpenAI compatible content block.

        Raises:
            TypeError: If the content block type cannot be converted to an OpenAI-compatible format.
        """
        if "document" in content:
            mime_type = mimetypes.types_map.get(f".{content['document']['format']}", "application/octet-stream")
            file_data = base64.b64encode(content["document"]["source"]["bytes"]).decode("utf-8")
            return {
                "file": {
                    "file_data": f"data:{mime_type};base64,{file_data}",
                    "filename": content["document"]["name"],
                },
                "type": "file",
            }

        if "image" in content:
            mime_type = mimetypes.types_map.get(f".{content['image']['format']}", "application/octet-stream")
            image_data = base64.b64encode(content["image"]["source"]["bytes"]).decode("utf-8")
            return {
                "image_url": {
                    "detail": "auto",
                    "format": mime_type,
                    "url": f"data:{mime_type};base64,{image_data}",
                },
                "type": "image_url",
            }

        if "text" in content:
            return {"text": content["text"], "type": "text"}

        raise TypeError(f"content_type=<{next(iter(content))}> | unsupported type")
    @classmethod
    def format_request_message_tool_call(cls, tool_use: ToolUse, **kwargs: Any) -> dict[str, Any]:
        """Format an OpenAI compatible tool call.

        Args:
            tool_use: Tool use requested by the model.
            **kwargs: Additional keyword arguments for future extensibility.

        Returns:
            OpenAI compatible tool call.
        """
        return {
            "function": {
                "arguments": json.dumps(tool_use["input"]),
                "name": tool_use["name"],
            },
            "id": tool_use["toolUseId"],
            "type": "function",
        }
    @classmethod
    def format_request_tool_message(cls, tool_result: ToolResult, **kwargs: Any) -> dict[str, Any]:
        """Format an OpenAI compatible tool message.

        Args:
            tool_result: Tool result collected from a tool execution.
            **kwargs: Additional keyword arguments for future extensibility.

        Returns:
            OpenAI compatible tool message.
        """
        contents = cast(
            list[ContentBlock],
            [
                {"text": json.dumps(content["json"])} if "json" in content else content
                for content in tool_result["content"]
            ],
        )
        formatted_contents = [cls.format_request_message_content(content) for content in contents]

        # If single text content, use string format for better model compatibility
        if len(formatted_contents) == 1 and formatted_contents[0].get("type") == "text":
            content: str | list[dict[str, Any]] = formatted_contents[0]["text"]
        else:
            content = formatted_contents

        return {
            "role": "tool",
            "tool_call_id": tool_result["toolUseId"],
            "content": content,
        }
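
    # Illustrative sketch (assumption): a single JSON tool result collapses to a
    # plain string, the shape most models handle best.
    #
    #   result = {"toolUseId": "call_1", "content": [{"json": {"ok": True}}]}
    #   OpenAIModel.format_request_tool_message(result)
    #   # -> {"role": "tool", "tool_call_id": "call_1", "content": '{"ok": true}'}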
    @classmethod
    def _split_tool_message_images(cls, tool_message: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any] | None]:
        """Split a tool message into a text-only tool message and an optional user message with images.

        The OpenAI API restricts images to user role messages only. This method extracts any image
        content from a tool message and returns it separately as a user message.

        Args:
            tool_message: A formatted tool message that may contain images.

        Returns:
            A tuple of (tool_message_without_images, user_message_with_images_or_None).
        """
        if tool_message.get("role") != "tool":
            return tool_message, None

        content = tool_message.get("content", [])
        if not isinstance(content, list):
            return tool_message, None

        # Separate image and non-image content
        text_content = []
        image_content = []
        for item in content:
            if isinstance(item, dict) and item.get("type") == "image_url":
                image_content.append(item)
            else:
                text_content.append(item)

        # If no images found, return the original message
        if not image_content:
            return tool_message, None

        # Let the user know that we are modifying the messages for OpenAI compatibility
        logger.warning(
            "tool_call_id=<%s> | Moving image from tool message to a new user message for OpenAI compatibility",
            tool_message["tool_call_id"],
        )

        # Append a message to the text content to inform the model about the upcoming image
        text_content.append(
            {
                "type": "text",
                "text": (
                    "Tool successfully returned an image. The image is being provided in the following user message."
                ),
            }
        )

        # Create the clean tool message with the updated text content
        tool_message_clean = {
            "role": "tool",
            "tool_call_id": tool_message["tool_call_id"],
            "content": text_content,
        }

        # Create a user message with only images
        user_message_with_images = {"role": "user", "content": image_content}

        return tool_message_clean, user_message_with_images
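
    # Illustrative sketch (assumption): a tool message carrying an image is split
    # into a text-only tool message plus a trailing user message holding the image.
    #
    #   tool_msg = {
    #       "role": "tool",
    #       "tool_call_id": "call_1",
    #       "content": [
    #           {"type": "text", "text": "done"},
    #           {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
    #       ],
    #   }
    #   clean, user_msg = OpenAIModel._split_tool_message_images(tool_msg)
    #   # clean["content"] keeps the text plus an explanatory note;
    #   # user_msg == {"role": "user", "content": [<the image block>]}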
    @classmethod
    def _format_request_tool_choice(cls, tool_choice: ToolChoice | None) -> dict[str, Any]:
        """Format a tool choice for OpenAI compatibility.

        Args:
            tool_choice: Tool choice configuration in Bedrock format.

        Returns:
            OpenAI compatible tool choice format.
        """
        if not tool_choice:
            return {}

        match tool_choice:
            case {"auto": _}:
                return {"tool_choice": "auto"}  # OpenAI SDK doesn't define constants for these values
            case {"any": _}:
                return {"tool_choice": "required"}
            case {"tool": {"name": tool_name}}:
                return {"tool_choice": {"type": "function", "function": {"name": tool_name}}}
            case _:
                # This should not happen with proper typing, but handle gracefully
                return {"tool_choice": "auto"}
    @classmethod
    def _format_system_messages(
        cls,
        system_prompt: str | None = None,
        *,
        system_prompt_content: list[SystemContentBlock] | None = None,
        **kwargs: Any,
    ) -> list[dict[str, Any]]:
        """Format system messages for OpenAI-compatible providers.

        Args:
            system_prompt: System prompt to provide context to the model.
            system_prompt_content: System prompt content blocks to provide context to the model.
            **kwargs: Additional keyword arguments for future extensibility.

        Returns:
            List of formatted system messages.
        """
        # Handle backward compatibility: if system_prompt is provided but system_prompt_content is None
        if system_prompt and system_prompt_content is None:
            system_prompt_content = [{"text": system_prompt}]

        # TODO: Handle caching blocks https://github.com/strands-agents/sdk-python/issues/1140
        return [
            {"role": "system", "content": content["text"]}
            for content in system_prompt_content or []
            if "text" in content
        ]
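
    # Illustrative sketch (assumption): both entry points produce the same shape.
    #
    #   OpenAIModel._format_system_messages("be brief")
    #   # -> [{"role": "system", "content": "be brief"}]
    #   OpenAIModel._format_system_messages(system_prompt_content=[{"text": "be brief"}])
    #   # -> [{"role": "system", "content": "be brief"}]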
    @classmethod
    def _format_regular_messages(cls, messages: Messages, **kwargs: Any) -> list[dict[str, Any]]:
        """Format regular messages for OpenAI-compatible providers.

        Args:
            messages: List of message objects to be processed by the model.
            **kwargs: Additional keyword arguments for future extensibility.

        Returns:
            List of formatted messages.
        """
        formatted_messages = []

        for message in messages:
            contents = message["content"]

            # Check for reasoningContent and warn user
            if any("reasoningContent" in content for content in contents):
                logger.warning(
                    "reasoningContent is not supported in multi-turn conversations with the Chat Completions API."
                )

            # Filter out content blocks that shouldn't be formatted
            filtered_contents = []
            for content in contents:
                if any(block_type in content for block_type in ["toolResult", "toolUse", "reasoningContent"]):
                    continue
                if _has_location_source(content):
                    logger.warning("Location sources are not supported by OpenAI | skipping content block")
                    continue
                filtered_contents.append(content)

            formatted_contents = [cls.format_request_message_content(content) for content in filtered_contents]
            formatted_tool_calls = [
                cls.format_request_message_tool_call(content["toolUse"]) for content in contents if "toolUse" in content
            ]
            formatted_tool_messages = [
                cls.format_request_tool_message(content["toolResult"])
                for content in contents
                if "toolResult" in content
            ]

            formatted_message = {
                "role": message["role"],
                **({"content": formatted_contents} if formatted_contents else {}),
                **({"tool_calls": formatted_tool_calls} if formatted_tool_calls else {}),
            }
            formatted_messages.append(formatted_message)

            # Process tool messages to extract images into separate user messages.
            # The OpenAI API requires images to be in user role messages only.
            # All tool messages must be grouped together before any user messages with images.
            user_messages_with_images = []
            for tool_msg in formatted_tool_messages:
                tool_msg_clean, user_msg_with_images = cls._split_tool_message_images(tool_msg)
                formatted_messages.append(tool_msg_clean)
                if user_msg_with_images:
                    user_messages_with_images.append(user_msg_with_images)
            formatted_messages.extend(user_messages_with_images)

        return formatted_messages
    @classmethod
    def format_request_messages(
        cls,
        messages: Messages,
        system_prompt: str | None = None,
        *,
        system_prompt_content: list[SystemContentBlock] | None = None,
        **kwargs: Any,
    ) -> list[dict[str, Any]]:
        """Format an OpenAI compatible messages array.

        Args:
            messages: List of message objects to be processed by the model.
            system_prompt: System prompt to provide context to the model.
            system_prompt_content: System prompt content blocks to provide context to the model.
            **kwargs: Additional keyword arguments for future extensibility.

        Returns:
            An OpenAI compatible messages array.
        """
        formatted_messages = cls._format_system_messages(system_prompt, system_prompt_content=system_prompt_content)
        formatted_messages.extend(cls._format_regular_messages(messages))

        return [message for message in formatted_messages if "content" in message or "tool_calls" in message]
    def format_request(
        self,
        messages: Messages,
        tool_specs: list[ToolSpec] | None = None,
        system_prompt: str | None = None,
        tool_choice: ToolChoice | None = None,
        *,
        system_prompt_content: list[SystemContentBlock] | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Format an OpenAI compatible chat streaming request.

        Args:
            messages: List of message objects to be processed by the model.
            tool_specs: List of tool specifications to make available to the model.
            system_prompt: System prompt to provide context to the model.
            tool_choice: Selection strategy for tool invocation.
            system_prompt_content: System prompt content blocks to provide context to the model.
            **kwargs: Additional keyword arguments for future extensibility.

        Returns:
            An OpenAI compatible chat streaming request.

        Raises:
            TypeError: If a message contains a content block type that cannot be converted to an
                OpenAI-compatible format.
        """
        return {
            "messages": self.format_request_messages(
                messages, system_prompt, system_prompt_content=system_prompt_content
            ),
            "model": self.config["model_id"],
            "stream": True,
            "stream_options": {"include_usage": True},
            "tools": [
                {
                    "type": "function",
                    "function": {
                        "name": tool_spec["name"],
                        "description": tool_spec["description"],
                        "parameters": tool_spec["inputSchema"]["json"],
                    },
                }
                for tool_spec in tool_specs or []
            ],
            **(self._format_request_tool_choice(tool_choice)),
            **cast(dict[str, Any], self.config.get("params", {})),
        }
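
    # Illustrative sketch (assumption): the resulting payload for a minimal call,
    # before any ``params`` overrides are merged in.
    #
    #   model = OpenAIModel(client_args={"api_key": "sk-..."}, model_id="gpt-4o")
    #   model.format_request([{"role": "user", "content": [{"text": "hi"}]}])
    #   # -> {"messages": [{"role": "user", "content": [{"text": "hi", "type": "text"}]}],
    #   #     "model": "gpt-4o", "stream": True,
    #   #     "stream_options": {"include_usage": True}, "tools": []}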
    def format_chunk(self, event: dict[str, Any], **kwargs: Any) -> StreamEvent:
        """Format an OpenAI response event into a standardized message chunk.

        Args:
            event: A response event from the OpenAI compatible model.
            **kwargs: Additional keyword arguments for future extensibility.

        Returns:
            The formatted chunk.

        Raises:
            RuntimeError: If chunk_type is not recognized.
                This error should never be encountered as chunk_type is controlled in the stream method.
        """
        match event["chunk_type"]:
            case "message_start":
                return {"messageStart": {"role": "assistant"}}

            case "content_start":
                if event["data_type"] == "tool":
                    return {
                        "contentBlockStart": {
                            "start": {
                                "toolUse": {
                                    "name": event["data"].function.name,
                                    "toolUseId": event["data"].id,
                                }
                            }
                        }
                    }

                return {"contentBlockStart": {"start": {}}}

            case "content_delta":
                if event["data_type"] == "tool":
                    return {
                        "contentBlockDelta": {"delta": {"toolUse": {"input": event["data"].function.arguments or ""}}}
                    }

                if event["data_type"] == "reasoning_content":
                    return {"contentBlockDelta": {"delta": {"reasoningContent": {"text": event["data"]}}}}

                return {"contentBlockDelta": {"delta": {"text": event["data"]}}}

            case "content_stop":
                return {"contentBlockStop": {}}

            case "message_stop":
                match event["data"]:
                    case "tool_calls":
                        return {"messageStop": {"stopReason": "tool_use"}}
                    case "length":
                        return {"messageStop": {"stopReason": "max_tokens"}}
                    case _:
                        return {"messageStop": {"stopReason": "end_turn"}}

            case "metadata":
                return {
                    "metadata": {
                        "usage": {
                            "inputTokens": event["data"].prompt_tokens,
                            "outputTokens": event["data"].completion_tokens,
                            "totalTokens": event["data"].total_tokens,
                        },
                        "metrics": {
                            "latencyMs": 0,  # TODO
                        },
                    },
                }

            case _:
                raise RuntimeError(f"chunk_type=<{event['chunk_type']}> | unknown type")
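
    # Illustrative sketch (assumption; ``model`` is an OpenAIModel instance): text
    # deltas map to Bedrock-style stream events.
    #
    #   model.format_chunk({"chunk_type": "content_delta", "data_type": "text", "data": "Hello"})
    #   # -> {"contentBlockDelta": {"delta": {"text": "Hello"}}}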
    @asynccontextmanager
    async def _get_client(self) -> AsyncIterator[Any]:
        """Get an OpenAI client for making requests.

        This context manager handles client lifecycle management:

        - If an injected client was provided during initialization, it yields that client
          without closing it (caller manages lifecycle).
        - Otherwise, creates a new AsyncOpenAI client from client_args and automatically
          closes it when the context exits.

        If ``http_client`` in *client_args* is a callable (factory), it is invoked on each
        request to produce a fresh ``httpx.AsyncClient``, preventing the "closed client" error
        that occurs when the same client instance is reused across ``async with`` blocks.

        Note: We create a new client per request to avoid connection sharing in the underlying
        httpx client, as the asyncio event loop does not allow connections to be shared.
        For more details, see https://github.com/encode/httpx/discussions/2959.

        Yields:
            Client: An OpenAI-compatible client instance.
        """
        if self._custom_client is not None:
            # Use the injected client (caller manages lifecycle)
            yield self._custom_client
        else:
            # Create a new client from client_args.
            # We initialize an OpenAI context on every request so as to avoid connection sharing in the underlying
            # httpx client. The asyncio event loop does not allow connections to be shared. For more details, please
            # refer to https://github.com/encode/httpx/discussions/2959.
            resolved_args = dict(self.client_args)
            http_client = resolved_args.get("http_client")
            if http_client is not None and callable(http_client) and not hasattr(http_client, "send"):
                resolved_args["http_client"] = http_client()
            async with openai.AsyncOpenAI(**resolved_args) as client:
                yield client
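
    # Illustrative sketch (assumption): why the ``hasattr(http_client, "send")``
    # check above distinguishes a live client from a factory. A real
    # ``httpx.AsyncClient`` instance exposes a ``send`` method, while a
    # zero-argument factory is a plain callable without one.
    #
    #   import httpx
    #
    #   live = httpx.AsyncClient()             # has .send -> passed through as-is
    #   factory = lambda: httpx.AsyncClient()  # callable, no .send -> invoked per request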
    @override
    async def stream(
        self,
        messages: Messages,
        tool_specs: list[ToolSpec] | None = None,
        system_prompt: str | None = None,
        *,
        tool_choice: ToolChoice | None = None,
        **kwargs: Any,
    ) -> AsyncGenerator[StreamEvent, None]:
        """Stream conversation with the OpenAI model.

        Args:
            messages: List of message objects to be processed by the model.
            tool_specs: List of tool specifications to make available to the model.
            system_prompt: System prompt to provide context to the model.
            tool_choice: Selection strategy for tool invocation.
            **kwargs: Additional keyword arguments for future extensibility.

        Yields:
            Formatted message chunks from the model.

        Raises:
            ContextWindowOverflowException: If the input exceeds the model's context window.
            ModelThrottledException: If the request is throttled by OpenAI (rate limits).
        """
        logger.debug("formatting request")
        request = self.format_request(messages, tool_specs, system_prompt, tool_choice)
        logger.debug("formatted request=<%s>", request)

        logger.debug("invoking model")
        async with self._get_client() as client:
            try:
                response = await client.chat.completions.create(**request)
            except openai.BadRequestError as e:
                # Check if this is a context length exceeded error
                if hasattr(e, "code") and e.code == "context_length_exceeded":
                    logger.warning("OpenAI threw context window overflow error")
                    raise ContextWindowOverflowException(str(e)) from e
                # Re-raise other BadRequestError exceptions
                raise
            except openai.RateLimitError as e:
                # All rate limit errors should be treated as throttling, not context overflow.
                # Rate limits (including TPM) require waiting/retrying, not context reduction.
                logger.warning("OpenAI threw rate limit error")
                raise ModelThrottledException(str(e)) from e
            except openai.APIError as e:
                # Check for alternative context overflow error messages
                error_message = str(e)
                if any(overflow_msg in error_message for overflow_msg in _CONTEXT_OVERFLOW_MESSAGES):
                    logger.warning("context window overflow error detected")
                    raise ContextWindowOverflowException(error_message) from e
                # Re-raise other APIError exceptions
                raise

            logger.debug("got response from model")
            yield self.format_chunk({"chunk_type": "message_start"})

            tool_calls: dict[int, list[Any]] = {}
            data_type = None
            finish_reason = None  # Store finish_reason for later use
            event = None  # Initialize for scope safety

            async for event in response:
                # Defensive: skip events with empty or missing choices
                if not getattr(event, "choices", None):
                    continue
                choice = event.choices[0]

                if hasattr(choice.delta, "reasoning_content") and choice.delta.reasoning_content:
                    chunks, data_type = self._stream_switch_content("reasoning_content", data_type)
                    for chunk in chunks:
                        yield chunk
                    yield self.format_chunk(
                        {
                            "chunk_type": "content_delta",
                            "data_type": data_type,
                            "data": choice.delta.reasoning_content,
                        }
                    )

                if choice.delta.content:
                    chunks, data_type = self._stream_switch_content("text", data_type)
                    for chunk in chunks:
                        yield chunk
                    yield self.format_chunk(
                        {"chunk_type": "content_delta", "data_type": data_type, "data": choice.delta.content}
                    )

                for tool_call in choice.delta.tool_calls or []:
                    tool_calls.setdefault(tool_call.index, []).append(tool_call)

                if choice.finish_reason:
                    finish_reason = choice.finish_reason  # Store for use outside the loop
                    if data_type:
                        yield self.format_chunk({"chunk_type": "content_stop", "data_type": data_type})
                    break

            for tool_deltas in tool_calls.values():
                yield self.format_chunk({"chunk_type": "content_start", "data_type": "tool", "data": tool_deltas[0]})
                for tool_delta in tool_deltas:
                    yield self.format_chunk({"chunk_type": "content_delta", "data_type": "tool", "data": tool_delta})
                yield self.format_chunk({"chunk_type": "content_stop", "data_type": "tool"})

            yield self.format_chunk({"chunk_type": "message_stop", "data": finish_reason or "end_turn"})

            # Skip remaining events as we don't have use for anything except the final usage payload
            async for event in response:
                _ = event

            if event and hasattr(event, "usage") and event.usage:
                yield self.format_chunk({"chunk_type": "metadata", "data": event.usage})

            logger.debug("finished streaming response from model")
    def _stream_switch_content(self, data_type: str, prev_data_type: str | None) -> tuple[list[StreamEvent], str]:
        """Handle switching to a new content stream.

        Args:
            data_type: The next content data type.
            prev_data_type: The previous content data type.

        Returns:
            Tuple containing:

            - Stop block for the previous content and the start block for the next content.
            - Next content data type.
        """
        chunks = []
        if data_type != prev_data_type:
            if prev_data_type is not None:
                chunks.append(self.format_chunk({"chunk_type": "content_stop", "data_type": prev_data_type}))
            chunks.append(self.format_chunk({"chunk_type": "content_start", "data_type": data_type}))

        return chunks, data_type
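
    # Illustrative sketch (assumption; ``model`` is an OpenAIModel instance):
    # switching from reasoning to text emits a stop for the old block and a start
    # for the new one.
    #
    #   chunks, data_type = model._stream_switch_content("text", "reasoning_content")
    #   # chunks == [{"contentBlockStop": {}}, {"contentBlockStart": {"start": {}}}]
    #   # data_type == "text"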
    @override
    async def structured_output(
        self, output_model: type[T], prompt: Messages, system_prompt: str | None = None, **kwargs: Any
    ) -> AsyncGenerator[dict[str, T | Any], None]:
        """Get structured output from the model.

        Args:
            output_model: The output model to use for the agent.
            prompt: The prompt messages to use for the agent.
            system_prompt: System prompt to provide context to the model.
            **kwargs: Additional keyword arguments for future extensibility.

        Yields:
            Model events with the last being the structured output.

        Raises:
            ContextWindowOverflowException: If the input exceeds the model's context window.
            ModelThrottledException: If the request is throttled by OpenAI (rate limits).
        """
        async with self._get_client() as client:
            try:
                response: ParsedChatCompletion = await client.beta.chat.completions.parse(
                    model=self.get_config()["model_id"],
                    messages=self.format_request(prompt, system_prompt=system_prompt)["messages"],
                    response_format=output_model,
                )
            except openai.BadRequestError as e:
                # Check if this is a context length exceeded error
                if hasattr(e, "code") and e.code == "context_length_exceeded":
                    logger.warning("OpenAI threw context window overflow error")
                    raise ContextWindowOverflowException(str(e)) from e
                # Re-raise other BadRequestError exceptions
                raise
            except openai.RateLimitError as e:
                # All rate limit errors should be treated as throttling, not context overflow.
                # Rate limits (including TPM) require waiting/retrying, not context reduction.
                logger.warning("OpenAI threw rate limit error")
                raise ModelThrottledException(str(e)) from e
            except openai.APIError as e:
                # Check for alternative context overflow error messages
                error_message = str(e)
                if any(overflow_msg in error_message for overflow_msg in _CONTEXT_OVERFLOW_MESSAGES):
                    logger.warning("context window overflow error detected")
                    raise ContextWindowOverflowException(error_message) from e
                # Re-raise other APIError exceptions
                raise

        parsed: T | None = None
        if len(response.choices) > 1:
            raise ValueError("Multiple choices found in the OpenAI response.")

        # Find the first choice with a parsed output of the requested type
        for choice in response.choices:
            if isinstance(choice.message.parsed, output_model):
                parsed = choice.message.parsed
                break

        if parsed:
            yield {"output": parsed}
        else:
            raise ValueError("No valid tool use or tool use input was found in the OpenAI response.")