Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion livekit-plugins/livekit-plugins-openai/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Support for OpenAI Realtime API, Responses API, LLM, TTS, and STT APIs.

Also includes support for a large number of OpenAI-compatible APIs including [Azure OpenAI](https://docs.livekit.io/agents/integrations/llm/azure-openai/), [Cerebras](https://docs.livekit.io/agents/integrations/cerebras/), [Fireworks](https://docs.livekit.io/agents/integrations/llm/fireworks/), [Perplexity](https://docs.livekit.io/agents/integrations/llm/perplexity/), [Telnyx](https://docs.livekit.io/agents/integrations/llm/telnyx/), [xAI](https://docs.livekit.io/agents/integrations/llm/xai/), [Ollama](https://docs.livekit.io/agents/integrations/llm/ollama/), [DeepSeek](https://docs.livekit.io/agents/integrations/llm/deepseek/), and [SambaNova](https://docs.livekit.io/agents/integrations/llm/sambanova/).
Also includes support for a large number of OpenAI-compatible APIs including [Azure OpenAI](https://docs.livekit.io/agents/integrations/llm/azure-openai/), [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-openai.html), [Cerebras](https://docs.livekit.io/agents/integrations/cerebras/), [Fireworks](https://docs.livekit.io/agents/integrations/llm/fireworks/), [Perplexity](https://docs.livekit.io/agents/integrations/llm/perplexity/), [Telnyx](https://docs.livekit.io/agents/integrations/llm/telnyx/), [xAI](https://docs.livekit.io/agents/integrations/llm/xai/), [Ollama](https://docs.livekit.io/agents/integrations/llm/ollama/), [DeepSeek](https://docs.livekit.io/agents/integrations/llm/deepseek/), and [SambaNova](https://docs.livekit.io/agents/integrations/llm/sambanova/).

See [https://docs.livekit.io/agents/integrations/openai/](https://docs.livekit.io/agents/integrations/openai/) for more information.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@

Support for OpenAI Realtime API, Responses API, LLM, TTS, and STT APIs.

Also includes support for a large number of OpenAI-compatible APIs including Azure OpenAI, Cerebras,
Fireworks, Perplexity, Telnyx, xAI, Ollama, DeepSeek, OpenRouter, and OVHcloud AI Endpoints.
Also includes support for a large number of OpenAI-compatible APIs including Azure OpenAI, Amazon
Bedrock, Cerebras, Fireworks, Perplexity, Telnyx, xAI, Ollama, DeepSeek, OpenRouter, and OVHcloud
AI Endpoints.

See https://docs.livekit.io/agents/integrations/openai/ and
https://docs.livekit.io/agents/integrations/llm/ for more information.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from openai.types.chat import ChatCompletionToolChoiceOptionParam, completion_create_params

from .models import (
BedrockChatModels,
CerebrasChatModels,
ChatModels,
CometAPIChatModels,
Expand All @@ -55,7 +56,11 @@
XAIChatModels,
_supports_reasoning_effort,
)
from .utils import AsyncAzureADTokenProvider
from .utils import (
AsyncAzureADTokenProvider,
AsyncBedrockTokenProvider,
resolve_bedrock_base_url,
)

lk_oai_debug = int(os.getenv("LK_OPENAI_DEBUG", 0))

Expand Down Expand Up @@ -124,7 +129,7 @@ def __init__(
super().__init__()

if not is_given(reasoning_effort) and _supports_reasoning_effort(model):
if model in ["gpt-5.1", "gpt-5.2", "gpt-5.4"]:
if model.removeprefix("openai.") in ["gpt-5.1", "gpt-5.2", "gpt-5.4"]:
reasoning_effort = "none"
else:
reasoning_effort = "minimal"
Expand Down Expand Up @@ -255,6 +260,85 @@ def with_azure(
llm._owns_client = True
return llm

@staticmethod
def with_aws_bedrock(
    *,
    model: str | BedrockChatModels = "openai.gpt-oss-120b",
    api_key: str | None = None,
    bedrock_token_provider: AsyncBedrockTokenProvider | None = None,
    aws_region: str | None = None,
    base_url: str | None = None,
    organization: str | None = None,
    project: str | None = None,
    user: NotGivenOr[str] = NOT_GIVEN,
    safety_identifier: NotGivenOr[str] = NOT_GIVEN,
    prompt_cache_key: NotGivenOr[str] = NOT_GIVEN,
    temperature: NotGivenOr[float] = NOT_GIVEN,
    parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
    tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
    timeout: httpx.Timeout | None = None,
    reasoning_effort: NotGivenOr[ReasoningEffort] = NOT_GIVEN,
    top_p: NotGivenOr[float] = NOT_GIVEN,
    verbosity: NotGivenOr[Verbosity] = NOT_GIVEN,
    max_completion_tokens: NotGivenOr[int] = NOT_GIVEN,
) -> LLM:
    """
    Create a new instance of a Chat Completions LLM backed by OpenAI models on
    Amazon Bedrock.

    Amazon Bedrock exposes an OpenAI-compatible endpoint for OpenAI's open-weight
    models (e.g. ``openai.gpt-oss-120b``). This builds an
    ``openai.AsyncBedrockOpenAI`` client that sends the Bedrock bearer token in
    the ``Authorization`` header.

    Endpoint selection is delegated to ``resolve_bedrock_base_url``: for
    ``openai.gpt-oss*`` models it yields
    ``https://bedrock-mantle.<region>.api.aws/v1`` when a region is known and
    neither ``base_url`` nor ``AWS_BEDROCK_BASE_URL`` is set. In every other case
    ``base_url`` is passed through unchanged and the SDK applies its own default
    (``https://bedrock-mantle.<region>.api.aws/openai/v1``).

    Only the ``gpt-oss`` models are available over Chat Completions on Bedrock. The
    ``gpt-5.x`` models are Responses-only — use
    :meth:`livekit.plugins.openai.responses.LLM.with_aws_bedrock` for those.

    This automatically infers the following arguments from their corresponding
    environment variables if they are not provided:
    - ``api_key`` from ``AWS_BEARER_TOKEN_BEDROCK``
    - ``aws_region`` from ``AWS_REGION`` or ``AWS_DEFAULT_REGION``
    - ``base_url`` from ``AWS_BEDROCK_BASE_URL``

    For refreshable credentials, pass ``bedrock_token_provider`` (for example
    ``aws_bedrock_token_generator.provide_token``) instead of ``api_key``; the two
    are mutually exclusive.

    Args:
        model: Bedrock model ID, including the ``openai.`` prefix.
        api_key: Static Bedrock bearer token.
        bedrock_token_provider: Callable returning a bearer token (or an awaitable
            of one); alternative to ``api_key``.
        aws_region: Region used to build the regional endpoint.
        base_url: Explicit endpoint; when given it is used as-is.
        organization: Forwarded to the client constructor.
        project: Forwarded to the client constructor.
        timeout: HTTP timeout for the client. Defaults to a 15 s connect timeout
            and 5 s read/write/pool timeouts.

    All remaining keyword arguments are forwarded unchanged to ``LLM``.

    Returns:
        An ``LLM`` that owns the Bedrock client it was built with.
    """
    bedrock_client = openai.AsyncBedrockOpenAI(
        api_key=api_key,
        bedrock_token_provider=bedrock_token_provider,
        aws_region=aws_region,
        # gpt-oss lives on the ``/v1`` path; the helper returns ``base_url``
        # untouched whenever the SDK default should apply.
        base_url=resolve_bedrock_base_url(model, aws_region, base_url),
        organization=organization,
        project=project,
        # NOTE(review): SDK-level retries are disabled here; presumably retrying
        # is handled by the caller — confirm.
        max_retries=0,
        timeout=timeout
        if timeout
        else httpx.Timeout(connect=15.0, read=5.0, write=5.0, pool=5.0),
    )

    llm = LLM(
        model=model,
        client=bedrock_client,
        user=user,
        temperature=temperature,
        parallel_tool_calls=parallel_tool_calls,
        tool_choice=tool_choice,
        reasoning_effort=reasoning_effort,
        safety_identifier=safety_identifier,
        prompt_cache_key=prompt_cache_key,
        top_p=top_p,
        verbosity=verbosity,
        max_completion_tokens=max_completion_tokens,
        # Bedrock currently serves the gpt-oss open-weight models behind an
        # OpenAI-compatible shim, like the other open-model providers above.
        _strict_tool_schema=False,
    )
    # The client was created here rather than supplied by the caller, so the
    # LLM is marked as its owner.
    llm._owns_client = True
    return llm

@staticmethod
def with_cerebras(
*,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,32 @@
"E5-Mistral-7B-Instruct",
]

# OpenAI models served through Amazon Bedrock's OpenAI-compatible `bedrock-mantle`
# endpoint. Model IDs on this endpoint omit the `-1:0` Bedrock-native version suffix
# (that suffix is only used by the bedrock-runtime InvokeModel/Converse APIs).
# Every ID carries an `openai.` prefix; `_supports_reasoning_effort` strips that
# prefix before matching, so the reasoning defaults apply to these IDs as well.
# These aliases are editor/type-checker hints only: the `with_aws_bedrock` factories
# accept `str | <alias>`, so any other model ID string is still allowed.

# Models exposed over the Chat Completions API (gpt-oss only).
# See https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-openai-gpt-oss-120b.html
BedrockChatModels = Literal[
"openai.gpt-oss-20b",
"openai.gpt-oss-120b",
]

# Models exposed over the Responses API. The gpt-5.x models are Responses-only on
# Bedrock; gpt-oss supports both Responses and Chat Completions, which is why the
# gpt-oss IDs appear in both aliases.
# See https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-openai-gpt-55.html
BedrockResponsesModels = Literal[
"openai.gpt-5.5",
"openai.gpt-5.4",
"openai.gpt-oss-120b",
"openai.gpt-oss-20b",
]


def _supports_reasoning_effort(model: ChatModels | str) -> bool:
# Amazon Bedrock exposes the same models under an ``openai.`` prefix
# (e.g. ``openai.gpt-5.4``); normalize it so the defaults below still apply.
model = model.removeprefix("openai.")
return model in [
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
"gpt-5.4",
"gpt-5.4-mini",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@
from openai.types.shared_params import ResponsesModel

from ..log import logger
from ..models import _supports_reasoning_effort
from ..models import BedrockResponsesModels, _supports_reasoning_effort
from ..tools import OpenAITool
from ..utils import AsyncBedrockTokenProvider, resolve_bedrock_base_url

ServiceTier = Literal["auto", "default", "flex", "scale", "priority"]
Verbosity = Literal["low", "medium", "high"]
Expand Down Expand Up @@ -186,7 +187,7 @@ def __init__(
super().__init__()

if not is_given(reasoning) and _supports_reasoning_effort(model):
if model in ["gpt-5.1", "gpt-5.2", "gpt-5.4"]:
if model.removeprefix("openai.") in ["gpt-5.1", "gpt-5.2", "gpt-5.4"]:
reasoning = Reasoning(effort="none")
else:
reasoning = Reasoning(effort="minimal")
Expand Down Expand Up @@ -250,6 +251,79 @@ def __init__(
),
)

@staticmethod
def with_aws_bedrock(
    *,
    model: str | BedrockResponsesModels = "openai.gpt-5.5",
    api_key: str | None = None,
    bedrock_token_provider: AsyncBedrockTokenProvider | None = None,
    aws_region: str | None = None,
    base_url: str | None = None,
    organization: str | None = None,
    project: str | None = None,
    user: NotGivenOr[str] = NOT_GIVEN,
    temperature: NotGivenOr[float] = NOT_GIVEN,
    parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
    reasoning: NotGivenOr[Reasoning] = NOT_GIVEN,
    tool_choice: NotGivenOr[ToolChoice | Literal["auto", "required", "none"]] = NOT_GIVEN,
    store: NotGivenOr[bool] = NOT_GIVEN,
    metadata: NotGivenOr[dict[str, str]] = NOT_GIVEN,
    service_tier: NotGivenOr[ServiceTier] = NOT_GIVEN,
    verbosity: NotGivenOr[Verbosity] = NOT_GIVEN,
    max_output_tokens: NotGivenOr[int] = NOT_GIVEN,
    timeout: httpx.Timeout | None = None,
) -> LLM:
    """
    Build a Responses API LLM that talks to OpenAI models hosted on Amazon Bedrock.

    Both the ``gpt-5.x`` family (e.g. ``openai.gpt-5.5``, ``openai.gpt-5.4``) and
    the ``gpt-oss`` models are reachable through the OpenAI-compatible Responses
    API on the regional ``bedrock-mantle`` endpoint. An
    ``openai.AsyncBedrockOpenAI`` client is created for the returned LLM, and the
    WebSocket transport is switched off because Bedrock only exposes the HTTP
    Responses path.

    Arguments left unset are inferred from the environment:
    - ``api_key`` from ``AWS_BEARER_TOKEN_BEDROCK``
    - ``aws_region`` from ``AWS_REGION`` or ``AWS_DEFAULT_REGION``
    - ``base_url`` from ``AWS_BEDROCK_BASE_URL``

    ``api_key`` and ``bedrock_token_provider`` are mutually exclusive. Use the
    provider (for example ``aws_bedrock_token_generator.provide_token``) when the
    credentials need to be refreshed.

    When ``timeout`` is not supplied, a 15 s connect timeout with 5 s
    read/write/pool timeouts is used.
    """
    # Fall back to the plugin's default HTTP timeouts when none is supplied.
    effective_timeout = timeout
    if not effective_timeout:
        effective_timeout = httpx.Timeout(connect=15.0, read=5.0, write=5.0, pool=5.0)

    # The helper picks the endpoint path for the model; it hands ``base_url``
    # back untouched whenever the SDK default should apply.
    endpoint = resolve_bedrock_base_url(model, aws_region, base_url)

    client = openai.AsyncBedrockOpenAI(
        api_key=api_key,
        bedrock_token_provider=bedrock_token_provider,
        aws_region=aws_region,
        base_url=endpoint,
        organization=organization,
        project=project,
        max_retries=0,
        timeout=effective_timeout,
    )

    instance = LLM(
        model=model,
        client=client,
        use_websocket=False,
        user=user,
        temperature=temperature,
        parallel_tool_calls=parallel_tool_calls,
        reasoning=reasoning,
        tool_choice=tool_choice,
        store=store,
        metadata=metadata,
        service_tier=service_tier,
        verbosity=verbosity,
        max_output_tokens=max_output_tokens,
    )
    # The client was constructed here, so the LLM is flagged as its owner.
    instance._owns_client = True
    return instance

async def aclose(self) -> None:
if self._ws:
await self._ws.aclose()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,42 @@

AsyncAzureADTokenProvider = Callable[[], str | Awaitable[str]]

# Returns a fresh Amazon Bedrock bearer token for each request. Useful for
# short-lived credentials, e.g. ``aws_bedrock_token_generator.provide_token``.
AsyncBedrockTokenProvider = Callable[[], str | Awaitable[str]]


def get_base_url(base_url: str | None) -> str:
    """Return ``base_url`` if it is non-empty, else the OpenAI default.

    The fallback is the ``OPENAI_BASE_URL`` environment variable, or
    ``https://api.openai.com/v1`` when that variable is not set.
    """
    return base_url or os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")


__all__ = ["get_base_url", "AsyncAzureADTokenProvider"]
def resolve_bedrock_base_url(
    model: str, aws_region: str | None, base_url: str | None
) -> str | None:
    """Pick the ``bedrock-mantle`` base URL to hand to the OpenAI SDK for ``model``.

    The mantle endpoint serves the ``gpt-oss`` open-weight models under ``/v1`` and
    the ``gpt-5.x`` models under ``/openai/v1``. ``AsyncBedrockOpenAI`` only ever
    derives the latter, so the ``/v1`` URL is built here for ``gpt-oss`` models.

    ``base_url`` is returned as given, leaving the SDK to apply its own default,
    when any of the following holds:

    - ``base_url`` was passed explicitly,
    - ``model`` is not an ``openai.gpt-oss*`` model,
    - ``AWS_BEDROCK_BASE_URL`` is set to a non-empty value,
    - no region is available from ``aws_region``, ``AWS_REGION`` or
      ``AWS_DEFAULT_REGION``.
    """
    env = os.environ

    needs_plain_v1 = base_url is None and model.startswith("openai.gpt-oss")
    if not needs_plain_v1 or env.get("AWS_BEDROCK_BASE_URL"):
        return base_url

    # First non-empty region wins: explicit argument, then AWS_REGION, then
    # AWS_DEFAULT_REGION.
    for candidate in (aws_region, env.get("AWS_REGION"), env.get("AWS_DEFAULT_REGION")):
        if candidate:
            return f"https://bedrock-mantle.{candidate}.api.aws/v1"

    return base_url


__all__ = [
"get_base_url",
"resolve_bedrock_base_url",
"AsyncAzureADTokenProvider",
"AsyncBedrockTokenProvider",
]
2 changes: 1 addition & 1 deletion livekit-plugins/livekit-plugins-openai/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ classifiers = [
]
dependencies = [
"livekit-agents[codecs, images]>=1.5.18",
"openai[realtime]>=2.36",
"openai[realtime]>=2.40",
]


Expand Down
Loading
Loading