livekit · piyush-gambhir · Jun 5, 2026 · Jun 5, 2026 · Jun 5, 2026 · Jun 6, 2026
diff --git a/livekit-plugins/livekit-plugins-openai/README.md b/livekit-plugins/livekit-plugins-openai/README.md
@@ -2,7 +2,7 @@
 
 Support for OpenAI Realtime API, Responses API, LLM, TTS, and STT APIs.
 
-Also includes support for a large number of OpenAI-compatible APIs including [Azure OpenAI](https://docs.livekit.io/agents/integrations/llm/azure-openai/), [Cerebras](https://docs.livekit.io/agents/integrations/cerebras/), [Fireworks](https://docs.livekit.io/agents/integrations/llm/fireworks/), [Perplexity](https://docs.livekit.io/agents/integrations/llm/perplexity/), [Telnyx](https://docs.livekit.io/agents/integrations/llm/telnyx/), [xAI](https://docs.livekit.io/agents/integrations/llm/xai/), [Ollama](https://docs.livekit.io/agents/integrations/llm/ollama/), [DeepSeek](https://docs.livekit.io/agents/integrations/llm/deepseek/), and [SambaNova](https://docs.livekit.io/agents/integrations/llm/sambanova/).
+Also includes support for a large number of OpenAI-compatible APIs including [Azure OpenAI](https://docs.livekit.io/agents/integrations/llm/azure-openai/), [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-openai.html), [Cerebras](https://docs.livekit.io/agents/integrations/cerebras/), [Fireworks](https://docs.livekit.io/agents/integrations/llm/fireworks/), [Perplexity](https://docs.livekit.io/agents/integrations/llm/perplexity/), [Telnyx](https://docs.livekit.io/agents/integrations/llm/telnyx/), [xAI](https://docs.livekit.io/agents/integrations/llm/xai/), [Ollama](https://docs.livekit.io/agents/integrations/llm/ollama/), [DeepSeek](https://docs.livekit.io/agents/integrations/llm/deepseek/), and [SambaNova](https://docs.livekit.io/agents/integrations/llm/sambanova/).
 
 See [https://docs.livekit.io/agents/integrations/openai/](https://docs.livekit.io/agents/integrations/openai/) for more information.
 

diff --git a/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/__init__.py b/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/__init__.py
@@ -16,8 +16,9 @@
 
 Support for OpenAI Realtime API, LLM, TTS, and STT APIs.
 
-Also includes support for a large number of OpenAI-compatible APIs including Azure OpenAI, Cerebras,
-Fireworks, Perplexity, Telnyx, xAI, Ollama, DeepSeek, OpenRouter, and OVHcloud AI Endpoints.
+Also includes support for a large number of OpenAI-compatible APIs including Azure OpenAI, Amazon
+Bedrock, Cerebras, Fireworks, Perplexity, Telnyx, xAI, Ollama, DeepSeek, OpenRouter, and OVHcloud
+AI Endpoints.
 
 See https://docs.livekit.io/agents/integrations/openai/ and
 https://docs.livekit.io/agents/integrations/llm/ for more information.

diff --git a/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/llm.py b/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/llm.py
@@ -40,6 +40,7 @@
 from openai.types.chat import ChatCompletionToolChoiceOptionParam, completion_create_params
 
 from .models import (
+    BedrockChatModels,
     CerebrasChatModels,
     ChatModels,
     CometAPIChatModels,
@@ -55,7 +56,11 @@
     XAIChatModels,
     _supports_reasoning_effort,
 )
-from .utils import AsyncAzureADTokenProvider
+from .utils import (
+    AsyncAzureADTokenProvider,
+    AsyncBedrockTokenProvider,
+    resolve_bedrock_base_url,
+)
 
 lk_oai_debug = int(os.getenv("LK_OPENAI_DEBUG", 0))
 
@@ -124,7 +129,7 @@ def __init__(
         super().__init__()
 
         if not is_given(reasoning_effort) and _supports_reasoning_effort(model):
-            if model in ["gpt-5.1", "gpt-5.2", "gpt-5.4"]:
+            if model.removeprefix("openai.") in ["gpt-5.1", "gpt-5.2", "gpt-5.4"]:
                 reasoning_effort = "none"
             else:
                 reasoning_effort = "minimal"
@@ -255,6 +260,85 @@ def with_azure(
         llm._owns_client = True
         return llm
 
+    @staticmethod
+    def with_aws_bedrock(
+        *,
+        model: str | BedrockChatModels = "openai.gpt-oss-120b",
+        api_key: str | None = None,
+        bedrock_token_provider: AsyncBedrockTokenProvider | None = None,
+        aws_region: str | None = None,
+        base_url: str | None = None,
+        organization: str | None = None,
+        project: str | None = None,
+        user: NotGivenOr[str] = NOT_GIVEN,
+        safety_identifier: NotGivenOr[str] = NOT_GIVEN,
+        prompt_cache_key: NotGivenOr[str] = NOT_GIVEN,
+        temperature: NotGivenOr[float] = NOT_GIVEN,
+        parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
+        tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
+        timeout: httpx.Timeout | None = None,
+        reasoning_effort: NotGivenOr[ReasoningEffort] = NOT_GIVEN,
+        top_p: NotGivenOr[float] = NOT_GIVEN,
+        verbosity: NotGivenOr[Verbosity] = NOT_GIVEN,
+        max_completion_tokens: NotGivenOr[int] = NOT_GIVEN,
+    ) -> LLM:
+        """
+        Create a new instance of a Chat Completions LLM backed by OpenAI models on
+        Amazon Bedrock.
+
+        Amazon Bedrock exposes an OpenAI-compatible endpoint for OpenAI's open-weight
+        models (e.g. ``openai.gpt-oss-120b``). This builds an
+        ``openai.AsyncBedrockOpenAI`` client pointed at the regional Bedrock endpoint
+        (``https://bedrock-mantle.<region>.api.aws/v1`` for ``gpt-oss`` models); the
+        Bedrock bearer token is sent in the ``Authorization`` header.
+
+        Only the ``gpt-oss`` models are available over Chat Completions on Bedrock. The
+        ``gpt-5.x`` models are Responses-only — use
+        :meth:`livekit.plugins.openai.responses.LLM.with_aws_bedrock` for those.
+
+        This automatically infers the following arguments from their corresponding
+        environment variables if they are not provided:
+        - ``api_key`` from ``AWS_BEARER_TOKEN_BEDROCK``
+        - ``aws_region`` from ``AWS_REGION`` or ``AWS_DEFAULT_REGION``
+        - ``base_url`` from ``AWS_BEDROCK_BASE_URL``
+
+        For refreshable credentials, pass ``bedrock_token_provider`` (for example
+        ``aws_bedrock_token_generator.provide_token``) instead of ``api_key``; the two
+        are mutually exclusive.
+        """
+        bedrock_client = openai.AsyncBedrockOpenAI(
+            api_key=api_key,
+            bedrock_token_provider=bedrock_token_provider,
+            aws_region=aws_region,
+            base_url=resolve_bedrock_base_url(model, aws_region, base_url),
+            organization=organization,
+            project=project,
+            max_retries=0,
+            timeout=timeout
+            if timeout
+            else httpx.Timeout(connect=15.0, read=5.0, write=5.0, pool=5.0),
+        )
+
+        llm = LLM(
+            model=model,
+            client=bedrock_client,
+            user=user,
+            temperature=temperature,
+            parallel_tool_calls=parallel_tool_calls,
+            tool_choice=tool_choice,
+            reasoning_effort=reasoning_effort,
+            safety_identifier=safety_identifier,
+            prompt_cache_key=prompt_cache_key,
+            top_p=top_p,
+            verbosity=verbosity,
+            max_completion_tokens=max_completion_tokens,
+            # Bedrock currently serves the gpt-oss open-weight models behind an
+            # OpenAI-compatible shim, like the other open-model providers in this file.
+            _strict_tool_schema=False,
+        )
+        llm._owns_client = True
+        return llm
+
     @staticmethod
     def with_cerebras(
         *,

diff --git a/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/models.py b/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/models.py
@@ -292,8 +292,32 @@
     "E5-Mistral-7B-Instruct",
 ]
 
+# OpenAI models served through Amazon Bedrock's OpenAI-compatible `bedrock-mantle`
+# endpoint. Model IDs on this endpoint omit the `-1:0` Bedrock-native version suffix
+# (that suffix is only used by the bedrock-runtime InvokeModel/Converse APIs).
+
+# Models exposed over the Chat Completions API (gpt-oss only).
+# See https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-openai-gpt-oss-120b.html
+BedrockChatModels = Literal[
+    "openai.gpt-oss-20b",
+    "openai.gpt-oss-120b",
+]
+
+# Models exposed over the Responses API. The gpt-5.x models are Responses-only on
+# Bedrock; gpt-oss supports both Responses and Chat Completions.
+# See https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-openai-gpt-55.html
+BedrockResponsesModels = Literal[
+    "openai.gpt-5.5",
+    "openai.gpt-5.4",
+    "openai.gpt-oss-120b",
+    "openai.gpt-oss-20b",
+]
+
 
 def _supports_reasoning_effort(model: ChatModels | str) -> bool:
+    # Amazon Bedrock exposes the same models under an ``openai.`` prefix
+    # (e.g. ``openai.gpt-5.4``); normalize it so the defaults below still apply.
+    model = model.removeprefix("openai.")
     return model in [
         "gpt-5.4",
         "gpt-5.4-mini",

diff --git a/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/responses/llm.py b/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/responses/llm.py
@@ -42,8 +42,9 @@
 from openai.types.shared_params import ResponsesModel
 
 from ..log import logger
-from ..models import _supports_reasoning_effort
+from ..models import BedrockResponsesModels, _supports_reasoning_effort
 from ..tools import OpenAITool
+from ..utils import AsyncBedrockTokenProvider, resolve_bedrock_base_url
 
 ServiceTier = Literal["auto", "default", "flex", "scale", "priority"]
 Verbosity = Literal["low", "medium", "high"]
@@ -186,7 +187,7 @@ def __init__(
         super().__init__()
 
         if not is_given(reasoning) and _supports_reasoning_effort(model):
-            if model in ["gpt-5.1", "gpt-5.2", "gpt-5.4"]:
+            if model.removeprefix("openai.") in ["gpt-5.1", "gpt-5.2", "gpt-5.4"]:
                 reasoning = Reasoning(effort="none")
             else:
                 reasoning = Reasoning(effort="minimal")
@@ -250,6 +251,79 @@ def __init__(
                 ),
             )
 
+    @staticmethod
+    def with_aws_bedrock(
+        *,
+        model: str | BedrockResponsesModels = "openai.gpt-5.5",
+        api_key: str | None = None,
+        bedrock_token_provider: AsyncBedrockTokenProvider | None = None,
+        aws_region: str | None = None,
+        base_url: str | None = None,
+        organization: str | None = None,
+        project: str | None = None,
+        user: NotGivenOr[str] = NOT_GIVEN,
+        temperature: NotGivenOr[float] = NOT_GIVEN,
+        parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
+        reasoning: NotGivenOr[Reasoning] = NOT_GIVEN,
+        tool_choice: NotGivenOr[ToolChoice | Literal["auto", "required", "none"]] = NOT_GIVEN,
+        store: NotGivenOr[bool] = NOT_GIVEN,
+        metadata: NotGivenOr[dict[str, str]] = NOT_GIVEN,
+        service_tier: NotGivenOr[ServiceTier] = NOT_GIVEN,
+        verbosity: NotGivenOr[Verbosity] = NOT_GIVEN,
+        max_output_tokens: NotGivenOr[int] = NOT_GIVEN,
+        timeout: httpx.Timeout | None = None,
+    ) -> LLM:
+        """
+        Create a new instance of a Responses API LLM backed by OpenAI models on Amazon
+        Bedrock.
+
+        Bedrock serves the ``gpt-5.x`` models (e.g. ``openai.gpt-5.5``,
+        ``openai.gpt-5.4``) and the ``gpt-oss`` models over the OpenAI-compatible
+        Responses API on the regional ``bedrock-mantle`` endpoint. This builds an
+        ``openai.AsyncBedrockOpenAI`` client; the WebSocket transport is disabled
+        automatically since Bedrock only exposes the HTTP Responses path.
+
+        This automatically infers the following arguments from their corresponding
+        environment variables if they are not provided:
+        - ``api_key`` from ``AWS_BEARER_TOKEN_BEDROCK``
+        - ``aws_region`` from ``AWS_REGION`` or ``AWS_DEFAULT_REGION``
+        - ``base_url`` from ``AWS_BEDROCK_BASE_URL``
+
+        For refreshable credentials, pass ``bedrock_token_provider`` (for example
+        ``aws_bedrock_token_generator.provide_token``) instead of ``api_key``; the two
+        are mutually exclusive.
+        """
+        bedrock_client = openai.AsyncBedrockOpenAI(
+            api_key=api_key,
+            bedrock_token_provider=bedrock_token_provider,
+            aws_region=aws_region,
+            base_url=resolve_bedrock_base_url(model, aws_region, base_url),
+            organization=organization,
+            project=project,
+            max_retries=0,
+            timeout=timeout
+            if timeout
+            else httpx.Timeout(connect=15.0, read=5.0, write=5.0, pool=5.0),
+        )
+
+        llm = LLM(
+            model=model,
+            client=bedrock_client,
+            use_websocket=False,
+            user=user,
+            temperature=temperature,
+            parallel_tool_calls=parallel_tool_calls,
+            reasoning=reasoning,
+            tool_choice=tool_choice,
+            store=store,
+            metadata=metadata,
+            service_tier=service_tier,
+            verbosity=verbosity,
+            max_output_tokens=max_output_tokens,
+        )
+        llm._owns_client = True
+        return llm
+
     async def aclose(self) -> None:
         if self._ws:
             await self._ws.aclose()

diff --git a/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/utils.py b/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/utils.py
@@ -5,11 +5,42 @@
 
 AsyncAzureADTokenProvider = Callable[[], str | Awaitable[str]]
 
+# Returns a fresh Amazon Bedrock bearer token for each request. Useful for
+# short-lived credentials, e.g. ``aws_bedrock_token_generator.provide_token``.
+AsyncBedrockTokenProvider = Callable[[], str | Awaitable[str]]
+
 
 def get_base_url(base_url: str | None) -> str:
     if not base_url:
         base_url = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
     return base_url
 
 
-__all__ = ["get_base_url", "AsyncAzureADTokenProvider"]
+def resolve_bedrock_base_url(
+    model: str, aws_region: str | None, base_url: str | None
+) -> str | None:
+    """Resolve the ``bedrock-mantle`` base URL for ``model``.
+
+    On Bedrock's mantle endpoint the ``gpt-oss`` open-weight models are served on the
+    ``/v1`` path, while the ``gpt-5.x`` models are served on ``/openai/v1``. The openai
+    SDK's ``AsyncBedrockOpenAI`` only ever derives ``/openai/v1``, so resolve the ``/v1``
+    URL for ``gpt-oss`` here. For every other case (explicit ``base_url``, an
+    ``AWS_BEDROCK_BASE_URL`` override, a non-gpt-oss model, or an unresolved region) the
+    value is returned unchanged so the SDK keeps its default behaviour.
+    """
+    if base_url is not None or not model.startswith("openai.gpt-oss"):
+        return base_url
+    if os.environ.get("AWS_BEDROCK_BASE_URL"):
+        return base_url
+    region = aws_region or os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
+    if not region:
+        return base_url
+    return f"https://bedrock-mantle.{region}.api.aws/v1"
+
+
+__all__ = [
+    "get_base_url",
+    "resolve_bedrock_base_url",
+    "AsyncAzureADTokenProvider",
+    "AsyncBedrockTokenProvider",
+]
diff --git a/livekit-plugins/livekit-plugins-openai/pyproject.toml b/livekit-plugins/livekit-plugins-openai/pyproject.toml
@@ -23,7 +23,7 @@ classifiers = [
 ]
 dependencies = [
     "livekit-agents[codecs, images]>=1.5.18",
-    "openai[realtime]>=2.36",
+    "openai[realtime]>=2.40",
 ]