Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion py/noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def test_litellm(session, version):
# Install fastapi and orjson as they're required by litellm for proxy/responses operations
session.install("openai<=1.99.9", "--force-reinstall", "fastapi", "orjson")
_install(session, "litellm", version)
_run_tests(session, f"{WRAPPER_DIR}/test_litellm.py")
_run_tests(session, f"{INTEGRATION_DIR}/litellm/test_litellm.py")
_run_core_tests(session)


Expand Down
6 changes: 3 additions & 3 deletions py/src/braintrust/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ def is_equal(expected, output):
from .integrations.anthropic import (
wrap_anthropic, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .integrations.litellm import (
wrap_litellm, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .logger import *
from .logger import (
_internal_get_global_state, # noqa: F401 # type: ignore[reportUnusedImport]
Expand All @@ -92,9 +95,6 @@ def is_equal(expected, output):
BT_IS_ASYNC_ATTRIBUTE, # noqa: F401 # type: ignore[reportUnusedImport]
MarkAsyncWrapper, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .wrappers.litellm import (
wrap_litellm, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .wrappers.pydantic_ai import (
setup_pydantic_ai, # noqa: F401 # type: ignore[reportUnusedImport]
)
11 changes: 2 additions & 9 deletions py/src/braintrust/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
ClaudeAgentSDKIntegration,
DSPyIntegration,
GoogleGenAIIntegration,
LiteLLMIntegration,
)


Expand Down Expand Up @@ -116,7 +117,7 @@ def auto_instrument(
if anthropic:
results["anthropic"] = _instrument_integration(AnthropicIntegration)
if litellm:
results["litellm"] = _instrument_litellm()
results["litellm"] = _instrument_integration(LiteLLMIntegration)
if pydantic_ai:
results["pydantic_ai"] = _instrument_pydantic_ai()
if google_genai:
Expand Down Expand Up @@ -147,14 +148,6 @@ def _instrument_integration(integration) -> bool:
return False


def _instrument_litellm() -> bool:
with _try_patch():
from braintrust.wrappers.litellm import patch_litellm

return patch_litellm()
return False


def _instrument_pydantic_ai() -> bool:
with _try_patch():
from braintrust.wrappers.pydantic_ai import setup_pydantic_ai
Expand Down
2 changes: 2 additions & 0 deletions py/src/braintrust/integrations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from .claude_agent_sdk import ClaudeAgentSDKIntegration
from .dspy import DSPyIntegration
from .google_genai import GoogleGenAIIntegration
from .litellm import LiteLLMIntegration


__all__ = [
Expand All @@ -13,4 +14,5 @@
"ClaudeAgentSDKIntegration",
"DSPyIntegration",
"GoogleGenAIIntegration",
"LiteLLMIntegration",
]
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
"""Test auto_instrument for LiteLLM."""

from pathlib import Path

import litellm
from braintrust.auto import auto_instrument
from braintrust.integrations.litellm import LiteLLMIntegration
from braintrust.wrappers.test_utils import autoinstrument_test_context


_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "litellm" / "cassettes"

# 1. Verify not patched initially
assert not hasattr(litellm, "_braintrust_wrapped")
assert not LiteLLMIntegration.patchers[0].is_patched(litellm, None)

# 2. Instrument
results = auto_instrument()
assert results.get("litellm") == True
assert hasattr(litellm, "_braintrust_wrapped")
assert LiteLLMIntegration.patchers[0].is_patched(litellm, None)

# 3. Idempotent
results2 = auto_instrument()
assert results2.get("litellm") == True

# 4. Make API call and verify span
with autoinstrument_test_context("test_auto_litellm") as memory_logger:
with autoinstrument_test_context("test_auto_litellm", cassettes_dir=_CASSETTES_DIR) as memory_logger:
response = litellm.completion(
model="gpt-4o-mini",
messages=[{"role": "user", "content": "Say hi"}],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
"""Test that patch_litellm() patches aresponses."""

import asyncio
from pathlib import Path

import litellm
from braintrust.wrappers.litellm import patch_litellm
from braintrust.integrations.litellm import patch_litellm
from braintrust.wrappers.test_utils import autoinstrument_test_context


_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "litellm" / "cassettes"

patch_litellm()


async def main():
with autoinstrument_test_context("test_patch_litellm_aresponses") as memory_logger:
with autoinstrument_test_context("test_patch_litellm_aresponses", cassettes_dir=_CASSETTES_DIR) as memory_logger:
response = await litellm.aresponses(
model="gpt-4o-mini",
input="What's 12 + 12?",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
"""Test that patch_litellm() patches responses."""

from pathlib import Path

import litellm
from braintrust.wrappers.litellm import patch_litellm
from braintrust.integrations.litellm import patch_litellm
from braintrust.wrappers.test_utils import autoinstrument_test_context


_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "litellm" / "cassettes"

patch_litellm()

with autoinstrument_test_context("test_patch_litellm_responses") as memory_logger:
with autoinstrument_test_context("test_patch_litellm_responses", cassettes_dir=_CASSETTES_DIR) as memory_logger:
response = litellm.responses(
model="gpt-4o-mini",
input="What's 12 + 12?",
Expand Down
2 changes: 1 addition & 1 deletion py/src/braintrust/integrations/dspy/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class BraintrustDSpyCallback(BaseCallback):
and disable DSPy's disk cache:

```python
from braintrust.wrappers.litellm import patch_litellm
from braintrust.integrations.litellm import patch_litellm
patch_litellm()

import dspy
Expand Down
40 changes: 40 additions & 0 deletions py/src/braintrust/integrations/litellm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Braintrust LiteLLM integration."""

from .integration import LiteLLMIntegration
from .patchers import wrap_litellm


def patch_litellm() -> bool:
    """Globally patch the imported ``litellm`` module with Braintrust tracing.

    Applies every Braintrust patcher for LiteLLM, wrapping
    ``litellm.completion``, ``litellm.acompletion``, ``litellm.responses``,
    ``litellm.aresponses``, ``litellm.embedding``, and ``litellm.moderation``
    so that calls automatically create Braintrust spans with detailed token
    metrics, timing, and costs.

    Returns:
        True if LiteLLM was patched (or already patched), False if LiteLLM
        is not installed.

    Example:
        ```python
        from braintrust.integrations.litellm import patch_litellm

        patch_litellm()

        import litellm
        from braintrust import init_logger

        logger = init_logger(project="my-project")
        response = litellm.completion(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "Hello"}]
        )
        ```
    """
    # Delegate to the integration's setup machinery, which performs the
    # import check and the (idempotent) patching described above.
    return LiteLLMIntegration.setup()


# Public API of the braintrust LiteLLM integration package.
__all__ = [
    "LiteLLMIntegration",
    "patch_litellm",
    "wrap_litellm",
]
13 changes: 13 additions & 0 deletions py/src/braintrust/integrations/litellm/integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""LiteLLM integration definition."""

from braintrust.integrations.base import BaseIntegration

from .patchers import _ALL_LITELLM_PATCHERS


class LiteLLMIntegration(BaseIntegration):
    """Braintrust instrumentation for the LiteLLM Python SDK."""

    # Integration identifier (e.g. the key used for litellm in auto-instrument
    # results — presumably; confirm against BaseIntegration's contract).
    name = "litellm"
    # Module(s) that must be importable for this integration to apply.
    import_names = ("litellm",)
    # Patchers applied for this integration, in declaration order.
    patchers = _ALL_LITELLM_PATCHERS
109 changes: 109 additions & 0 deletions py/src/braintrust/integrations/litellm/patchers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""LiteLLM patchers — FunctionWrapperPatcher subclasses for each patch target."""

from typing import Any

from braintrust.integrations.base import FunctionWrapperPatcher

from .tracing import (
_acompletion_wrapper_async,
_aresponses_wrapper_async,
_completion_wrapper,
_embedding_wrapper,
_moderation_wrapper,
_responses_wrapper,
)


# ---------------------------------------------------------------------------
# Individual patchers
# ---------------------------------------------------------------------------


class LiteLLMCompletionPatcher(FunctionWrapperPatcher):
    """Patches ``litellm.completion`` with the sync tracing wrapper."""

    # Human-readable identifier for this patch target.
    name = "litellm.completion"
    # Attribute path resolved on the litellm module (or module-like object).
    target_path = "completion"
    # Tracing wrapper (from .tracing) applied around the original function.
    wrapper = _completion_wrapper


class LiteLLMAcompletionPatcher(FunctionWrapperPatcher):
    """Patches ``litellm.acompletion`` with the async tracing wrapper."""

    name = "litellm.acompletion"
    target_path = "acompletion"
    wrapper = _acompletion_wrapper_async


class LiteLLMResponsesPatcher(FunctionWrapperPatcher):
    """Patches ``litellm.responses`` with the sync tracing wrapper."""

    name = "litellm.responses"
    target_path = "responses"
    wrapper = _responses_wrapper


class LiteLLMAresponsesPatcher(FunctionWrapperPatcher):
    """Patches ``litellm.aresponses`` with the async tracing wrapper."""

    name = "litellm.aresponses"
    target_path = "aresponses"
    wrapper = _aresponses_wrapper_async


class LiteLLMEmbeddingPatcher(FunctionWrapperPatcher):
    """Patches ``litellm.embedding`` with the sync tracing wrapper."""

    name = "litellm.embedding"
    target_path = "embedding"
    wrapper = _embedding_wrapper


class LiteLLMModerationPatcher(FunctionWrapperPatcher):
    """Patches ``litellm.moderation`` with the sync tracing wrapper."""

    name = "litellm.moderation"
    target_path = "moderation"
    wrapper = _moderation_wrapper


# ---------------------------------------------------------------------------
# All patchers, in declaration order
# ---------------------------------------------------------------------------

# Complete set of LiteLLM patchers. Consumed by LiteLLMIntegration.patchers
# and wrap_litellm(); the tuple order is the order in which wrappers are
# applied.
_ALL_LITELLM_PATCHERS = (
    LiteLLMCompletionPatcher,
    LiteLLMAcompletionPatcher,
    LiteLLMResponsesPatcher,
    LiteLLMAresponsesPatcher,
    LiteLLMEmbeddingPatcher,
    LiteLLMModerationPatcher,
)


# ---------------------------------------------------------------------------
# Manual wrapping helper
# ---------------------------------------------------------------------------


def wrap_litellm(litellm: Any) -> Any:
    """Apply Braintrust tracing wrappers to a specific LiteLLM object.

    Whereas :func:`patch_litellm` patches the globally-imported ``litellm``
    module, this helper instruments whichever module object you hand it (or
    any object exposing the same top-level callables: ``completion``,
    ``acompletion``, ``responses``, ``aresponses``, ``embedding``, and
    ``moderation``). Each patcher is applied idempotently, so calling
    ``wrap_litellm`` twice on the same object is safe.

    Args:
        litellm: The ``litellm`` module or a module-like object that exposes
            the standard LiteLLM top-level functions.

    Returns:
        The same *litellm* object, with tracing wrappers applied in-place.

    Example::

        import litellm
        from braintrust.integrations.litellm import wrap_litellm

        wrap_litellm(litellm)

        # All subsequent calls are automatically traced.
        response = litellm.completion(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "Hello"}],
        )
    """
    # Each patcher class knows its own attribute on the target object and
    # wraps it in place.
    for patcher_cls in _ALL_LITELLM_PATCHERS:
        patcher_cls.wrap_target(litellm)
    return litellm
Loading
Loading