diff --git a/src/google/adk/flows/llm_flows/base_llm_flow.py b/src/google/adk/flows/llm_flows/base_llm_flow.py
index 5368ca93cc..371851b521 100644
--- a/src/google/adk/flows/llm_flows/base_llm_flow.py
+++ b/src/google/adk/flows/llm_flows/base_llm_flow.py
@@ -25,6 +25,8 @@
 from typing import TYPE_CHECKING
 
 from google.genai import types
+from opentelemetry import context as otel_context
+from opentelemetry import trace
 from websockets.exceptions import ConnectionClosed
 from websockets.exceptions import ConnectionClosedOK
 
@@ -44,6 +46,7 @@
 from ...models.base_llm_connection import BaseLlmConnection
 from ...models.llm_request import LlmRequest
 from ...models.llm_response import LlmResponse
+
 from ...telemetry import tracing
 from ...telemetry.tracing import trace_call_llm
 from ...telemetry.tracing import trace_send_data
@@ -1127,7 +1130,17 @@ async def _call_llm_async(
     llm = self.__get_llm(invocation_context)
 
     async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
-      with tracer.start_as_current_span('call_llm') as span:
+      # Manage the span explicitly instead of using the start_as_current_span
+      # context manager, so that span.end() is always called. In multi-agent
+      # scenarios with transfer_to_agent, this async generator may receive
+      # GeneratorExit after an async context switch (sub-agent execution).
+      # context.detach() then raises ValueError (stale contextvars token), and
+      # with the context manager that error prevents span.end() from running.
+      # See: https://github.com/google/adk-python/issues/4715
+      span = tracer.start_span('call_llm')
+      ctx = trace.set_span_in_context(span)
+      token = otel_context.attach(ctx)
+      try:
         if invocation_context.run_config.support_cfc:
           invocation_context.live_request_queue = LiveRequestQueue()
           responses_generator = self.run_live(invocation_context)
@@ -1187,6 +1200,12 @@ async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
                 llm_response = altered_llm_response
 
               yield llm_response
+      finally:
+        try:
+          otel_context.detach(token)
+        except ValueError:
+          pass
+        span.end()
 
     async with Aclosing(_call_llm_with_tracing()) as agen:
       async for event in agen:
diff --git a/tests/unittests/telemetry/test_functional.py b/tests/unittests/telemetry/test_functional.py
index 3b7d93c443..bfa88557a6 100644
--- a/tests/unittests/telemetry/test_functional.py
+++ b/tests/unittests/telemetry/test_functional.py
@@ -75,6 +75,9 @@ def do_replace(tracer):
     monkeypatch.setattr(
         tracer, 'start_as_current_span', real_tracer.start_as_current_span
     )
+    monkeypatch.setattr(
+        tracer, 'start_span', real_tracer.start_span
+    )
 
   do_replace(tracing.tracer)
   do_replace(base_agent.tracer)