diff --git a/src/agents/tracing/create.py b/src/agents/tracing/create.py index 6585eebf7a..26ac2fb599 100644 --- a/src/agents/tracing/create.py +++ b/src/agents/tracing/create.py @@ -94,6 +94,7 @@ def agent_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[AgentSpanData]: """Create a new agent span. The span will not be started automatically, you should either do `with agent_span() ...` or call `span.start()` + `span.finish()` manually. @@ -109,12 +110,20 @@ def agent_span( parent: The parent span or trace. If not provided, we will automatically use the current trace/span as the parent. disabled: If True, we will return a Span but the Span will not be recorded. + metadata: Optional dictionary of custom metadata to attach to the span. Surfaces under + the exported payload's top-level ``metadata`` field. Returns: The newly created agent span. """ return get_trace_provider().create_span( - span_data=AgentSpanData(name=name, handoffs=handoffs, tools=tools, output_type=output_type), + span_data=AgentSpanData( + name=name, + handoffs=handoffs, + tools=tools, + output_type=output_type, + metadata=metadata, + ), span_id=span_id, parent=parent, disabled=disabled, @@ -126,10 +135,11 @@ def task_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[TaskSpanData]: """Create a new task span. This represents one top-level Runner invocation.""" return get_trace_provider().create_span( - span_data=TaskSpanData(name=name), + span_data=TaskSpanData(name=name, metadata=metadata), span_id=span_id, parent=parent, disabled=disabled, @@ -142,10 +152,11 @@ def turn_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[TurnSpanData]: """Create a new turn span. This represents one agent loop turn.""" return get_trace_provider().create_span( - span_data=TurnSpanData(turn=turn, agent_name=agent_name), + span_data=TurnSpanData(turn=turn, agent_name=agent_name, metadata=metadata), span_id=span_id, parent=parent, disabled=disabled, @@ -159,6 +170,7 @@ def function_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[FunctionSpanData]: """Create a new function span. The span will not be started automatically, you should either do `with function_span() ...` or call `span.start()` + `span.finish()` manually. @@ -173,12 +185,14 @@ def function_span( parent: The parent span or trace. If not provided, we will automatically use the current trace/span as the parent. disabled: If True, we will return a Span but the Span will not be recorded. + metadata: Optional dictionary of custom metadata to attach to the span. Surfaces under + the exported payload's top-level ``metadata`` field. Returns: The newly created function span. """ return get_trace_provider().create_span( - span_data=FunctionSpanData(name=name, input=input, output=output), + span_data=FunctionSpanData(name=name, input=input, output=output, metadata=metadata), span_id=span_id, parent=parent, disabled=disabled, @@ -194,6 +208,7 @@ def generation_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[GenerationSpanData]: """Create a new generation span. The span will not be started automatically, you should either do `with generation_span() ...` or call `span.start()` + `span.finish()` manually. @@ -215,6 +230,8 @@ def generation_span( parent: The parent span or trace. If not provided, we will automatically use the current trace/span as the parent. disabled: If True, we will return a Span but the Span will not be recorded. + metadata: Optional dictionary of custom metadata to attach to the span. Surfaces under + the exported payload's top-level ``metadata`` field. Returns: The newly created generation span. @@ -226,6 +243,7 @@ def generation_span( model=model, model_config=model_config, usage=usage, + metadata=metadata, ), span_id=span_id, parent=parent, @@ -238,6 +256,7 @@ def response_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[ResponseSpanData]: """Create a new response span. The span will not be started automatically, you should either do `with response_span() ...` or call `span.start()` + `span.finish()` manually. @@ -250,9 +269,11 @@ def response_span( parent: The parent span or trace. If not provided, we will automatically use the current trace/span as the parent. disabled: If True, we will return a Span but the Span will not be recorded. + metadata: Optional dictionary of custom metadata to attach to the span. Surfaces under + the exported payload's top-level ``metadata`` field. """ return get_trace_provider().create_span( - span_data=ResponseSpanData(response=response), + span_data=ResponseSpanData(response=response, metadata=metadata), span_id=span_id, parent=parent, disabled=disabled, @@ -265,6 +286,7 @@ def handoff_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[HandoffSpanData]: """Create a new handoff span. The span will not be started automatically, you should either do `with handoff_span() ...` or call `span.start()` + `span.finish()` manually. @@ -278,12 +300,14 @@ def handoff_span( parent: The parent span or trace. If not provided, we will automatically use the current trace/span as the parent. disabled: If True, we will return a Span but the Span will not be recorded. + metadata: Optional dictionary of custom metadata to attach to the span. Surfaces under + the exported payload's top-level ``metadata`` field. Returns: The newly created handoff span. """ return get_trace_provider().create_span( - span_data=HandoffSpanData(from_agent=from_agent, to_agent=to_agent), + span_data=HandoffSpanData(from_agent=from_agent, to_agent=to_agent, metadata=metadata), span_id=span_id, parent=parent, disabled=disabled, @@ -296,6 +320,7 @@ def custom_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[CustomSpanData]: """Create a new custom span, to which you can add your own metadata. The span will not be started automatically, you should either do `with custom_span() ...` or call @@ -303,19 +328,22 @@ def custom_span( Args: name: The name of the custom span. - data: Arbitrary structured data to associate with the span. + data: Arbitrary structured data to associate with the span. Lives inside ``span_data`` + in the exported payload. span_id: The ID of the span. Optional. If not provided, we will generate an ID. We recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are correctly formatted. parent: The parent span or trace. If not provided, we will automatically use the current trace/span as the parent. disabled: If True, we will return a Span but the Span will not be recorded. + metadata: Optional dictionary of custom metadata to attach to the span. Surfaces under + the exported payload's top-level ``metadata`` field, distinct from ``data``. Returns: The newly created custom span. """ return get_trace_provider().create_span( - span_data=CustomSpanData(name=name, data=data or {}), + span_data=CustomSpanData(name=name, data=data or {}, metadata=metadata), span_id=span_id, parent=parent, disabled=disabled, @@ -328,6 +356,7 @@ def guardrail_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[GuardrailSpanData]: """Create a new guardrail span. The span will not be started automatically, you should either do `with guardrail_span() ...` or call `span.start()` + `span.finish()` manually. @@ -341,9 +370,11 @@ def guardrail_span( parent: The parent span or trace. If not provided, we will automatically use the current trace/span as the parent. disabled: If True, we will return a Span but the Span will not be recorded. + metadata: Optional dictionary of custom metadata to attach to the span. Surfaces under + the exported payload's top-level ``metadata`` field. """ return get_trace_provider().create_span( - span_data=GuardrailSpanData(name=name, triggered=triggered), + span_data=GuardrailSpanData(name=name, triggered=triggered, metadata=metadata), span_id=span_id, parent=parent, disabled=disabled, @@ -359,6 +390,7 @@ def transcription_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[TranscriptionSpanData]: """Create a new transcription span. The span will not be started automatically, you should either do `with transcription_span() ...` or call `span.start()` + `span.finish()` manually. @@ -376,6 +408,8 @@ def transcription_span( parent: The parent span or trace. If not provided, we will automatically use the current trace/span as the parent. disabled: If True, we will return a Span but the Span will not be recorded. + metadata: Optional dictionary of custom metadata to attach to the span. Surfaces under + the exported payload's top-level ``metadata`` field. Returns: The newly created speech-to-text span. @@ -387,6 +421,7 @@ def transcription_span( output=output, model=model, model_config=model_config, + metadata=metadata, ), span_id=span_id, parent=parent, @@ -404,6 +439,7 @@ def speech_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[SpeechSpanData]: """Create a new speech span. The span will not be started automatically, you should either do `with speech_span() ...` or call `span.start()` + `span.finish()` manually. @@ -421,6 +457,8 @@ def speech_span( parent: The parent span or trace. If not provided, we will automatically use the current trace/span as the parent. disabled: If True, we will return a Span but the Span will not be recorded. + metadata: Optional dictionary of custom metadata to attach to the span. Surfaces under + the exported payload's top-level ``metadata`` field. """ return get_trace_provider().create_span( span_data=SpeechSpanData( @@ -430,6 +468,7 @@ def speech_span( output_format=output_format, model_config=model_config, first_content_at=first_content_at, + metadata=metadata, ), span_id=span_id, parent=parent, @@ -442,6 +481,7 @@ def speech_group_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[SpeechGroupSpanData]: """Create a new speech group span. The span will not be started automatically, you should either do `with speech_group_span() ...` or call `span.start()` + `span.finish()` manually. @@ -454,9 +494,11 @@ def speech_group_span( parent: The parent span or trace. If not provided, we will automatically use the current trace/span as the parent. disabled: If True, we will return a Span but the Span will not be recorded. + metadata: Optional dictionary of custom metadata to attach to the span. Surfaces under + the exported payload's top-level ``metadata`` field. """ return get_trace_provider().create_span( - span_data=SpeechGroupSpanData(input=input), + span_data=SpeechGroupSpanData(input=input, metadata=metadata), span_id=span_id, parent=parent, disabled=disabled, @@ -469,6 +511,7 @@ def mcp_tools_span( span_id: str | None = None, parent: Trace | Span[Any] | None = None, disabled: bool = False, + metadata: dict[str, Any] | None = None, ) -> Span[MCPListToolsSpanData]: """Create a new MCP list tools span. The span will not be started automatically, you should either do `with mcp_tools_span() ...` or call `span.start()` + `span.finish()` manually. @@ -482,9 +525,11 @@ def mcp_tools_span( parent: The parent span or trace. If not provided, we will automatically use the current trace/span as the parent. disabled: If True, we will return a Span but the Span will not be recorded. + metadata: Optional dictionary of custom metadata to attach to the span. Surfaces under + the exported payload's top-level ``metadata`` field. """ return get_trace_provider().create_span( - span_data=MCPListToolsSpanData(server=server, result=result), + span_data=MCPListToolsSpanData(server=server, result=result, metadata=metadata), span_id=span_id, parent=parent, disabled=disabled, diff --git a/src/agents/tracing/span_data.py b/src/agents/tracing/span_data.py index d109ee5ead..0f2be0945d 100644 --- a/src/agents/tracing/span_data.py +++ b/src/agents/tracing/span_data.py @@ -138,7 +138,7 @@ class FunctionSpanData(SpanData): Includes input, output and MCP data (if applicable). """ - __slots__ = ("name", "input", "output", "mcp_data") + __slots__ = ("name", "input", "output", "mcp_data", "metadata") def __init__( self, @@ -146,11 +146,13 @@ def __init__( input: str | None, output: Any | None, mcp_data: dict[str, Any] | None = None, + metadata: dict[str, Any] | None = None, ): self.name = name self.input = input self.output = output self.mcp_data = mcp_data + self.metadata = metadata @property def type(self) -> str: @@ -178,6 +180,7 @@ class GenerationSpanData(SpanData): "model", "model_config", "usage", + "metadata", ) def __init__( @@ -187,12 +190,14 @@ def __init__( model: str | None = None, model_config: Mapping[str, Any] | None = None, usage: dict[str, Any] | None = None, + metadata: dict[str, Any] | None = None, ): self.input = input self.output = output self.model = model self.model_config = model_config self.usage = usage + self.metadata = metadata @property def type(self) -> str: @@ -215,19 +220,21 @@ class ResponseSpanData(SpanData): Includes response and input. """ - __slots__ = ("response", "input", "usage") + __slots__ = ("response", "input", "usage", "metadata") def __init__( self, response: Response | None = None, input: str | list[ResponseInputItemParam] | None = None, usage: dict[str, Any] | None = None, + metadata: dict[str, Any] | None = None, ) -> None: self.response = response # This is not used by the OpenAI trace processors, but is useful for other tracing # processor implementations self.input = input self.usage = usage + self.metadata = metadata @property def type(self) -> str: @@ -247,11 +254,17 @@ class HandoffSpanData(SpanData): Includes source and destination agents. """ - __slots__ = ("from_agent", "to_agent") + __slots__ = ("from_agent", "to_agent", "metadata") - def __init__(self, from_agent: str | None, to_agent: str | None): + def __init__( + self, + from_agent: str | None, + to_agent: str | None, + metadata: dict[str, Any] | None = None, + ): self.from_agent = from_agent self.to_agent = to_agent + self.metadata = metadata @property def type(self) -> str: @@ -271,11 +284,17 @@ class CustomSpanData(SpanData): Includes name and data property bag. """ - __slots__ = ("name", "data") + __slots__ = ("name", "data", "metadata") - def __init__(self, name: str, data: dict[str, Any]): + def __init__( + self, + name: str, + data: dict[str, Any], + metadata: dict[str, Any] | None = None, + ): self.name = name self.data = data + self.metadata = metadata @property def type(self) -> str: @@ -295,11 +314,17 @@ class GuardrailSpanData(SpanData): Includes name and triggered status. """ - __slots__ = ("name", "triggered") + __slots__ = ("name", "triggered", "metadata") - def __init__(self, name: str, triggered: bool = False): + def __init__( + self, + name: str, + triggered: bool = False, + metadata: dict[str, Any] | None = None, + ): self.name = name self.triggered = triggered + self.metadata = metadata @property def type(self) -> str: @@ -324,6 +349,7 @@ class TranscriptionSpanData(SpanData): "output", "model", "model_config", + "metadata", ) def __init__( @@ -333,12 +359,14 @@ def __init__( output: str | None = None, model: str | None = None, model_config: Mapping[str, Any] | None = None, + metadata: dict[str, Any] | None = None, ): self.input = input self.input_format = input_format self.output = output self.model = model self.model_config = model_config + self.metadata = metadata @property def type(self) -> str: @@ -363,7 +391,7 @@ class SpeechSpanData(SpanData): Includes input, output, model, model configuration, and first content timestamp. """ - __slots__ = ("input", "output", "model", "model_config", "first_content_at") + __slots__ = ("input", "output", "model", "model_config", "first_content_at", "metadata") def __init__( self, @@ -373,6 +401,7 @@ def __init__( model: str | None = None, model_config: Mapping[str, Any] | None = None, first_content_at: str | None = None, + metadata: dict[str, Any] | None = None, ): self.input = input self.output = output @@ -380,6 +409,7 @@ def __init__( self.model = model self.model_config = model_config self.first_content_at = first_content_at + self.metadata = metadata @property def type(self) -> str: @@ -404,13 +434,15 @@ class SpeechGroupSpanData(SpanData): Represents a Speech Group Span in the trace. """ - __slots__ = "input" + __slots__ = ("input", "metadata") def __init__( self, input: str | None = None, + metadata: dict[str, Any] | None = None, ): self.input = input + self.metadata = metadata @property def type(self) -> str: @@ -432,11 +464,18 @@ class MCPListToolsSpanData(SpanData): __slots__ = ( "server", "result", + "metadata", ) - def __init__(self, server: str | None = None, result: list[str] | None = None): + def __init__( + self, + server: str | None = None, + result: list[str] | None = None, + metadata: dict[str, Any] | None = None, + ): self.server = server self.result = result + self.metadata = metadata @property def type(self) -> str: diff --git a/tests/test_tracing.py b/tests/test_tracing.py index 1076a79cfa..b2170f1a2b 100644 --- a/tests/test_tracing.py +++ b/tests/test_tracing.py @@ -14,9 +14,17 @@ custom_span, function_span, generation_span, + guardrail_span, handoff_span, + mcp_tools_span, + response_span, set_trace_processors, + speech_group_span, + speech_span, + task_span, trace, + transcription_span, + turn_span, ) from agents.tracing.spans import SpanError @@ -519,6 +527,76 @@ def force_flush(self) -> None: assert processor.span_trace_metadata == metadata +@pytest.mark.parametrize( + "factory", + [ + pytest.param(lambda md: agent_span(name="a", metadata=md), id="agent"), + pytest.param(lambda md: task_span(name="t", metadata=md), id="task"), + pytest.param( + lambda md: turn_span(turn=1, agent_name="a", metadata=md), + id="turn", + ), + pytest.param(lambda md: response_span(metadata=md), id="response"), + pytest.param(lambda md: function_span(name="fn", metadata=md), id="function"), + pytest.param(lambda md: generation_span(metadata=md), id="generation"), + pytest.param( + lambda md: handoff_span(from_agent="a", to_agent="b", metadata=md), + id="handoff", + ), + pytest.param(lambda md: guardrail_span(name="g", metadata=md), id="guardrail"), + pytest.param(lambda md: mcp_tools_span(server="s", metadata=md), id="mcp_tools"), + pytest.param( + lambda md: custom_span(name="c", data={"k": "v"}, metadata=md), + id="custom", + ), + pytest.param( + lambda md: transcription_span(model="m", metadata=md), + id="transcription", + ), + pytest.param(lambda md: speech_span(model="m", metadata=md), id="speech"), + pytest.param( + lambda md: speech_group_span(input="hi", metadata=md), + id="speech_group", + ), + ], +) +def test_span_factories_accept_metadata_and_export_it(factory): + """Issue #1844: every public span helper must accept a ``metadata`` kwarg + and surface it under the exported payload's top-level ``metadata`` field. + + Covers all 13 public factories exported from ``agents.tracing``. + """ + + custom_metadata = {"agent_name": "support", "current_turn": 3} + + with trace(workflow_name="test"): + with factory(custom_metadata) as span: + pass + + export = span.export() + + assert export is not None + assert export.get("metadata") == custom_metadata + + +def test_span_data_metadata_can_be_mutated_after_creation(): + """Code holding a direct reference to a span can mutate + ``span.span_data.metadata`` after construction and the value will appear + in the exported payload. + """ + + custom_metadata = {"current_turn": 2} + + with trace(workflow_name="test"): + with response_span() as span: + span.span_data.metadata = custom_metadata + + export = span.export() + + assert export is not None + assert export.get("metadata") == custom_metadata + + def test_trace_to_json_only_includes_tracing_api_key_when_requested(): with trace(workflow_name="test", tracing={"api_key": "secret-key"}) as tr: default_json = tr.to_json()