diff --git a/docs/examples/telemetry/README.md b/docs/examples/telemetry/README.md index fc79b1b6f..a91575c2a 100644 --- a/docs/examples/telemetry/README.md +++ b/docs/examples/telemetry/README.md @@ -6,6 +6,9 @@ This directory contains examples demonstrating OpenTelemetry tracing and metrics - **`telemetry_example.py`** - Demonstrates distributed tracing (application and backend traces) - **`metrics_example.py`** - Demonstrates token usage metrics collection +- **`otel_genai_semconv_example.py`** - Verifies OTel GenAI semantic convention attributes + emitted on backend spans (`gen_ai.provider.name`, `gen_ai.conversation.id`, `error.type`). + Designed for human verification against [otelite](https://github.com/planetf1/otelite). ## Quick Start diff --git a/docs/examples/telemetry/otel_genai_semconv_example.py b/docs/examples/telemetry/otel_genai_semconv_example.py new file mode 100644 index 000000000..e38840c4c --- /dev/null +++ b/docs/examples/telemetry/otel_genai_semconv_example.py @@ -0,0 +1,89 @@ +# pytest: ollama, e2e, qualitative + +"""Mellea backend spans carrying OTel GenAI semantic convention attributes. 
+ +Each backend generation call emits a ``chat`` span with the following attributes +drawn from the OTel GenAI semconv (https://opentelemetry.io/docs/specs/semconv/gen-ai/): + + gen_ai.provider.name — provider identity (current semconv) + gen_ai.system — same value, retained for back-compat with existing dashboards + gen_ai.conversation.id — correlated to the active session via ``with_context`` + error.type — set on the error path alongside ERROR span status + +Run against otelite for human verification: + + # Terminal 1 — start otelite (OTLP gRPC :4317, UI :8080) + docker run --rm -p 4317:4317 -p 8080:8080 ghcr.io/planetf1/otelite:latest + + # Terminal 2 + export MELLEA_TRACE_BACKEND=1 + export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 + export OTEL_SERVICE_NAME=mellea-semconv-demo + python otel_genai_semconv_example.py + + Then open http://localhost:8080 and select the mellea-semconv-demo service. + +Expected span attributes +------------------------ + Span "chat" (normal path) + gen_ai.system = "ollama" + gen_ai.provider.name = "ollama" + gen_ai.conversation.id = "demo-session-1" + mellea.session_id = "demo-session-1" + + Span "chat" (error path) + error.type = class name of the raised exception + status = ERROR +""" + +from mellea import start_session +from mellea.telemetry import is_backend_tracing_enabled, with_context + + +def _section(title: str) -> None: + print(f"\n{'=' * 60}\n {title}\n{'=' * 60}") + + +def main() -> None: + _section("Mellea OTel GenAI Semantic Convention Demo") + print(f"Backend tracing: {is_backend_tracing_enabled()}") + if not is_backend_tracing_enabled(): + print("Set MELLEA_TRACE_BACKEND=1 to enable backend spans.") + + # ----------------------------------------------------------------------- + # Normal path: provider name + conversation id + # ----------------------------------------------------------------------- + _section("Normal path — provider name and conversation id") + print("Expected span attrs:") + print(" gen_ai.system = 'ollama'") + print(" 
gen_ai.provider.name = 'ollama'") + print(" gen_ai.conversation.id = 'demo-session-1'") + + with with_context(session_id="demo-session-1"): + with start_session() as m: + result = m.instruct("Summarise quantum tunnelling in one sentence.") + print(f"\nOutput: {str(result)[:120]}") + + # ----------------------------------------------------------------------- + # Error path: error.type + ERROR status + # ----------------------------------------------------------------------- + _section("Error path — error.type on span") + print("Expected span attrs:") + print(" status = ERROR") + print(" error.type = ") + + try: + with start_session(base_url="http://localhost:19999") as m2: + m2.instruct("Hello") + except Exception as exc: + print(f"\nGot expected error: {exc.__class__.__name__}") + else: + print("\n(No error — nothing is listening on port 19999)") + + _section("Done") + print("If OTEL_EXPORTER_OTLP_ENDPOINT is set, check your trace backend.") + print("If MELLEA_TRACE_CONSOLE=1, spans were printed to stdout above.") + + +if __name__ == "__main__": + main() diff --git a/mellea/backends/ollama.py b/mellea/backends/ollama.py index 5b50cd709..bc436bece 100644 --- a/mellea/backends/ollama.py +++ b/mellea/backends/ollama.py @@ -28,7 +28,6 @@ from ..stdlib.components import Message from ..stdlib.requirements import ALoraRequirement from ..telemetry.backend_instrumentation import ( - instrument_generate_from_context, instrument_generate_from_raw, start_generate_span, ) diff --git a/mellea/backends/openai.py b/mellea/backends/openai.py index 1eea93511..5dfeaec51 100644 --- a/mellea/backends/openai.py +++ b/mellea/backends/openai.py @@ -45,7 +45,6 @@ from ..stdlib.components import Intrinsic, Message from ..stdlib.requirements import LLMaJRequirement from ..telemetry.backend_instrumentation import ( - instrument_generate_from_context, instrument_generate_from_raw, start_generate_span, ) diff --git a/mellea/core/base.py b/mellea/core/base.py index 5ab4aa935..3472bf4db 100644 
--- a/mellea/core/base.py +++ b/mellea/core/base.py @@ -523,10 +523,9 @@ async def astream(self) -> str: # but we must not leak the span. span = self._meta.get("_telemetry_span") if span is not None: - from ..telemetry import end_backend_span, set_span_error + from ..telemetry.backend_instrumentation import finalize_backend_span - set_span_error(span, chunks[-1]) - end_backend_span(span) + finalize_backend_span(span, error=chunks[-1]) del self._meta["_telemetry_span"] # Fire generation_error hook (FIRE_AND_FORGET — does not block the raise) diff --git a/mellea/telemetry/__init__.py b/mellea/telemetry/__init__.py index ea9351252..6fb433a6c 100644 --- a/mellea/telemetry/__init__.py +++ b/mellea/telemetry/__init__.py @@ -92,9 +92,11 @@ def my_function(): ) from .pricing import is_pricing_enabled from .tracing import ( + add_span_event, end_backend_span, is_application_tracing_enabled, is_backend_tracing_enabled, + is_content_tracing_enabled, set_span_attribute, set_span_error, start_backend_span, @@ -104,6 +106,7 @@ def my_function(): __all__ = [ "MelleaContextFilter", + "add_span_event", "async_with_context", "create_counter", "create_histogram", @@ -118,6 +121,7 @@ def my_function(): "get_session_id", "is_application_tracing_enabled", "is_backend_tracing_enabled", + "is_content_tracing_enabled", "is_metrics_enabled", "is_pricing_enabled", "record_cost", diff --git a/mellea/telemetry/backend_instrumentation.py b/mellea/telemetry/backend_instrumentation.py index 737da43ab..dc2d26fff 100644 --- a/mellea/telemetry/backend_instrumentation.py +++ b/mellea/telemetry/backend_instrumentation.py @@ -7,7 +7,7 @@ from typing import Any from ..backends.utils import get_value -from .tracing import set_span_attribute, trace_backend +from .tracing import end_backend_span, set_span_attribute, set_span_error, trace_backend def get_model_id_str(backend: Any) -> str: @@ -30,6 +30,9 @@ def get_model_id_str(backend: Any) -> str: def get_system_name(backend: Any) -> str: """Get the Gen-AI 
system name from backend. + Kept for back-compatibility with existing dashboards keyed on ``gen_ai.system``. + New code should prefer ``get_provider_name()``. + Args: backend: Backend instance @@ -51,6 +54,21 @@ def get_system_name(backend: Any) -> str: return backend.__class__.__name__ +def get_provider_name(backend: Any) -> str: + """Get the Gen-AI provider name from backend. + + Returns the value for ``gen_ai.provider.name`` (semconv v1.37.0+), which + supersedes the deprecated ``gen_ai.system`` attribute. + + Args: + backend: Backend instance + + Returns: + Provider name (e.g., 'openai', 'ollama', 'huggingface') + """ + return get_system_name(backend) + + def get_context_size(ctx: Any) -> int: """Get the size of a context. @@ -70,44 +88,6 @@ def get_context_size(ctx: Any) -> int: return 0 -def instrument_generate_from_context( - backend: Any, action: Any, ctx: Any, format: Any = None, tool_calls: bool = False -): - """Create a backend trace span for generate_from_context. - - Follows Gen-AI semantic conventions for chat operations. 
- - Args: - backend: Backend instance - action: Action component - ctx: Context - format: Response format (BaseModel subclass or None) - tool_calls: Whether tool calling is enabled - - Returns: - Context manager for the trace span - """ - model_id = get_model_id_str(backend) - system_name = get_system_name(backend) - - return trace_backend( - "chat", # Gen-AI convention: use 'chat' for chat completions - **{ - # Gen-AI semantic convention attributes - "gen_ai.system": system_name, - "gen_ai.request.model": model_id, - "gen_ai.operation.name": "chat", - # Mellea-specific attributes - "mellea.backend": backend.__class__.__name__, - "mellea.action_type": action.__class__.__name__, - "mellea.context_size": get_context_size(ctx), - "mellea.has_format": format is not None, - "mellea.format_type": format.__name__ if format else None, - "mellea.tool_calls_enabled": tool_calls, - }, - ) - - def start_generate_span( backend: Any, action: Any, ctx: Any, format: Any = None, tool_calls: bool = False ): @@ -130,13 +110,15 @@ def start_generate_span( model_id = get_model_id_str(backend) system_name = get_system_name(backend) + provider_name = get_provider_name(backend) from .context import get_current_context telemetry_ctx = get_current_context() - span_attrs: dict = { + span_attrs: dict[str, Any] = { # Gen-AI semantic convention attributes "gen_ai.system": system_name, + "gen_ai.provider.name": provider_name, "gen_ai.request.model": model_id, "gen_ai.operation.name": "chat", # Mellea-specific attributes @@ -147,10 +129,16 @@ def start_generate_span( "mellea.format_type": format.__name__ if format else None, "mellea.tool_calls_enabled": tool_calls, } + # Propagate telemetry context to span for key, value in telemetry_ctx.items(): span_attrs[f"mellea.{key}"] = value + # gen_ai.conversation.id maps from the existing session_id ContextVar + session_id = telemetry_ctx.get("session_id") + if session_id is not None: + span_attrs["gen_ai.conversation.id"] = session_id + return 
start_backend_span("chat", **span_attrs) @@ -172,12 +160,14 @@ def instrument_generate_from_raw( """ model_id = get_model_id_str(backend) system_name = get_system_name(backend) + provider_name = get_provider_name(backend) return trace_backend( "text_completion", # Gen-AI convention: use 'text_completion' for completions **{ # Gen-AI semantic convention attributes "gen_ai.system": system_name, + "gen_ai.provider.name": provider_name, "gen_ai.request.model": model_id, "gen_ai.operation.name": "text_completion", # Mellea-specific attributes @@ -214,6 +204,22 @@ def record_token_usage(span: Any, usage: Any) -> None: total_tokens = get_value(usage, "total_tokens") if total_tokens is not None: set_span_attribute(span, "gen_ai.usage.total_tokens", total_tokens) + + cache_read = get_value(usage, "cache_read_input_tokens") + if cache_read is not None: + set_span_attribute(span, "gen_ai.usage.cache_read.input_tokens", cache_read) + + cache_creation = get_value(usage, "cache_creation_input_tokens") + if cache_creation is not None: + set_span_attribute( + span, "gen_ai.usage.cache_creation.input_tokens", cache_creation + ) + + reasoning_tokens = get_value(usage, "reasoning_tokens") + if reasoning_tokens is not None: + set_span_attribute( + span, "gen_ai.usage.reasoning.output_tokens", reasoning_tokens + ) except Exception: # Don't fail if we can't extract token usage pass @@ -260,12 +266,42 @@ def record_response_metadata( pass +def finalize_backend_span(span: Any, *, error: Exception | None = None) -> None: + """Close a backend span on the error path, setting error.type and ERROR status. + + Used by the streaming error path in ``ModelOutputThunk.astream`` where a + span may be left open after an exception. Backends close spans on the + success path themselves via ``record_token_usage`` + ``record_response_metadata`` + + ``end_backend_span``. + + Args: + span: The span to finalise (no-op when ``None``). + error: Exception to record; sets ERROR status and ``error.type``. 
+ """ + if span is None: + return + + try: + if error is not None: + set_span_error(span, error) + # error.type is a Stable OTel cross-signal attribute + set_span_attribute(span, "error.type", type(error).__name__) + except Exception: + pass + try: + end_backend_span(span) + except Exception: + pass + + __all__ = [ + "finalize_backend_span", "get_context_size", "get_model_id_str", + "get_provider_name", "get_system_name", - "instrument_generate_from_context", "instrument_generate_from_raw", "record_response_metadata", "record_token_usage", + "start_generate_span", ] diff --git a/mellea/telemetry/tracing.py b/mellea/telemetry/tracing.py index 12b7b486f..48a3bf5e6 100644 --- a/mellea/telemetry/tracing.py +++ b/mellea/telemetry/tracing.py @@ -10,6 +10,9 @@ Configuration via environment variables: - MELLEA_TRACE_APPLICATION: Enable/disable application tracing (default: false) - MELLEA_TRACE_BACKEND: Enable/disable backend tracing (default: false) +- MELLEA_TRACE_CONTENT: Capture prompt/response content in spans (default: false). + Content may include PII — enable only in controlled environments. + Also recognised: OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT (OTel standard). 
- OTEL_EXPORTER_OTLP_ENDPOINT: OTLP endpoint for trace export - OTEL_SERVICE_NAME: Service name for traces (default: mellea) """ @@ -42,6 +45,11 @@ _TRACE_BACKEND_ENABLED = _OTEL_AVAILABLE and os.getenv( "MELLEA_TRACE_BACKEND", "false" ).lower() in ("true", "1", "yes") +_TRACE_CONTENT_ENABLED = _OTEL_AVAILABLE and ( + os.getenv("MELLEA_TRACE_CONTENT", "false").lower() in ("true", "1", "yes") + or os.getenv("OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "false").lower() + in ("true", "1", "yes") +) _OTLP_ENDPOINT = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT") _SERVICE_NAME = os.getenv("OTEL_SERVICE_NAME", "mellea") _CONSOLE_EXPORT = os.getenv("MELLEA_TRACE_CONSOLE", "false").lower() in ( @@ -113,6 +121,33 @@ def is_backend_tracing_enabled() -> bool: return _TRACE_BACKEND_ENABLED +def is_content_tracing_enabled() -> bool: + """Check if content capture is enabled. + + Content capture records prompt and response text on spans and may contain PII. + Enable only in controlled environments. + + Returns: + True if enabled via ``MELLEA_TRACE_CONTENT`` or + ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT``. + """ + return _TRACE_CONTENT_ENABLED + + +def add_span_event( + span: Any, name: str, attributes: dict[str, Any] | None = None +) -> None: + """Add a named event to a span if the span is not None. + + Args: + span: The span object (may be None if tracing is disabled). + name: Event name. + attributes: Optional event attributes. + """ + if span is not None and _OTEL_AVAILABLE: + span.add_event(name, attributes=attributes or {}) + + @contextmanager def trace_application(name: str, **attributes: Any) -> Generator[Any, None, None]: """Create an application trace span if application tracing is enabled. 
@@ -246,9 +281,11 @@ def set_span_error(span: Any, exception: Exception) -> None: __all__ = [ + "add_span_event", "end_backend_span", "is_application_tracing_enabled", "is_backend_tracing_enabled", + "is_content_tracing_enabled", "set_span_attribute", "set_span_error", "start_backend_span", diff --git a/test/telemetry/test_genai_semconv_emission.py b/test/telemetry/test_genai_semconv_emission.py new file mode 100644 index 000000000..2aa7a77c1 --- /dev/null +++ b/test/telemetry/test_genai_semconv_emission.py @@ -0,0 +1,185 @@ +"""Unit tests for OTel GenAI semantic convention attribute emission. + +Covers: gen_ai.provider.name (gap 1), gen_ai.conversation.id (gap 2), +error.type + ERROR status (gap 4), and the MELLEA_TRACE_CONTENT flag. + +All tests use a fake span and do not require a live backend or OTel SDK. +""" + +from unittest.mock import MagicMock, patch + +from mellea.telemetry.backend_instrumentation import ( + finalize_backend_span, + get_provider_name, + get_system_name, + start_generate_span, +) +from mellea.telemetry.context import with_context +from mellea.telemetry.tracing import add_span_event, is_content_tracing_enabled + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _mock_span() -> MagicMock: + return MagicMock() + + +def _fake_backend(class_name: str) -> object: + return type(class_name, (), {})() + + +def _span_attrs(span: MagicMock) -> dict: + """Collect all set_attribute calls into a flat dict.""" + return {call.args[0]: call.args[1] for call in span.set_attribute.call_args_list} + + +# --------------------------------------------------------------------------- +# Gap 1: gen_ai.provider.name alongside gen_ai.system +# --------------------------------------------------------------------------- + + +def test_provider_name_equals_system_name(): + backend = _fake_backend("OpenAIBackend") + assert 
get_provider_name(backend) == get_system_name(backend) == "openai" + + +def test_provider_name_emitted_in_start_generate_span(): + """Both gen_ai.system and gen_ai.provider.name should be set on the span.""" + backend = _fake_backend("OpenAIBackend") + backend.model_id = "gpt-4" # type: ignore[attr-defined] + action = MagicMock() + + with patch("mellea.telemetry.tracing.start_backend_span") as mock_start: + mock_start.return_value = _mock_span() + start_generate_span(backend, action, ctx=[], format=None, tool_calls=False) + + call_kwargs = mock_start.call_args[1] + assert call_kwargs.get("gen_ai.system") == "openai" + assert call_kwargs.get("gen_ai.provider.name") == "openai" + + +# --------------------------------------------------------------------------- +# Gap 2: gen_ai.conversation.id from session_id ContextVar +# --------------------------------------------------------------------------- + + +def test_conversation_id_emitted_from_session_id(): + backend = _fake_backend("OpenAIBackend") + backend.model_id = "gpt-4" # type: ignore[attr-defined] + action = MagicMock() + + with with_context(session_id="sess-abc"): + with patch("mellea.telemetry.tracing.start_backend_span") as mock_start: + mock_start.return_value = _mock_span() + start_generate_span(backend, action, ctx=[], format=None, tool_calls=False) + + call_kwargs = mock_start.call_args[1] + assert call_kwargs.get("gen_ai.conversation.id") == "sess-abc" + assert call_kwargs.get("mellea.session_id") == "sess-abc" + + +def test_conversation_id_absent_when_no_session(): + backend = _fake_backend("OpenAIBackend") + backend.model_id = "gpt-4" # type: ignore[attr-defined] + action = MagicMock() + + with patch("mellea.telemetry.tracing.start_backend_span") as mock_start: + mock_start.return_value = _mock_span() + start_generate_span(backend, action, ctx=[], format=None, tool_calls=False) + + call_kwargs = mock_start.call_args[1] + assert "gen_ai.conversation.id" not in call_kwargs + + +# 
--------------------------------------------------------------------------- +# Gap 4: ERROR span status + error.type +# --------------------------------------------------------------------------- + + +def test_error_sets_status_and_error_type(): + span = _mock_span() + exc = RuntimeError("model rejected") + + with ( + patch( + "mellea.telemetry.backend_instrumentation.set_span_error" + ) as mock_set_err, + patch("mellea.telemetry.backend_instrumentation.end_backend_span") as mock_end, + ): + finalize_backend_span(span, error=exc) + + mock_set_err.assert_called_once_with(span, exc) + assert _span_attrs(span).get("error.type") == "RuntimeError" + mock_end.assert_called_once_with(span) + + +def test_error_path_always_closes_span(): + span = _mock_span() + with patch("mellea.telemetry.backend_instrumentation.set_span_error"): + with patch( + "mellea.telemetry.backend_instrumentation.end_backend_span" + ) as mock_end: + finalize_backend_span(span, error=ValueError("x")) + mock_end.assert_called_once() + + +def test_finalize_never_raises_on_span_error(): + """finalize_backend_span must not propagate exceptions from helpers.""" + span = _mock_span() + span.set_attribute.side_effect = RuntimeError("span broke") + + with patch("mellea.telemetry.backend_instrumentation.end_backend_span"): + with patch("mellea.telemetry.backend_instrumentation.set_span_error"): + finalize_backend_span(span, error=ValueError("test")) + + +def test_finalize_never_raises_if_end_span_raises(): + """end_backend_span exceptions must not propagate on the error path.""" + span = _mock_span() + with patch( + "mellea.telemetry.backend_instrumentation.end_backend_span", + side_effect=RuntimeError("sdk shutdown"), + ): + with patch("mellea.telemetry.backend_instrumentation.set_span_error"): + finalize_backend_span(span, error=ValueError("original error")) + + +def test_finalize_none_span_is_noop(): + finalize_backend_span(None, error=RuntimeError("x")) + + +# 
--------------------------------------------------------------------------- +# Content tracing default (infrastructure for deferred gap 5) +# --------------------------------------------------------------------------- + + +def test_content_tracing_disabled_by_default(): + assert not is_content_tracing_enabled() + + +# --------------------------------------------------------------------------- +# add_span_event helper +# --------------------------------------------------------------------------- + + +def test_add_span_event_calls_span_add_event(): + span = _mock_span() + with patch("mellea.telemetry.tracing._OTEL_AVAILABLE", True): + add_span_event(span, "gen_ai.content.prompt", {"gen_ai.prompt": "hello"}) + span.add_event.assert_called_once_with( + "gen_ai.content.prompt", attributes={"gen_ai.prompt": "hello"} + ) + + +def test_add_span_event_none_span_is_noop(): + with patch("mellea.telemetry.tracing._OTEL_AVAILABLE", True): + add_span_event(None, "gen_ai.content.prompt") + + +def test_add_span_event_defaults_to_empty_attributes(): + span = _mock_span() + with patch("mellea.telemetry.tracing._OTEL_AVAILABLE", True): + add_span_event(span, "gen_ai.content.completion") + span.add_event.assert_called_once_with("gen_ai.content.completion", attributes={}) diff --git a/test/telemetry/test_tracing.py b/test/telemetry/test_tracing.py index af83de5ad..b1a058680 100644 --- a/test/telemetry/test_tracing.py +++ b/test/telemetry/test_tracing.py @@ -200,31 +200,6 @@ def __init__(self): assert get_context_size(ctx) == 3 -def test_instrument_generate_from_context(): - """Test instrument_generate_from_context helper.""" - from mellea.telemetry.backend_instrumentation import ( - instrument_generate_from_context, - ) - - class MockBackend: - model_id = "test-model" - - class MockAction: - pass - - class MockContext: - turns = [] - - backend = MockBackend() - action = MockAction() - ctx = MockContext() - - # Should return a context manager - with 
instrument_generate_from_context(backend, action, ctx) as span: - # Span will be None when tracing is disabled - assert span is None or hasattr(span, "set_attribute") - - def test_instrument_generate_from_raw(): """Test instrument_generate_from_raw helper.""" from mellea.telemetry.backend_instrumentation import instrument_generate_from_raw