From eb36b1e3425601b1fba314080e6d8a2131d4436f Mon Sep 17 00:00:00 2001 From: Xiting Zhang Date: Wed, 25 Mar 2026 14:59:31 -0700 Subject: [PATCH 1/3] [VoiceLive] Add built-in OpenTelemetry tracing support --- sdk/voicelive/azure-ai-voicelive/CHANGELOG.md | 9 + sdk/voicelive/azure-ai-voicelive/README.md | 75 +++ .../checkstyle-suppressions.xml | 12 + sdk/voicelive/azure-ai-voicelive/pom.xml | 42 ++ .../ai/voicelive/VoiceLiveAsyncClient.java | 94 ++-- .../ai/voicelive/VoiceLiveClientBuilder.java | 49 +- .../VoiceLiveSessionAsyncClient.java | 67 ++- .../azure/ai/voicelive/VoiceLiveTracer.java | 500 ++++++++++++++++++ .../src/main/java/module-info.java | 2 + .../com/azure/ai/voicelive/ReadmeSamples.java | 29 + .../azure/ai/voicelive/TelemetrySample.java | 152 ++++++ .../ai/voicelive/VoiceAssistantSample.java | 58 +- .../voicelive/VoiceLiveClientBuilderTest.java | 53 ++ .../ai/voicelive/VoiceLiveTracerTest.java | 379 +++++++++++++ 14 files changed, 1471 insertions(+), 50 deletions(-) create mode 100644 sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveTracer.java create mode 100644 sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/TelemetrySample.java create mode 100644 sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveTracerTest.java diff --git a/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md b/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md index 5969b2be656e..e662e467a646 100644 --- a/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md +++ b/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md @@ -4,6 +4,15 @@ ### Features Added +- Added built-in OpenTelemetry tracing support for voice sessions following GenAI Semantic Conventions: + - `VoiceLiveClientBuilder.openTelemetry(OpenTelemetry)` method for providing a custom OpenTelemetry instance + - Defaults to `GlobalOpenTelemetry.getOrNoop()` for automatic Java agent detection with zero-cost no-op fallback + - Emits spans for `connect`, `send`, `recv`, and 
`close` operations with voice-specific attributes + - Session-level counters: turn count, interruption count, audio bytes sent/received, first token latency + - Per-message attributes: token usage, event types, error details + - Content recording controlled via `enableContentRecording(boolean)` or `AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED` environment variable +- Added `TelemetrySample.java` demonstrating OpenTelemetry integration patterns + ### Breaking Changes ### Bugs Fixed diff --git a/sdk/voicelive/azure-ai-voicelive/README.md b/sdk/voicelive/azure-ai-voicelive/README.md index a57e3e5cb387..e305a6ebabef 100644 --- a/sdk/voicelive/azure-ai-voicelive/README.md +++ b/sdk/voicelive/azure-ai-voicelive/README.md @@ -126,6 +126,7 @@ The following sections provide code snippets for common scenarios: * [Handle event types](#handle-event-types) * [Voice configuration](#voice-configuration) * [Function calling](#function-calling) +* [Telemetry and tracing](#telemetry-and-tracing) * [Complete voice assistant with microphone](#complete-voice-assistant-with-microphone) ### Focused Sample Files @@ -166,6 +167,12 @@ For easier learning, explore these focused samples in order: - Execute functions locally and return results - Continue conversation with function results +7. 
**TelemetrySample.java** - OpenTelemetry tracing integration + - Automatic tracing via GlobalOpenTelemetry (zero-config) + - Explicit OpenTelemetry instance via builder + - Span structure and session-level attributes + - Azure Monitor integration example + > **Note:** To run audio samples (AudioPlaybackSample, MicrophoneInputSample, VoiceAssistantSample, FunctionCallingSample): > ```bash > mvn exec:java -Dexec.mainClass=com.azure.ai.voicelive.FunctionCallingSample -Dexec.classpathScope=test @@ -397,6 +404,74 @@ client.startSession("gpt-4o-realtime-preview") * Results are sent back to continue the conversation * See `FunctionCallingSample.java` for a complete working example +### Telemetry and tracing + +The SDK has built-in [OpenTelemetry](https://opentelemetry.io/) tracing that emits spans for every WebSocket operation. When no OpenTelemetry SDK is present, all tracing calls are automatically no-op with zero performance impact. + +#### Automatic tracing (recommended) + +If the [OpenTelemetry Java agent](https://opentelemetry.io/docs/languages/java/automatic/) is attached, or `GlobalOpenTelemetry` is configured, tracing works automatically with no code changes: + +```java com.azure.ai.voicelive.tracing.automatic +// No special configuration needed — tracing is picked up from GlobalOpenTelemetry +VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(apiKey)) + .buildAsyncClient(); +``` + +#### Explicit OpenTelemetry instance + +Provide your own `OpenTelemetry` instance to control trace export: + +```java com.azure.ai.voicelive.tracing.explicit +VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(apiKey)) + .openTelemetry(otel) + .buildAsyncClient(); +``` + +#### Span structure + +When tracing is active, the following span hierarchy is emitted for each voice session: + +``` +connect gpt-4o-realtime-preview ← session lifetime span +├── send 
session.update ← one span per sent event +├── send response.create +├── recv session.created ← one span per received event +├── recv response.audio.delta +├── recv response.done ← includes token usage attributes +└── close +``` + +**Session-level attributes** (on the connect span): +- `gen_ai.system` — `openai` +- `gen_ai.request.model` — Model name (e.g., `gpt-4o-realtime-preview`) +- `server.address` — Service endpoint +- `gen_ai.voice.session_id` — Voice session ID +- `gen_ai.voice.turn_count` — Completed response turns +- `gen_ai.voice.interruption_count` — User interruptions +- `gen_ai.voice.audio_bytes_sent` / `audio_bytes_received` — Audio payload bytes +- `gen_ai.voice.first_token_latency_ms` — Time to first audio response + +#### Content recording + +By default, message payloads are not recorded in spans for privacy. Enable content recording via the builder or environment variable: + +```java +// Via builder +new VoiceLiveClientBuilder() + .enableContentRecording(true) + // ... + +// Or via environment variable +// AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED=true +``` + +> See `TelemetrySample.java` for complete tracing examples including Azure Monitor integration. + ### Complete voice assistant with microphone A full example demonstrating real-time microphone input and audio playback: diff --git a/sdk/voicelive/azure-ai-voicelive/checkstyle-suppressions.xml b/sdk/voicelive/azure-ai-voicelive/checkstyle-suppressions.xml index 6397e9e904dd..957cb8620c20 100644 --- a/sdk/voicelive/azure-ai-voicelive/checkstyle-suppressions.xml +++ b/sdk/voicelive/azure-ai-voicelive/checkstyle-suppressions.xml @@ -3,6 +3,18 @@ + + + + + + + + + + + + diff --git a/sdk/voicelive/azure-ai-voicelive/pom.xml b/sdk/voicelive/azure-ai-voicelive/pom.xml index 3eea97993716..bd5e20f8e99b 100644 --- a/sdk/voicelive/azure-ai-voicelive/pom.xml +++ b/sdk/voicelive/azure-ai-voicelive/pom.xml @@ -56,6 +56,11 @@ Code generated by Microsoft (R) TypeSpec Code Generator. 
azure-core-http-netty 1.16.3 + + io.opentelemetry + opentelemetry-api + 1.58.0 + com.azure azure-core-test @@ -82,5 +87,42 @@ Code generated by Microsoft (R) TypeSpec Code Generator. 3.7.11 test + + io.opentelemetry + opentelemetry-sdk + 1.58.0 + test + + + io.opentelemetry + opentelemetry-sdk-testing + 1.58.0 + test + + + io.opentelemetry + opentelemetry-exporter-logging + 1.58.0 + test + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.6.1 + + + + + io.opentelemetry:opentelemetry-api:[1.58.0] + + + + + + + diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveAsyncClient.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveAsyncClient.java index 000fcf98a5cd..ee9ae084b547 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveAsyncClient.java +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveAsyncClient.java @@ -18,6 +18,7 @@ import com.azure.core.http.HttpHeaderName; import com.azure.core.http.HttpHeaders; import com.azure.core.util.logging.ClientLogger; +import io.opentelemetry.api.trace.Tracer; import reactor.core.publisher.Mono; @@ -34,6 +35,8 @@ public final class VoiceLiveAsyncClient { private final TokenCredential tokenCredential; private final String apiVersion; private final HttpHeaders additionalHeaders; + private final Tracer tracer; + private final Boolean enableContentRecording; /** * Creates a VoiceLiveAsyncClient with API key authentication. @@ -44,11 +47,28 @@ public final class VoiceLiveAsyncClient { * @param additionalHeaders Additional headers to include in requests. */ VoiceLiveAsyncClient(URI endpoint, KeyCredential keyCredential, String apiVersion, HttpHeaders additionalHeaders) { + this(endpoint, keyCredential, apiVersion, additionalHeaders, null, null); + } + + /** + * Creates a VoiceLiveAsyncClient with API key authentication and tracing. + * + * @param endpoint The service endpoint. 
+ * @param keyCredential The API key credential. + * @param apiVersion The API version. + * @param additionalHeaders Additional headers to include in requests. + * @param tracer The OpenTelemetry Tracer for instrumentation (may be a no-op tracer). + * @param enableContentRecording Override for content recording, or null to use env var. + */ + VoiceLiveAsyncClient(URI endpoint, KeyCredential keyCredential, String apiVersion, HttpHeaders additionalHeaders, + Tracer tracer, Boolean enableContentRecording) { this.endpoint = Objects.requireNonNull(endpoint, "'endpoint' cannot be null"); this.keyCredential = Objects.requireNonNull(keyCredential, "'keyCredential' cannot be null"); this.tokenCredential = null; this.apiVersion = Objects.requireNonNull(apiVersion, "'apiVersion' cannot be null"); this.additionalHeaders = additionalHeaders != null ? additionalHeaders : new HttpHeaders(); + this.tracer = tracer; + this.enableContentRecording = enableContentRecording; } /** @@ -61,11 +81,28 @@ public final class VoiceLiveAsyncClient { */ VoiceLiveAsyncClient(URI endpoint, TokenCredential tokenCredential, String apiVersion, HttpHeaders additionalHeaders) { + this(endpoint, tokenCredential, apiVersion, additionalHeaders, null, null); + } + + /** + * Creates a VoiceLiveAsyncClient with token authentication and tracing. + * + * @param endpoint The service endpoint. + * @param tokenCredential The token credential. + * @param apiVersion The API version. + * @param additionalHeaders Additional headers to include in requests. + * @param tracer The OpenTelemetry Tracer for instrumentation (may be a no-op tracer). + * @param enableContentRecording Override for content recording, or null to use env var. 
+ */ + VoiceLiveAsyncClient(URI endpoint, TokenCredential tokenCredential, String apiVersion, + HttpHeaders additionalHeaders, Tracer tracer, Boolean enableContentRecording) { this.endpoint = Objects.requireNonNull(endpoint, "'endpoint' cannot be null"); this.keyCredential = null; this.tokenCredential = Objects.requireNonNull(tokenCredential, "'tokenCredential' cannot be null"); this.apiVersion = Objects.requireNonNull(apiVersion, "'apiVersion' cannot be null"); this.additionalHeaders = additionalHeaders != null ? additionalHeaders : new HttpHeaders(); + this.tracer = tracer; + this.enableContentRecording = enableContentRecording; } /** @@ -79,12 +116,7 @@ public Mono startSession(String model) { Objects.requireNonNull(model, "'model' cannot be null"); return Mono.fromCallable(() -> convertToWebSocketEndpoint(endpoint, model)).flatMap(wsEndpoint -> { - VoiceLiveSessionAsyncClient session; - if (keyCredential != null) { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential); - } else { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential); - } + VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, model); return session.connect(additionalHeaders).thenReturn(session); }); } @@ -97,12 +129,7 @@ public Mono startSession(String model) { */ public Mono startSession() { return Mono.fromCallable(() -> convertToWebSocketEndpoint(endpoint, null)).flatMap(wsEndpoint -> { - VoiceLiveSessionAsyncClient session; - if (keyCredential != null) { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential); - } else { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential); - } + VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, null); return session.connect(additionalHeaders).thenReturn(session); }); } @@ -122,12 +149,7 @@ public Mono startSession(String model, VoiceLiveReq return Mono .fromCallable(() -> convertToWebSocketEndpoint(endpoint, model, 
requestOptions.getCustomQueryParameters())) .flatMap(wsEndpoint -> { - VoiceLiveSessionAsyncClient session; - if (keyCredential != null) { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential); - } else { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential); - } + VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, model); // Merge additional headers with custom headers from requestOptions HttpHeaders mergedHeaders = mergeHeaders(additionalHeaders, requestOptions.getCustomHeaders()); return session.connect(mergedHeaders).thenReturn(session); @@ -148,12 +170,7 @@ public Mono startSession(VoiceLiveRequestOptions re return Mono .fromCallable(() -> convertToWebSocketEndpoint(endpoint, null, requestOptions.getCustomQueryParameters())) .flatMap(wsEndpoint -> { - VoiceLiveSessionAsyncClient session; - if (keyCredential != null) { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential); - } else { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential); - } + VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, null); // Merge additional headers with custom headers from requestOptions HttpHeaders mergedHeaders = mergeHeaders(additionalHeaders, requestOptions.getCustomHeaders()); return session.connect(mergedHeaders).thenReturn(session); @@ -176,12 +193,7 @@ public Mono startSession(AgentSessionConfig agentCo return Mono.fromCallable(() -> convertToWebSocketEndpoint(endpoint, null, agentConfig.toQueryParameters())) .flatMap(wsEndpoint -> { - VoiceLiveSessionAsyncClient session; - if (keyCredential != null) { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential); - } else { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential); - } + VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, null); return session.connect(additionalHeaders).thenReturn(session); }); } @@ -211,18 +223,28 @@ public Mono 
startSession(AgentSessionConfig agentCo } return Mono.fromCallable(() -> convertToWebSocketEndpoint(endpoint, null, mergedParams)).flatMap(wsEndpoint -> { - VoiceLiveSessionAsyncClient session; - if (keyCredential != null) { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential); - } else { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential); - } + VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, null); // Merge additional headers with custom headers from requestOptions HttpHeaders mergedHeaders = mergeHeaders(additionalHeaders, requestOptions.getCustomHeaders()); return session.connect(mergedHeaders).thenReturn(session); }); } + /** + * Creates a VoiceLiveSessionAsyncClient with the appropriate credentials and optional tracing. + * + * @param wsEndpoint The WebSocket endpoint URI. + * @param model The model name, used for tracing span names. + * @return A new VoiceLiveSessionAsyncClient instance. + */ + private VoiceLiveSessionAsyncClient createSessionClient(URI wsEndpoint, String model) { + if (keyCredential != null) { + return new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential, tracer, model, enableContentRecording); + } else { + return new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential, tracer, model, enableContentRecording); + } + } + /** * Merges two HttpHeaders objects, with custom headers taking precedence. 
* diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java index e655bde45d91..41ff285b4f1e 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java @@ -17,6 +17,9 @@ import com.azure.core.util.ClientOptions; import com.azure.core.util.CoreUtils; import com.azure.core.util.logging.ClientLogger; +import io.opentelemetry.api.GlobalOpenTelemetry; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.trace.Tracer; /** * Builder for creating instances of {@link VoiceLiveAsyncClient}. @@ -31,6 +34,8 @@ public final class VoiceLiveClientBuilder implements TokenCredentialTraitIf not set, defaults to {@link GlobalOpenTelemetry#getOrNoop()}, which automatically + * uses the OpenTelemetry instance installed by the Java agent (if present), or a no-op + * implementation that has zero performance impact.

+ * + *

When an OpenTelemetry SDK is configured (either globally or via this method), the SDK + * automatically emits spans for connect, send, recv, and close operations with voice-specific + * attributes and session-level counters following GenAI Semantic Conventions.

+ * + * @param openTelemetry The OpenTelemetry instance. + * @return The updated VoiceLiveClientBuilder instance. + * @throws NullPointerException if {@code openTelemetry} is null. + */ + public VoiceLiveClientBuilder openTelemetry(OpenTelemetry openTelemetry) { + this.openTelemetry = Objects.requireNonNull(openTelemetry, "'openTelemetry' cannot be null"); + return this; + } + + /** + * Enables or disables content recording in trace spans. + * + *

When enabled, full JSON payloads (including audio data) will be captured in span events. + * This is off by default for privacy. Can also be controlled via the + * {@code AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED} environment variable.

+ * + * @param enableContentRecording true to enable content recording in spans. + * @return The updated VoiceLiveClientBuilder instance. + */ + public VoiceLiveClientBuilder enableContentRecording(boolean enableContentRecording) { + this.enableContentRecording = enableContentRecording; + return this; + } + /** * Builds a {@link VoiceLiveAsyncClient} instance with the configured options. * @@ -125,10 +165,15 @@ public VoiceLiveAsyncClient buildAsyncClient() { VoiceLiveServiceVersion version = serviceVersion != null ? serviceVersion : VoiceLiveServiceVersion.getLatest(); HttpHeaders additionalHeaders = CoreUtils.createHttpHeadersFromClientOptions(clientOptions); + OpenTelemetry otel = openTelemetry != null ? openTelemetry : GlobalOpenTelemetry.getOrNoop(); + Tracer tracer = otel.getTracer("azure-ai-voicelive", "1.0.0-beta.6"); + if (keyCredential != null) { - return new VoiceLiveAsyncClient(endpoint, keyCredential, version.getVersion(), additionalHeaders); + return new VoiceLiveAsyncClient(endpoint, keyCredential, version.getVersion(), additionalHeaders, tracer, + enableContentRecording); } else { - return new VoiceLiveAsyncClient(endpoint, tokenCredential, version.getVersion(), additionalHeaders); + return new VoiceLiveAsyncClient(endpoint, tokenCredential, version.getVersion(), additionalHeaders, tracer, + enableContentRecording); } } } diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveSessionAsyncClient.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveSessionAsyncClient.java index d1bc2f098e03..4622391b9208 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveSessionAsyncClient.java +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveSessionAsyncClient.java @@ -39,6 +39,7 @@ import io.netty.buffer.Unpooled; import io.netty.handler.codec.http.websocketx.TextWebSocketFrame; import 
io.netty.handler.codec.http.websocketx.WebSocketFrame; +import io.opentelemetry.api.trace.Tracer; import reactor.core.Disposable; import reactor.core.publisher.BufferOverflowStrategy; import reactor.core.publisher.Flux; @@ -95,6 +96,7 @@ public final class VoiceLiveSessionAsyncClient implements AsyncCloseable, AutoCl private final KeyCredential keyCredential; private final TokenCredential tokenCredential; private final SerializerAdapter serializer; + private final VoiceLiveTracer voiceLiveTracer; private final AtomicBoolean isConnected = new AtomicBoolean(false); private final AtomicBoolean isClosed = new AtomicBoolean(false); @@ -122,10 +124,26 @@ public final class VoiceLiveSessionAsyncClient implements AsyncCloseable, AutoCl * @param keyCredential The API key credential. */ VoiceLiveSessionAsyncClient(URI endpoint, KeyCredential keyCredential) { + this(endpoint, keyCredential, null, null, null); + } + + /** + * Creates a new VoiceLiveSessionAsyncClient with API key authentication and tracing. + * + * @param endpoint The WebSocket endpoint. + * @param keyCredential The API key credential. + * @param tracer The OpenTelemetry Tracer (may be a no-op tracer). + * @param model The model name for span naming. + * @param enableContentRecording Override for content recording, or null to use env var. + */ + VoiceLiveSessionAsyncClient(URI endpoint, KeyCredential keyCredential, Tracer tracer, String model, + Boolean enableContentRecording) { this.endpoint = Objects.requireNonNull(endpoint, "'endpoint' cannot be null"); this.keyCredential = Objects.requireNonNull(keyCredential, "'keyCredential' cannot be null"); this.tokenCredential = null; this.serializer = JacksonAdapter.createDefaultSerializerAdapter(); + this.voiceLiveTracer + = tracer != null ? 
new VoiceLiveTracer(tracer, endpoint, model, enableContentRecording) : null; } /** @@ -135,10 +153,26 @@ public final class VoiceLiveSessionAsyncClient implements AsyncCloseable, AutoCl * @param tokenCredential The token credential. */ VoiceLiveSessionAsyncClient(URI endpoint, TokenCredential tokenCredential) { + this(endpoint, tokenCredential, null, null, null); + } + + /** + * Creates a new VoiceLiveSessionAsyncClient with token authentication and tracing. + * + * @param endpoint The WebSocket endpoint. + * @param tokenCredential The token credential. + * @param tracer The OpenTelemetry Tracer (may be a no-op tracer). + * @param model The model name for span naming. + * @param enableContentRecording Override for content recording, or null to use env var. + */ + VoiceLiveSessionAsyncClient(URI endpoint, TokenCredential tokenCredential, Tracer tracer, String model, + Boolean enableContentRecording) { this.endpoint = Objects.requireNonNull(endpoint, "'endpoint' cannot be null"); this.keyCredential = null; this.tokenCredential = Objects.requireNonNull(tokenCredential, "'tokenCredential' cannot be null"); this.serializer = JacksonAdapter.createDefaultSerializerAdapter(); + this.voiceLiveTracer + = tracer != null ? 
new VoiceLiveTracer(tracer, endpoint, model, enableContentRecording) : null; } /** @@ -160,6 +194,11 @@ Mono connect(HttpHeaders additionalHeaders) { return Mono.error(new IllegalStateException("Session lifecycle already active")); } + // Start the connect span (session lifetime) + if (voiceLiveTracer != null) { + voiceLiveTracer.startConnectSpan(); + } + Sinks.One readySink = Sinks.one(); Sinks.One closeSignal = Sinks.one(); connectionCloseSignalRef.set(closeSignal); @@ -263,6 +302,11 @@ Mono connect(HttpHeaders additionalHeaders) { readySink.tryEmitError(error); connectionCloseSignalRef.compareAndSet(closeSignal, null); disposeLifecycleSubscription(); + + // End the connect span on error + if (voiceLiveTracer != null) { + voiceLiveTracer.endConnectSpan(error); + } }, () -> { LOGGER.info("WebSocket handler completed"); connectionCloseSignalRef.compareAndSet(closeSignal, null); @@ -281,6 +325,13 @@ Mono connect(HttpHeaders additionalHeaders) { public Mono closeAsync() { if (isClosed.compareAndSet(false, true)) { LOGGER.info("Closing VoiceLive session"); + + // Trace the close operation and end the connect span + if (voiceLiveTracer != null) { + voiceLiveTracer.traceClose(); + voiceLiveTracer.endConnectSpan(null); + } + sendSink.tryEmitComplete(); Sinks.One closeSignal = connectionCloseSignalRef.getAndSet(null); @@ -351,6 +402,12 @@ public Mono sendEvent(ClientEvent event) { return Mono.fromCallable(() -> { try { String json = serializer.serialize(event, SerializerEncoding.JSON); + + // Trace the send operation + if (voiceLiveTracer != null) { + voiceLiveTracer.traceSend(event, json); + } + return BinaryData.fromString(json); } catch (IOException e) { throw LOGGER.logExceptionAsError(new RuntimeException("Failed to serialize event", e)); @@ -401,7 +458,15 @@ public Flux receiveEvents() { .onBackpressureBuffer(INBOUND_BUFFER_CAPACITY, dropped -> LOGGER.error("Inbound buffer overflow; dropped {} bytes", dropped.toBytes().length), BufferOverflowStrategy.ERROR) - 
.flatMap(this::parseToSessionUpdate) + .flatMap(data -> { + String rawPayload = data.toString(); + return parseToSessionUpdate(data).doOnNext(update -> { + // Trace the recv operation + if (voiceLiveTracer != null) { + voiceLiveTracer.traceRecv(update, rawPayload); + } + }); + }) .doOnError(error -> LOGGER.error("Failed to parse session update", error)) .onErrorResume(error -> { LOGGER.warning("Skipping unrecognized server event: {}", error.getMessage()); diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveTracer.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveTracer.java new file mode 100644 index 000000000000..983ca8eef213 --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveTracer.java @@ -0,0 +1,500 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.voicelive; + +import com.azure.ai.voicelive.models.ClientEvent; +import com.azure.ai.voicelive.models.ClientEventInputAudioBufferAppend; +import com.azure.ai.voicelive.models.ClientEventResponseCancel; +import com.azure.ai.voicelive.models.ClientEventResponseCreate; +import com.azure.ai.voicelive.models.ClientEventSessionUpdate; +import com.azure.ai.voicelive.models.ResponseTokenStatistics; +import com.azure.ai.voicelive.models.SessionResponse; +import com.azure.ai.voicelive.models.SessionUpdate; +import com.azure.ai.voicelive.models.SessionUpdateError; +import com.azure.ai.voicelive.models.SessionUpdateErrorDetails; +import com.azure.ai.voicelive.models.SessionUpdateResponseAudioDelta; +import com.azure.ai.voicelive.models.SessionUpdateResponseDone; +import com.azure.ai.voicelive.models.SessionUpdateSessionCreated; +import com.azure.ai.voicelive.models.SessionUpdateSessionUpdated; +import com.azure.ai.voicelive.models.VoiceLiveSessionOptions; +import com.azure.ai.voicelive.models.VoiceLiveSessionResponse; +import 
com.azure.core.util.Configuration; +import com.azure.core.util.ConfigurationProperty; +import com.azure.core.util.ConfigurationPropertyBuilder; +import com.azure.core.util.logging.ClientLogger; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.common.AttributesBuilder; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanBuilder; +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.api.trace.StatusCode; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.context.Context; + +import java.net.URI; +import java.util.Base64; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Tracer for VoiceLive WebSocket sessions using the OpenTelemetry API. + *

+ * Manages a parent "connect" span for the session lifetime, with child spans for + * send, recv, and close operations. Tracks session-level counters for audio bytes, + * turn counts, interruptions, and first-token latency. + *

+ */ +final class VoiceLiveTracer { + + private static final ClientLogger LOGGER = new ClientLogger(VoiceLiveTracer.class); + + // GenAI semantic convention attribute keys + static final AttributeKey GEN_AI_SYSTEM = AttributeKey.stringKey("gen_ai.system"); + static final String GEN_AI_SYSTEM_VALUE = "az.ai.voicelive"; + static final AttributeKey GEN_AI_OPERATION_NAME = AttributeKey.stringKey("gen_ai.operation.name"); + static final AttributeKey GEN_AI_REQUEST_MODEL = AttributeKey.stringKey("gen_ai.request.model"); + static final AttributeKey AZ_NAMESPACE = AttributeKey.stringKey("az.namespace"); + static final String AZ_NAMESPACE_VALUE = "Microsoft.CognitiveServices"; + static final AttributeKey SERVER_ADDRESS = AttributeKey.stringKey("server.address"); + static final AttributeKey SERVER_PORT = AttributeKey.longKey("server.port"); + + // Voice-specific attribute keys + static final AttributeKey GEN_AI_VOICE_SESSION_ID = AttributeKey.stringKey("gen_ai.voice.session_id"); + static final AttributeKey GEN_AI_VOICE_INPUT_AUDIO_FORMAT + = AttributeKey.stringKey("gen_ai.voice.input_audio_format"); + static final AttributeKey GEN_AI_VOICE_OUTPUT_AUDIO_FORMAT + = AttributeKey.stringKey("gen_ai.voice.output_audio_format"); + static final AttributeKey GEN_AI_VOICE_TURN_COUNT = AttributeKey.longKey("gen_ai.voice.turn_count"); + static final AttributeKey GEN_AI_VOICE_INTERRUPTION_COUNT + = AttributeKey.longKey("gen_ai.voice.interruption_count"); + static final AttributeKey GEN_AI_VOICE_AUDIO_BYTES_SENT + = AttributeKey.longKey("gen_ai.voice.audio_bytes_sent"); + static final AttributeKey GEN_AI_VOICE_AUDIO_BYTES_RECEIVED + = AttributeKey.longKey("gen_ai.voice.audio_bytes_received"); + static final AttributeKey GEN_AI_VOICE_FIRST_TOKEN_LATENCY_MS + = AttributeKey.longKey("gen_ai.voice.first_token_latency_ms"); + static final AttributeKey GEN_AI_VOICE_EVENT_TYPE = AttributeKey.stringKey("gen_ai.voice.event_type"); + static final AttributeKey GEN_AI_VOICE_MESSAGE_SIZE = 
AttributeKey.longKey("gen_ai.voice.message_size"); + static final AttributeKey GEN_AI_USAGE_INPUT_TOKENS = AttributeKey.longKey("gen_ai.usage.input_tokens"); + static final AttributeKey GEN_AI_USAGE_OUTPUT_TOKENS = AttributeKey.longKey("gen_ai.usage.output_tokens"); + static final AttributeKey ERROR_TYPE = AttributeKey.stringKey("error.type"); + + // Span event names + static final String GEN_AI_INPUT_MESSAGES = "gen_ai.input.messages"; + static final String GEN_AI_OUTPUT_MESSAGES = "gen_ai.output.messages"; + static final String GEN_AI_VOICE_ERROR = "gen_ai.voice.error"; + + // Event attribute keys + private static final AttributeKey EVENT_CONTENT = AttributeKey.stringKey("gen_ai.event.content"); + private static final AttributeKey ERROR_CODE = AttributeKey.stringKey("error.code"); + private static final AttributeKey ERROR_MESSAGE = AttributeKey.stringKey("error.message"); + + // Content recording configuration + private static final ConfigurationProperty CAPTURE_MESSAGE_CONTENT + = ConfigurationPropertyBuilder.ofBoolean("azure.tracing.gen_ai.content_recording_enabled") + .environmentVariableName("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED") + .systemPropertyName("azure.tracing.gen_ai.content_recording_enabled") + .shared(true) + .defaultValue(false) + .build(); + + private final Tracer tracer; + private final boolean captureContent; + private final String serverAddress; + private final int serverPort; + private final String model; + + // Session lifetime span and its OTel context (for parenting child spans) + private final AtomicReference connectSpan = new AtomicReference<>(); + private final AtomicReference connectContext = new AtomicReference<>(); + + // Session-level counters (thread-safe) + private final AtomicLong turnCount = new AtomicLong(0); + private final AtomicLong interruptionCount = new AtomicLong(0); + private final AtomicLong audioBytesSent = new AtomicLong(0); + private final AtomicLong audioBytesReceived = new AtomicLong(0); + + // 
First-token latency tracking + private final AtomicLong responseCreateTimestampNanos = new AtomicLong(0); + private final AtomicLong firstTokenLatencyMs = new AtomicLong(-1); + + // Session attributes discovered during the session + private final AtomicReference sessionId = new AtomicReference<>(); + private final AtomicReference inputAudioFormat = new AtomicReference<>(); + private final AtomicReference outputAudioFormat = new AtomicReference<>(); + + /** + * Creates a VoiceLiveTracer. + * + * @param tracer The OpenTelemetry Tracer instance (may be a no-op tracer). + * @param endpoint The WebSocket endpoint URI. + * @param model The model name. + * @param captureContentOverride Optional override for content recording (null = use env var). + */ + VoiceLiveTracer(Tracer tracer, URI endpoint, String model, Boolean captureContentOverride) { + this.tracer = tracer; + this.model = model; + + if (endpoint != null) { + this.serverAddress = endpoint.getHost(); + this.serverPort + = endpoint.getPort() == -1 ? ("wss".equals(endpoint.getScheme()) ? 443 : 80) : endpoint.getPort(); + } else { + this.serverAddress = null; + this.serverPort = -1; + } + + if (captureContentOverride != null) { + this.captureContent = captureContentOverride; + } else { + this.captureContent = Configuration.getGlobalConfiguration().get(CAPTURE_MESSAGE_CONTENT); + } + } + + // ============================================================================ + // Connect Span (session lifetime) + // ============================================================================ + + /** + * Starts the parent "connect" span for the session lifetime. + */ + void startConnectSpan() { + String spanName = model != null ? 
"connect " + model : "connect"; + + SpanBuilder spanBuilder = tracer.spanBuilder(spanName) + .setSpanKind(SpanKind.CLIENT) + .setAttribute(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE) + .setAttribute(GEN_AI_OPERATION_NAME, "connect") + .setAttribute(AZ_NAMESPACE, AZ_NAMESPACE_VALUE); + + if (model != null) { + spanBuilder.setAttribute(GEN_AI_REQUEST_MODEL, model); + } + if (serverAddress != null) { + spanBuilder.setAttribute(SERVER_ADDRESS, serverAddress); + if (serverPort != 443 && serverPort != -1) { + spanBuilder.setAttribute(SERVER_PORT, (long) serverPort); + } + } + + Span span = spanBuilder.startSpan(); + Context ctx = Context.current().with(span); + connectSpan.set(span); + connectContext.set(ctx); + } + + /** + * Ends the connect span, flushing session-level counters as attributes. + * + * @param error The error that caused the session to close, or null. + */ + void endConnectSpan(Throwable error) { + Span span = connectSpan.getAndSet(null); + connectContext.set(null); + if (span == null) { + return; + } + + // Flush session-level counters + String sid = sessionId.get(); + if (sid != null) { + span.setAttribute(GEN_AI_VOICE_SESSION_ID, sid); + } + String inFormat = inputAudioFormat.get(); + if (inFormat != null) { + span.setAttribute(GEN_AI_VOICE_INPUT_AUDIO_FORMAT, inFormat); + } + String outFormat = outputAudioFormat.get(); + if (outFormat != null) { + span.setAttribute(GEN_AI_VOICE_OUTPUT_AUDIO_FORMAT, outFormat); + } + span.setAttribute(GEN_AI_VOICE_TURN_COUNT, turnCount.get()); + span.setAttribute(GEN_AI_VOICE_INTERRUPTION_COUNT, interruptionCount.get()); + span.setAttribute(GEN_AI_VOICE_AUDIO_BYTES_SENT, audioBytesSent.get()); + span.setAttribute(GEN_AI_VOICE_AUDIO_BYTES_RECEIVED, audioBytesReceived.get()); + + long latency = firstTokenLatencyMs.get(); + if (latency >= 0) { + span.setAttribute(GEN_AI_VOICE_FIRST_TOKEN_LATENCY_MS, latency); + } + + if (error != null) { + span.setStatus(StatusCode.ERROR, error.getMessage()); + span.recordException(error); + 
span.setAttribute(ERROR_TYPE, error.getClass().getCanonicalName()); + } + + span.end(); + } + + // ============================================================================ + // Send Span + // ============================================================================ + + /** + * Traces a send operation. Creates a child span of the connect span. + * + * @param event The client event being sent. + * @param jsonPayload The serialized JSON payload. + */ + void traceSend(ClientEvent event, String jsonPayload) { + Context parentCtx = connectContext.get(); + if (parentCtx == null) { + return; + } + + String eventType = event.getType() != null ? event.getType().toString() : "unknown"; + String spanName = "send " + eventType; + + Span span = tracer.spanBuilder(spanName) + .setSpanKind(SpanKind.CLIENT) + .setParent(parentCtx) + .setAttribute(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE) + .setAttribute(GEN_AI_OPERATION_NAME, "send") + .setAttribute(AZ_NAMESPACE, AZ_NAMESPACE_VALUE) + .setAttribute(GEN_AI_VOICE_EVENT_TYPE, eventType) + .startSpan(); + + try { + if (span.isRecording()) { + if (jsonPayload != null) { + span.setAttribute(GEN_AI_VOICE_MESSAGE_SIZE, (long) jsonPayload.length()); + } + + // Add span event + AttributesBuilder eventAttrs = Attributes.builder() + .put(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE) + .put(GEN_AI_VOICE_EVENT_TYPE, eventType); + if (captureContent && jsonPayload != null) { + eventAttrs.put(EVENT_CONTENT, jsonPayload); + } + span.addEvent(GEN_AI_INPUT_MESSAGES, eventAttrs.build()); + + // Track session-level counters from sent events + trackSendCounters(event); + } + } finally { + span.end(); + } + } + + // ============================================================================ + // Recv Span + // ============================================================================ + + /** + * Traces a recv operation. Creates a child span of the connect span. + * + * @param update The parsed session update event. 
+ * @param rawPayload The raw JSON payload string (for message size and content recording). + */ + void traceRecv(SessionUpdate update, String rawPayload) { + Context parentCtx = connectContext.get(); + if (parentCtx == null) { + return; + } + + String eventType = update.getType() != null ? update.getType().toString() : "unknown"; + String spanName = "recv " + eventType; + + Span span = tracer.spanBuilder(spanName) + .setSpanKind(SpanKind.CLIENT) + .setParent(parentCtx) + .setAttribute(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE) + .setAttribute(GEN_AI_OPERATION_NAME, "recv") + .setAttribute(AZ_NAMESPACE, AZ_NAMESPACE_VALUE) + .setAttribute(GEN_AI_VOICE_EVENT_TYPE, eventType) + .startSpan(); + + try { + if (span.isRecording()) { + if (rawPayload != null) { + span.setAttribute(GEN_AI_VOICE_MESSAGE_SIZE, (long) rawPayload.length()); + } + + // Track per-message token usage from response.done + trackRecvTokenUsage(update, span); + + // Add span event for output messages + AttributesBuilder eventAttrs = Attributes.builder() + .put(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE) + .put(GEN_AI_VOICE_EVENT_TYPE, eventType); + if (captureContent && rawPayload != null) { + eventAttrs.put(EVENT_CONTENT, rawPayload); + } + span.addEvent(GEN_AI_OUTPUT_MESSAGES, eventAttrs.build()); + + // Track error events — set error status on span + trackErrorEvents(update, span); + + // Track session-level counters from received events + trackRecvCounters(update); + } + } finally { + span.end(); + } + } + + // ============================================================================ + // Close Span + // ============================================================================ + + /** + * Traces the close operation. 
+ */ + void traceClose() { + Context parentCtx = connectContext.get(); + if (parentCtx == null) { + return; + } + + Span span = tracer.spanBuilder("close") + .setSpanKind(SpanKind.CLIENT) + .setParent(parentCtx) + .setAttribute(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE) + .setAttribute(GEN_AI_OPERATION_NAME, "close") + .setAttribute(AZ_NAMESPACE, AZ_NAMESPACE_VALUE) + .startSpan(); + span.end(); + } + + // ============================================================================ + // Counter Tracking + // ============================================================================ + + private void trackSendCounters(ClientEvent event) { + // Track audio bytes sent from input_audio_buffer.append + if (event instanceof ClientEventInputAudioBufferAppend) { + ClientEventInputAudioBufferAppend appendEvent = (ClientEventInputAudioBufferAppend) event; + String audio = appendEvent.getAudio(); + if (audio != null) { + try { + byte[] decoded = Base64.getDecoder().decode(audio); + audioBytesSent.addAndGet(decoded.length); + } catch (IllegalArgumentException e) { + LOGGER.atVerbose().log("Failed to decode audio for byte counting", e); + } + } + } + + // Track response.create for first-token latency + if (event instanceof ClientEventResponseCreate) { + responseCreateTimestampNanos.set(System.nanoTime()); + firstTokenLatencyMs.set(-1); // Reset for this response + } + + // Track interruptions from response.cancel + if (event instanceof ClientEventResponseCancel) { + interruptionCount.incrementAndGet(); + } + + // Track audio format from session.update + if (event instanceof ClientEventSessionUpdate) { + ClientEventSessionUpdate sessionUpdate = (ClientEventSessionUpdate) event; + VoiceLiveSessionOptions session = sessionUpdate.getSession(); + if (session != null) { + if (session.getInputAudioFormat() != null) { + inputAudioFormat.set(session.getInputAudioFormat().toString()); + } + if (session.getOutputAudioFormat() != null) { + 
outputAudioFormat.set(session.getOutputAudioFormat().toString()); + } + } + } + } + + private void trackRecvCounters(SessionUpdate update) { + // Track session ID from session.created / session.updated + if (update instanceof SessionUpdateSessionCreated) { + VoiceLiveSessionResponse session = ((SessionUpdateSessionCreated) update).getSession(); + if (session != null && session.getId() != null) { + sessionId.set(session.getId()); + } + } + if (update instanceof SessionUpdateSessionUpdated) { + VoiceLiveSessionResponse session = ((SessionUpdateSessionUpdated) update).getSession(); + if (session != null && session.getId() != null) { + sessionId.set(session.getId()); + } + } + + // Track audio format from session.created / session.updated responses + if (update instanceof SessionUpdateSessionCreated) { + updateAudioFormatsFromResponse(((SessionUpdateSessionCreated) update).getSession()); + } + if (update instanceof SessionUpdateSessionUpdated) { + updateAudioFormatsFromResponse(((SessionUpdateSessionUpdated) update).getSession()); + } + + // Track audio bytes received from response.audio.delta + if (update instanceof SessionUpdateResponseAudioDelta) { + SessionUpdateResponseAudioDelta audioDelta = (SessionUpdateResponseAudioDelta) update; + byte[] delta = audioDelta.getDelta(); + if (delta != null) { + audioBytesReceived.addAndGet(delta.length); + } + + // First-token latency: measure from response.create to first audio delta + long createTs = responseCreateTimestampNanos.get(); + if (createTs > 0 && firstTokenLatencyMs.get() < 0) { + long elapsed = (System.nanoTime() - createTs) / 1_000_000; + firstTokenLatencyMs.compareAndSet(-1, elapsed); + } + } + + // Track turn count from response.done + if (update instanceof SessionUpdateResponseDone) { + turnCount.incrementAndGet(); + } + } + + private void trackRecvTokenUsage(SessionUpdate update, Span span) { + if (update instanceof SessionUpdateResponseDone) { + SessionResponse response = ((SessionUpdateResponseDone) 
update).getResponse(); + if (response != null) { + ResponseTokenStatistics usage = response.getUsage(); + if (usage != null) { + span.setAttribute(GEN_AI_USAGE_INPUT_TOKENS, (long) usage.getInputTokens()); + span.setAttribute(GEN_AI_USAGE_OUTPUT_TOKENS, (long) usage.getOutputTokens()); + } + } + } + } + + private void trackErrorEvents(SessionUpdate update, Span span) { + if (update instanceof SessionUpdateError) { + SessionUpdateError errorUpdate = (SessionUpdateError) update; + SessionUpdateErrorDetails errorDetails = errorUpdate.getError(); + if (errorDetails != null) { + // Set error status on the span + String message = errorDetails.getMessage() != null ? errorDetails.getMessage() : "Unknown error"; + span.setStatus(StatusCode.ERROR, message); + span.setAttribute(ERROR_TYPE, errorDetails.getType() != null ? errorDetails.getType() : "server_error"); + + // Add error event with details + AttributesBuilder errorAttrs = Attributes.builder().put(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE); + if (errorDetails.getCode() != null) { + errorAttrs.put(ERROR_CODE, errorDetails.getCode()); + } + if (errorDetails.getMessage() != null) { + errorAttrs.put(ERROR_MESSAGE, errorDetails.getMessage()); + } + span.addEvent(GEN_AI_VOICE_ERROR, errorAttrs.build()); + } + } + } + + private void updateAudioFormatsFromResponse(VoiceLiveSessionResponse session) { + if (session == null) { + return; + } + if (session.getInputAudioFormat() != null) { + inputAudioFormat.set(session.getInputAudioFormat().toString()); + } + if (session.getOutputAudioFormat() != null) { + outputAudioFormat.set(session.getOutputAudioFormat().toString()); + } + } +} diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/module-info.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/module-info.java index 6e615f120e95..76aab10bf839 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/main/java/module-info.java +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/module-info.java @@ -9,6 +9,8 @@ requires 
io.netty.buffer; requires io.netty.codec.http; requires io.netty.resolver; + requires io.opentelemetry.api; + requires io.opentelemetry.context; exports com.azure.ai.voicelive; exports com.azure.ai.voicelive.models; diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/ReadmeSamples.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/ReadmeSamples.java index b006cc42c402..39af028010e8 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/ReadmeSamples.java +++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/ReadmeSamples.java @@ -38,6 +38,7 @@ import com.azure.identity.AzureCliCredentialBuilder; import com.azure.identity.DefaultAzureCredentialBuilder; import com.fasterxml.jackson.databind.ObjectMapper; +import io.opentelemetry.api.OpenTelemetry; import reactor.core.publisher.Mono; import java.io.IOException; @@ -402,6 +403,34 @@ public void functionCalling() { // END: com.azure.ai.voicelive.functioncalling } + /** + * Tracing: automatic via GlobalOpenTelemetry + */ + public void tracingAutomatic() { + // BEGIN: com.azure.ai.voicelive.tracing.automatic + // No special configuration needed — tracing is picked up from GlobalOpenTelemetry + VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(apiKey)) + .buildAsyncClient(); + // END: com.azure.ai.voicelive.tracing.automatic + } + + /** + * Tracing: explicit OpenTelemetry instance + */ + public void tracingExplicit() { + OpenTelemetry otel = OpenTelemetry.noop(); // Replace with your configured OpenTelemetry SDK instance + + // BEGIN: com.azure.ai.voicelive.tracing.explicit + VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(apiKey)) + .openTelemetry(otel) + .buildAsyncClient(); + // END: com.azure.ai.voicelive.tracing.explicit + } + // Helper methods private Object 
parametersSchema = new Object(); diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/TelemetrySample.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/TelemetrySample.java new file mode 100644 index 000000000000..6a3e43edbdd7 --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/TelemetrySample.java @@ -0,0 +1,152 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.voicelive; + +import com.azure.ai.voicelive.models.ClientEventSessionUpdate; +import com.azure.ai.voicelive.models.InteractionModality; +import com.azure.ai.voicelive.models.SessionUpdateResponseDone; +import com.azure.ai.voicelive.models.VoiceLiveSessionOptions; +import com.azure.core.credential.KeyCredential; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.exporter.logging.LoggingSpanExporter; +import io.opentelemetry.sdk.OpenTelemetrySdk; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; +import reactor.core.publisher.Mono; + +import java.util.Arrays; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +/** + * Sample demonstrating how to enable OpenTelemetry tracing for VoiceLive sessions. + * + *

<p>This runnable sample shows how to configure OpenTelemetry tracing so that every
+ * connect, send, recv, and close operation emits spans with voice-specific attributes.</p>
+ *
+ * <p><strong>Environment Variables Required:</strong></p>
+ * <ul>
+ *   <li>{@code AZURE_VOICELIVE_ENDPOINT} — The VoiceLive service endpoint URL</li>
+ *   <li>{@code AZURE_VOICELIVE_API_KEY} — The API key for authentication</li>
+ * </ul>
+ *
+ * <p><strong>How to Run:</strong></p>
+ * <pre>{@code
+ * mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.TelemetrySample" -Dexec.classpathScope=test
+ * }</pre>
+ *
+ * <p><strong>Span Structure:</strong></p>
+ * When tracing is enabled, the following span hierarchy is emitted:
+ * <pre>
+ * connect gpt-4o-realtime-preview        ← session lifetime
+ * ├── send session.update                ← send spans include event type
+ * ├── send response.create
+ * ├── recv session.created               ← recv spans include event type
+ * ├── recv response.audio.delta
+ * ├── recv response.done                 ← includes token usage
+ * └── close
+ * </pre>
+ *
+ * <p><strong>Session-level Attributes (on connect span):</strong></p>
+ * <ul>
+ *   <li>{@code gen_ai.voice.session_id} — Voice session ID</li>
+ *   <li>{@code gen_ai.voice.turn_count} — Completed response turns</li>
+ *   <li>{@code gen_ai.voice.interruption_count} — User interruptions</li>
+ *   <li>{@code gen_ai.voice.audio_bytes_sent} — Total audio payload bytes sent</li>
+ *   <li>{@code gen_ai.voice.audio_bytes_received} — Total audio payload bytes received</li>
+ *   <li>{@code gen_ai.voice.first_token_latency_ms} — Time to first response</li>
+ * </ul>
+ *
+ * <p><strong>Alternative: Automatic tracing via GlobalOpenTelemetry</strong></p>
+ * <p>If the OpenTelemetry Java agent is attached or {@code GlobalOpenTelemetry} is configured,
+ * tracing works automatically with no builder configuration needed. The client defaults to
+ * {@code GlobalOpenTelemetry.getOrNoop()}, which is a zero-cost no-op when no SDK is present.</p>
+ *
+ * <p><strong>Alternative: Azure Monitor Integration</strong></p>
+ * <pre>{@code
+ * // Replace LoggingSpanExporter with azure-monitor-opentelemetry-exporter:
+ * AzureMonitorExporterBuilder exporter = new AzureMonitorExporterBuilder()
+ *     .connectionString(System.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING"));
+ * SdkTracerProvider tracerProvider = SdkTracerProvider.builder()
+ *     .addSpanProcessor(SimpleSpanProcessor.create(exporter.buildTraceExporter()))
+ *     .build();
+ * }</pre>
+ */ +public final class TelemetrySample { + + /** + * Main method to run the telemetry sample. + * + * @param args Unused command line arguments + * @throws InterruptedException if the thread is interrupted while waiting + */ + public static void main(String[] args) throws InterruptedException { + String endpoint = System.getenv("AZURE_VOICELIVE_ENDPOINT"); + String apiKey = System.getenv("AZURE_VOICELIVE_API_KEY"); + + if (endpoint == null || apiKey == null) { + System.err.println("Please set AZURE_VOICELIVE_ENDPOINT and AZURE_VOICELIVE_API_KEY environment variables"); + return; + } + + // 1. Set up OpenTelemetry with a console exporter that prints spans to stdout. + // In production, replace LoggingSpanExporter with OtlpGrpcSpanExporter + // or the Azure Monitor exporter. + SdkTracerProvider tracerProvider = SdkTracerProvider.builder() + .addSpanProcessor(SimpleSpanProcessor.create(LoggingSpanExporter.create())) + .build(); + OpenTelemetry otel = OpenTelemetrySdk.builder() + .setTracerProvider(tracerProvider) + .build(); + + // 2. Build client with the explicit OpenTelemetry instance. + // Alternatively, omit .openTelemetry() to use GlobalOpenTelemetry automatically. + VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new KeyCredential(apiKey)) + .openTelemetry(otel) + .enableContentRecording(false) // Set true to capture JSON payloads in spans + .buildAsyncClient(); + + System.out.println("Starting traced voice session..."); + + CountDownLatch done = new CountDownLatch(1); + + // 3. Run a short text-mode conversation — all operations are traced automatically. + client.startSession("gpt-4o-realtime-preview") + .flatMap(session -> { + VoiceLiveSessionOptions options = new VoiceLiveSessionOptions() + .setModalities(Arrays.asList(InteractionModality.TEXT)) + .setInstructions("You are a helpful assistant. 
Be concise."); + + session.receiveEvents() + .subscribe( + event -> { + System.out.println("Event: " + event.getType()); + if (event instanceof SessionUpdateResponseDone) { + session.closeAsync().subscribe(); + done.countDown(); + } + }, + error -> { + System.err.println("Error: " + error.getMessage()); + done.countDown(); + } + ); + + return session.sendEvent(new ClientEventSessionUpdate(options)) + .then(session.startResponse()) + .then(Mono.empty()); + }) + .subscribe(); + + done.await(30, TimeUnit.SECONDS); + + // 4. Shut down the tracer provider to flush remaining spans to console. + tracerProvider.close(); + } + + private TelemetrySample() { + } +} diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/VoiceAssistantSample.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/VoiceAssistantSample.java index 133b4ae21f32..9470b79edff9 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/VoiceAssistantSample.java +++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/VoiceAssistantSample.java @@ -25,6 +25,11 @@ import com.azure.core.credential.TokenCredential; import com.azure.core.util.BinaryData; import com.azure.identity.AzureCliCredentialBuilder; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.exporter.logging.LoggingSpanExporter; +import io.opentelemetry.sdk.OpenTelemetrySdk; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; import reactor.core.publisher.Mono; @@ -84,6 +89,9 @@ * * # With Token Credential: * mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.VoiceAssistantSample" -Dexec.classpathScope=test -Dexec.args="--use-token-credential" + * + * # With OpenTelemetry tracing enabled: + * mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.VoiceAssistantSample" -Dexec.classpathScope=test -Dexec.args="--enable-tracing" * } */ public final class 
VoiceAssistantSample { @@ -361,10 +369,12 @@ void shutdown() { public static void main(String[] args) { // Parse command line arguments boolean useTokenCredential = false; + boolean enableTracing = false; for (String arg : args) { if ("--use-token-credential".equals(arg)) { useTokenCredential = true; - break; + } else if ("--enable-tracing".equals(arg)) { + enableTracing = true; } } @@ -389,6 +399,19 @@ public static void main(String[] args) { return; } + // Set up OpenTelemetry tracing if enabled + SdkTracerProvider tracerProvider = null; + OpenTelemetry otel = null; + if (enableTracing) { + tracerProvider = SdkTracerProvider.builder() + .addSpanProcessor(SimpleSpanProcessor.create(LoggingSpanExporter.create())) + .build(); + otel = OpenTelemetrySdk.builder() + .setTracerProvider(tracerProvider) + .build(); + System.out.println("📊 OpenTelemetry tracing enabled (console exporter)"); + } + System.out.println("🎙️ Starting Voice Assistant..."); try { @@ -397,16 +420,20 @@ public static void main(String[] args) { System.out.println("🔑 Using Token Credential authentication (Azure CLI)"); System.out.println(" Make sure you have run 'az login' before running this sample"); TokenCredential credential = new AzureCliCredentialBuilder().build(); - runVoiceAssistant(endpoint, credential); + runVoiceAssistant(endpoint, credential, otel); } else { // Use API Key authentication System.out.println("🔑 Using API Key authentication"); - runVoiceAssistant(endpoint, new KeyCredential(apiKey)); + runVoiceAssistant(endpoint, new KeyCredential(apiKey), otel); } System.out.println("✓ Voice Assistant completed successfully"); } catch (Exception e) { System.err.println("❌ Voice Assistant failed: " + e.getMessage()); e.printStackTrace(); + } finally { + if (tracerProvider != null) { + tracerProvider.close(); + } } } @@ -449,6 +476,7 @@ private static void printUsage() { System.err.println(" " + ENV_API_KEY + "= (required if not using --use-token-credential)"); 
System.err.println("\nOptional:"); System.err.println(" Use --use-token-credential flag to authenticate with Azure CLI (requires 'az login')"); + System.err.println(" Use --enable-tracing flag to enable OpenTelemetry tracing (console exporter)"); } /** @@ -456,17 +484,21 @@ private static void printUsage() { * * @param endpoint The VoiceLive service endpoint * @param credential The API key credential + * @param otel The OpenTelemetry instance, or null to disable tracing */ - private static void runVoiceAssistant(String endpoint, KeyCredential credential) { + private static void runVoiceAssistant(String endpoint, KeyCredential credential, OpenTelemetry otel) { System.out.println("🔧 Initializing VoiceLive client:"); System.out.println(" Endpoint: " + endpoint); // Create the VoiceLive client - VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + VoiceLiveClientBuilder builder = new VoiceLiveClientBuilder() .endpoint(endpoint) .credential(credential) - .serviceVersion(VoiceLiveServiceVersion.V2025_10_01) - .buildAsyncClient(); + .serviceVersion(VoiceLiveServiceVersion.V2025_10_01); + if (otel != null) { + builder.openTelemetry(otel); + } + VoiceLiveAsyncClient client = builder.buildAsyncClient(); runVoiceAssistantWithClient(client); } @@ -476,17 +508,21 @@ private static void runVoiceAssistant(String endpoint, KeyCredential credential) * * @param endpoint The VoiceLive service endpoint * @param credential The token credential + * @param otel The OpenTelemetry instance, or null to disable tracing */ - private static void runVoiceAssistant(String endpoint, TokenCredential credential) { + private static void runVoiceAssistant(String endpoint, TokenCredential credential, OpenTelemetry otel) { System.out.println("🔧 Initializing VoiceLive client:"); System.out.println(" Endpoint: " + endpoint); // Create the VoiceLive client - VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + VoiceLiveClientBuilder builder = new VoiceLiveClientBuilder() 
.endpoint(endpoint) .credential(credential) - .serviceVersion(VoiceLiveServiceVersion.V2025_10_01) - .buildAsyncClient(); + .serviceVersion(VoiceLiveServiceVersion.V2025_10_01); + if (otel != null) { + builder.openTelemetry(otel); + } + VoiceLiveAsyncClient client = builder.buildAsyncClient(); runVoiceAssistantWithClient(client); } diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveClientBuilderTest.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveClientBuilderTest.java index 5055c71b91e5..5bb5499dfb59 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveClientBuilderTest.java +++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveClientBuilderTest.java @@ -6,6 +6,7 @@ import com.azure.core.credential.KeyCredential; import com.azure.core.credential.TokenCredential; import com.azure.core.test.utils.MockTokenCredential; +import io.opentelemetry.api.OpenTelemetry; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -203,4 +204,56 @@ void testBuilderReturnsBuilder() { assertSame(clientBuilder, clientBuilder.credential(mockKeyCredential)); assertSame(clientBuilder, clientBuilder.serviceVersion(VoiceLiveServiceVersion.V2026_01_01_PREVIEW)); } + + @Test + void testBuilderWithExplicitOpenTelemetry() { + String endpoint = "https://test.cognitiveservices.azure.com"; + + assertDoesNotThrow(() -> { + VoiceLiveAsyncClient client = clientBuilder.endpoint(endpoint) + .credential(mockKeyCredential) + .openTelemetry(OpenTelemetry.noop()) + .buildAsyncClient(); + + assertNotNull(client); + }); + } + + @Test + void testBuilderWithNullOpenTelemetryThrows() { + assertThrows(NullPointerException.class, () -> clientBuilder.openTelemetry(null)); + } + + @Test + void testBuilderDefaultsToGlobalOpenTelemetry() { + // When no explicit OpenTelemetry is set, builder should use GlobalOpenTelemetry.getOrNoop() + String 
endpoint = "https://test.cognitiveservices.azure.com"; + + assertDoesNotThrow(() -> { + VoiceLiveAsyncClient client + = clientBuilder.endpoint(endpoint).credential(mockKeyCredential).buildAsyncClient(); + + assertNotNull(client); + }); + } + + @Test + void testBuilderOpenTelemetryReturnsBuilder() { + assertSame(clientBuilder, clientBuilder.openTelemetry(OpenTelemetry.noop())); + } + + @Test + void testBuilderWithOpenTelemetryAndContentRecording() { + String endpoint = "https://test.cognitiveservices.azure.com"; + + assertDoesNotThrow(() -> { + VoiceLiveAsyncClient client = clientBuilder.endpoint(endpoint) + .credential(mockKeyCredential) + .openTelemetry(OpenTelemetry.noop()) + .enableContentRecording(true) + .buildAsyncClient(); + + assertNotNull(client); + }); + } } diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveTracerTest.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveTracerTest.java new file mode 100644 index 000000000000..dd020015ef6f --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveTracerTest.java @@ -0,0 +1,379 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.ai.voicelive; + +import com.azure.ai.voicelive.models.ClientEventInputAudioBufferAppend; +import com.azure.ai.voicelive.models.ClientEventResponseCancel; +import com.azure.ai.voicelive.models.ClientEventResponseCreate; +import com.azure.ai.voicelive.models.SessionUpdate; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.api.trace.StatusCode; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.sdk.testing.exporter.InMemorySpanExporter; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.data.EventData; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.net.URI; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Unit tests for {@link VoiceLiveTracer} using OpenTelemetry SDK testing utilities. 
+ */ +class VoiceLiveTracerTest { + + private InMemorySpanExporter spanExporter; + private SdkTracerProvider tracerProvider; + private Tracer tracer; + private VoiceLiveTracer voiceLiveTracer; + + @BeforeEach + void setUp() throws Exception { + spanExporter = InMemorySpanExporter.create(); + tracerProvider = SdkTracerProvider.builder().addSpanProcessor(SimpleSpanProcessor.create(spanExporter)).build(); + tracer = tracerProvider.get("azure-ai-voicelive", "1.0.0-beta.6"); + URI endpoint = new URI("wss://test.cognitiveservices.azure.com/voice-live/realtime"); + voiceLiveTracer = new VoiceLiveTracer(tracer, endpoint, "gpt-4o-realtime-preview", null); + } + + @AfterEach + void tearDown() { + tracerProvider.close(); + } + + @Test + void testConnectSpanCreated() { + voiceLiveTracer.startConnectSpan(); + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + assertEquals(1, spans.size()); + SpanData span = spans.get(0); + assertEquals("connect gpt-4o-realtime-preview", span.getName()); + assertEquals(SpanKind.CLIENT, span.getKind()); + assertEquals("az.ai.voicelive", span.getAttributes().get(AttributeKey.stringKey("gen_ai.system"))); + assertEquals("connect", span.getAttributes().get(AttributeKey.stringKey("gen_ai.operation.name"))); + assertEquals("Microsoft.CognitiveServices", span.getAttributes().get(AttributeKey.stringKey("az.namespace"))); + assertEquals("gpt-4o-realtime-preview", + span.getAttributes().get(AttributeKey.stringKey("gen_ai.request.model"))); + assertEquals("test.cognitiveservices.azure.com", + span.getAttributes().get(AttributeKey.stringKey("server.address"))); + } + + @Test + void testSendSpanCreated() { + voiceLiveTracer.startConnectSpan(); + + ClientEventResponseCreate event = new ClientEventResponseCreate(); + voiceLiveTracer.traceSend(event, "{\"type\":\"response.create\"}"); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + assertEquals(2, spans.size()); + + // 
Send span finishes first + SpanData sendSpan = spans.get(0); + assertEquals("send response.create", sendSpan.getName()); + assertEquals(SpanKind.CLIENT, sendSpan.getKind()); + assertEquals("send", sendSpan.getAttributes().get(AttributeKey.stringKey("gen_ai.operation.name"))); + assertEquals("response.create", + sendSpan.getAttributes().get(AttributeKey.stringKey("gen_ai.voice.event_type"))); + + // Send span should be a child of the connect span + SpanData connectSpan = spans.get(1); + assertEquals(connectSpan.getSpanContext().getTraceId(), sendSpan.getSpanContext().getTraceId()); + assertEquals(connectSpan.getSpanContext().getSpanId(), sendSpan.getParentSpanId()); + } + + @Test + void testRecvSpanCreated() throws Exception { + voiceLiveTracer.startConnectSpan(); + + String json = "{\"type\":\"session.created\",\"event_id\":\"evt1\"," + + "\"session\":{\"id\":\"sess_123\",\"model\":\"gpt-4o\"}}"; + SessionUpdate update = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(json)); + voiceLiveTracer.traceRecv(update, json); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + assertEquals(2, spans.size()); + + SpanData recvSpan = spans.get(0); + assertEquals("recv session.created", recvSpan.getName()); + assertEquals("recv", recvSpan.getAttributes().get(AttributeKey.stringKey("gen_ai.operation.name"))); + + // Verify parent-child relationship + SpanData connectSpan = spans.get(1); + assertEquals(connectSpan.getSpanContext().getSpanId(), recvSpan.getParentSpanId()); + } + + @Test + void testCloseSpanCreated() { + voiceLiveTracer.startConnectSpan(); + voiceLiveTracer.traceClose(); + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + assertEquals(2, spans.size()); + + SpanData closeSpan = spans.get(0); + assertEquals("close", closeSpan.getName()); + assertEquals("close", closeSpan.getAttributes().get(AttributeKey.stringKey("gen_ai.operation.name"))); + } + + @Test + void 
testSessionCountersOnEndConnectSpan() { + voiceLiveTracer.startConnectSpan(); + + // Simulate a response.cancel (interruption) + ClientEventResponseCancel cancelEvent = new ClientEventResponseCancel(); + voiceLiveTracer.traceSend(cancelEvent, "{\"type\":\"response.cancel\"}"); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + // Find the connect span (last to finish) + SpanData connectSpan = spans.get(spans.size() - 1); + assertEquals(1L, + connectSpan.getAttributes().get(AttributeKey.longKey("gen_ai.voice.interruption_count")).longValue()); + } + + @Test + void testResponseCreateTracksLatency() throws Exception { + voiceLiveTracer.startConnectSpan(); + + // Send response.create + ClientEventResponseCreate createEvent = new ClientEventResponseCreate(); + voiceLiveTracer.traceSend(createEvent, "{\"type\":\"response.create\"}"); + + // Simulate small delay and receive audio delta + Thread.sleep(10); + String audioJson = "{\"type\":\"response.audio.delta\",\"response_id\":\"r1\"," + + "\"item_id\":\"i1\",\"output_index\":0,\"content_index\":0,\"delta\":\"AQID\"}"; + SessionUpdate audioDelta = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(audioJson)); + voiceLiveTracer.traceRecv(audioDelta, audioJson); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData connectSpan = spans.get(spans.size() - 1); + Long latency = connectSpan.getAttributes().get(AttributeKey.longKey("gen_ai.voice.first_token_latency_ms")); + assertNotNull(latency); + assertTrue(latency >= 0, "Latency should be >= 0, was: " + latency); + } + + @Test + void testAudioBytesTracking() throws Exception { + voiceLiveTracer.startConnectSpan(); + + // Send audio (base64 of 3 bytes: [1,2,3] = "AQID") + ClientEventInputAudioBufferAppend appendEvent = new ClientEventInputAudioBufferAppend("AQID"); + voiceLiveTracer.traceSend(appendEvent, 
"{\"type\":\"input_audio_buffer.append\",\"audio\":\"AQID\"}"); + + // Receive audio delta (base64 "AQIDBA==" = 4 bytes) + String audioJson = "{\"type\":\"response.audio.delta\",\"response_id\":\"r1\"," + + "\"item_id\":\"i1\",\"output_index\":0,\"content_index\":0,\"delta\":\"AQIDBA==\"}"; + SessionUpdate audioDelta = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(audioJson)); + voiceLiveTracer.traceRecv(audioDelta, audioJson); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData connectSpan = spans.get(spans.size() - 1); + assertEquals(3L, + connectSpan.getAttributes().get(AttributeKey.longKey("gen_ai.voice.audio_bytes_sent")).longValue()); + assertEquals(4L, + connectSpan.getAttributes().get(AttributeKey.longKey("gen_ai.voice.audio_bytes_received")).longValue()); + } + + @Test + void testTurnCountTracking() throws Exception { + voiceLiveTracer.startConnectSpan(); + + String doneJson = "{\"type\":\"response.done\",\"event_id\":\"evt1\"," + + "\"response\":{\"id\":\"r1\",\"status\":\"completed\",\"output\":[]}}"; + SessionUpdate responseDone = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(doneJson)); + voiceLiveTracer.traceRecv(responseDone, doneJson); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData connectSpan = spans.get(spans.size() - 1); + assertEquals(1L, connectSpan.getAttributes().get(AttributeKey.longKey("gen_ai.voice.turn_count")).longValue()); + } + + @Test + void testTokenUsageOnResponseDone() throws Exception { + voiceLiveTracer.startConnectSpan(); + + String doneJson = "{\"type\":\"response.done\",\"event_id\":\"evt1\"," + + "\"response\":{\"id\":\"r1\",\"status\":\"completed\",\"output\":[]," + + "\"usage\":{\"total_tokens\":150,\"input_tokens\":100,\"output_tokens\":50," + + "\"input_token_details\":{\"cached_tokens\":0,\"text_tokens\":50,\"audio_tokens\":50}," + + 
"\"output_token_details\":{\"text_tokens\":25,\"audio_tokens\":25}}}}"; + SessionUpdate responseDone = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(doneJson)); + voiceLiveTracer.traceRecv(responseDone, doneJson); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + // Recv span is first to finish + SpanData recvSpan = spans.get(0); + assertEquals(100L, recvSpan.getAttributes().get(AttributeKey.longKey("gen_ai.usage.input_tokens")).longValue()); + assertEquals(50L, recvSpan.getAttributes().get(AttributeKey.longKey("gen_ai.usage.output_tokens")).longValue()); + } + + @Test + void testErrorEventTracking() throws Exception { + voiceLiveTracer.startConnectSpan(); + + String errorJson = "{\"type\":\"error\",\"event_id\":\"evt1\"," + + "\"error\":{\"type\":\"server_error\",\"code\":\"500\",\"message\":\"Internal error\"}}"; + SessionUpdate errorUpdate = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(errorJson)); + voiceLiveTracer.traceRecv(errorUpdate, errorJson); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData recvSpan = spans.get(0); + + // Verify error status is set on the span + assertEquals(StatusCode.ERROR, recvSpan.getStatus().getStatusCode()); + assertEquals("Internal error", recvSpan.getStatus().getDescription()); + assertEquals("server_error", recvSpan.getAttributes().get(AttributeKey.stringKey("error.type"))); + + // Verify error event was added + List events = recvSpan.getEvents(); + EventData errorEvent = events.stream() + .filter(e -> "gen_ai.voice.error".equals(e.getName())) + .findFirst() + .orElseThrow(() -> new AssertionError("Expected gen_ai.voice.error event")); + assertEquals("500", errorEvent.getAttributes().get(AttributeKey.stringKey("error.code"))); + assertEquals("Internal error", errorEvent.getAttributes().get(AttributeKey.stringKey("error.message"))); + } + + @Test + void testConnectSpanErrorStatus() { + 
voiceLiveTracer.startConnectSpan(); + voiceLiveTracer.endConnectSpan(new RuntimeException("Connection lost")); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData connectSpan = spans.get(0); + assertEquals(StatusCode.ERROR, connectSpan.getStatus().getStatusCode()); + assertEquals("Connection lost", connectSpan.getStatus().getDescription()); + assertEquals("java.lang.RuntimeException", + connectSpan.getAttributes().get(AttributeKey.stringKey("error.type"))); + + // Verify exception was recorded + assertTrue(connectSpan.getEvents().stream().anyMatch(e -> "exception".equals(e.getName()))); + } + + @Test + void testSessionIdFromSessionCreated() throws Exception { + voiceLiveTracer.startConnectSpan(); + + String json = "{\"type\":\"session.created\",\"event_id\":\"evt1\"," + + "\"session\":{\"id\":\"sess_abc123\",\"model\":\"gpt-4o\"}}"; + SessionUpdate update = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(json)); + voiceLiveTracer.traceRecv(update, json); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData connectSpan = spans.get(spans.size() - 1); + assertEquals("sess_abc123", connectSpan.getAttributes().get(AttributeKey.stringKey("gen_ai.voice.session_id"))); + } + + @Test + void testConnectSpanWithoutModel() throws Exception { + VoiceLiveTracer tracerNoModel + = new VoiceLiveTracer(tracer, new URI("wss://test.cognitiveservices.azure.com"), null, null); + + tracerNoModel.startConnectSpan(); + tracerNoModel.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData span = spans.get(0); + assertEquals("connect", span.getName()); + assertFalse(span.getAttributes().asMap().containsKey(AttributeKey.stringKey("gen_ai.request.model"))); + } + + @Test + void testParentChildSpanHierarchy() throws Exception { + voiceLiveTracer.startConnectSpan(); + + // Send + ClientEventResponseCreate event = new ClientEventResponseCreate(); + voiceLiveTracer.traceSend(event, 
"{\"type\":\"response.create\"}"); + + // Recv + String json = "{\"type\":\"session.created\",\"event_id\":\"evt1\"," + + "\"session\":{\"id\":\"sess_123\",\"model\":\"gpt-4o\"}}"; + SessionUpdate update = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(json)); + voiceLiveTracer.traceRecv(update, json); + + // Close + voiceLiveTracer.traceClose(); + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + assertEquals(4, spans.size()); + + // All child spans share the same trace ID and reference the connect span as parent + SpanData connectSpan = spans.get(spans.size() - 1); + String traceId = connectSpan.getSpanContext().getTraceId(); + String connectSpanId = connectSpan.getSpanContext().getSpanId(); + + for (int i = 0; i < spans.size() - 1; i++) { + SpanData child = spans.get(i); + assertEquals(traceId, child.getSpanContext().getTraceId(), "Child span should be in same trace"); + assertEquals(connectSpanId, child.getParentSpanId(), "Child span should have connect as parent"); + } + } + + @Test + void testBuilderWithOpenTelemetry() { + VoiceLiveAsyncClient client = new VoiceLiveClientBuilder().endpoint("https://test.cognitiveservices.azure.com") + .credential(new com.azure.core.credential.KeyCredential("fake")) + .openTelemetry(io.opentelemetry.api.OpenTelemetry.noop()) + .buildAsyncClient(); + + assertNotNull(client); + } + + @Test + void testSpanEventsContainAttributes() { + voiceLiveTracer.startConnectSpan(); + + ClientEventResponseCreate event = new ClientEventResponseCreate(); + voiceLiveTracer.traceSend(event, "{\"type\":\"response.create\"}"); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData sendSpan = spans.get(0); + List events = sendSpan.getEvents(); + assertFalse(events.isEmpty()); + + EventData inputEvent = events.stream() + .filter(e -> "gen_ai.input.messages".equals(e.getName())) + .findFirst() + .orElseThrow(() -> new 
AssertionError("Expected gen_ai.input.messages event")); + assertEquals("az.ai.voicelive", inputEvent.getAttributes().get(AttributeKey.stringKey("gen_ai.system"))); + assertEquals("response.create", + inputEvent.getAttributes().get(AttributeKey.stringKey("gen_ai.voice.event_type"))); + } +} From 914e9472988ecbe6c62158a7bb6e7aa05303fabb Mon Sep 17 00:00:00 2001 From: xitzhang Date: Wed, 25 Mar 2026 15:41:36 -0700 Subject: [PATCH 2/3] Update sdk/voicelive/azure-ai-voicelive/README.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- sdk/voicelive/azure-ai-voicelive/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/voicelive/azure-ai-voicelive/README.md b/sdk/voicelive/azure-ai-voicelive/README.md index e305a6ebabef..30455e0cfa77 100644 --- a/sdk/voicelive/azure-ai-voicelive/README.md +++ b/sdk/voicelive/azure-ai-voicelive/README.md @@ -447,13 +447,13 @@ connect gpt-4o-realtime-preview ← session lifetime span ``` **Session-level attributes** (on the connect span): -- `gen_ai.system` — `openai` +- `gen_ai.system` — `az.ai.voicelive` - `gen_ai.request.model` — Model name (e.g., `gpt-4o-realtime-preview`) - `server.address` — Service endpoint - `gen_ai.voice.session_id` — Voice session ID - `gen_ai.voice.turn_count` — Completed response turns - `gen_ai.voice.interruption_count` — User interruptions -- `gen_ai.voice.audio_bytes_sent` / `audio_bytes_received` — Audio payload bytes +- `gen_ai.voice.audio_bytes_sent` / `gen_ai.voice.audio_bytes_received` — Audio payload bytes - `gen_ai.voice.first_token_latency_ms` — Time to first audio response #### Content recording From f88e342d2ef7a1e3b918decbae8b083d92a1fe6e Mon Sep 17 00:00:00 2001 From: Xiting Zhang Date: Wed, 25 Mar 2026 15:50:55 -0700 Subject: [PATCH 3/3] Derive tracer version from azure-ai-voicelive.properties at runtime --- .../com/azure/ai/voicelive/VoiceLiveClientBuilder.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git 
a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java index 41ff285b4f1e..9b6098a96190 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java @@ -21,6 +21,8 @@ import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.trace.Tracer; +import java.util.Map; + /** * Builder for creating instances of {@link VoiceLiveAsyncClient}. */ @@ -28,6 +30,9 @@ public final class VoiceLiveClientBuilder implements TokenCredentialTrait, KeyCredentialTrait, EndpointTrait { private static final ClientLogger LOGGER = new ClientLogger(VoiceLiveClientBuilder.class); + private static final Map PROPERTIES = CoreUtils.getProperties("azure-ai-voicelive.properties"); + private static final String SDK_NAME = "azure-ai-voicelive"; + private static final String SDK_VERSION = PROPERTIES.getOrDefault("version", "unknown"); private URI endpoint; private KeyCredential keyCredential; @@ -166,7 +171,7 @@ public VoiceLiveAsyncClient buildAsyncClient() { HttpHeaders additionalHeaders = CoreUtils.createHttpHeadersFromClientOptions(clientOptions); OpenTelemetry otel = openTelemetry != null ? openTelemetry : GlobalOpenTelemetry.getOrNoop(); - Tracer tracer = otel.getTracer("azure-ai-voicelive", "1.0.0-beta.6"); + Tracer tracer = otel.getTracer(SDK_NAME, SDK_VERSION); if (keyCredential != null) { return new VoiceLiveAsyncClient(endpoint, keyCredential, version.getVersion(), additionalHeaders, tracer,