From eb36b1e3425601b1fba314080e6d8a2131d4436f Mon Sep 17 00:00:00 2001
From: Xiting Zhang
Date: Wed, 25 Mar 2026 14:59:31 -0700
Subject: [PATCH 1/3] [VoiceLive] Add built-in OpenTelemetry tracing support
---
sdk/voicelive/azure-ai-voicelive/CHANGELOG.md | 9 +
sdk/voicelive/azure-ai-voicelive/README.md | 75 +++
.../checkstyle-suppressions.xml | 12 +
sdk/voicelive/azure-ai-voicelive/pom.xml | 42 ++
.../ai/voicelive/VoiceLiveAsyncClient.java | 94 ++--
.../ai/voicelive/VoiceLiveClientBuilder.java | 49 +-
.../VoiceLiveSessionAsyncClient.java | 67 ++-
.../azure/ai/voicelive/VoiceLiveTracer.java | 500 ++++++++++++++++++
.../src/main/java/module-info.java | 2 +
.../com/azure/ai/voicelive/ReadmeSamples.java | 29 +
.../azure/ai/voicelive/TelemetrySample.java | 152 ++++++
.../ai/voicelive/VoiceAssistantSample.java | 58 +-
.../voicelive/VoiceLiveClientBuilderTest.java | 53 ++
.../ai/voicelive/VoiceLiveTracerTest.java | 379 +++++++++++++
14 files changed, 1471 insertions(+), 50 deletions(-)
create mode 100644 sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveTracer.java
create mode 100644 sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/TelemetrySample.java
create mode 100644 sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveTracerTest.java
diff --git a/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md b/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md
index 5969b2be656e..e662e467a646 100644
--- a/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md
+++ b/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md
@@ -4,6 +4,15 @@
### Features Added
+- Added built-in OpenTelemetry tracing support for voice sessions following GenAI Semantic Conventions:
+ - `VoiceLiveClientBuilder.openTelemetry(OpenTelemetry)` method for providing a custom OpenTelemetry instance
+ - Defaults to `GlobalOpenTelemetry.getOrNoop()` for automatic Java agent detection with zero-cost no-op fallback
+ - Emits spans for `connect`, `send`, `recv`, and `close` operations with voice-specific attributes
+ - Session-level counters: turn count, interruption count, audio bytes sent/received, first token latency
+ - Per-message attributes: token usage, event types, error details
+ - Content recording controlled via `enableContentRecording(boolean)` or `AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED` environment variable
+- Added `TelemetrySample.java` demonstrating OpenTelemetry integration patterns
+
### Breaking Changes
### Bugs Fixed
diff --git a/sdk/voicelive/azure-ai-voicelive/README.md b/sdk/voicelive/azure-ai-voicelive/README.md
index a57e3e5cb387..e305a6ebabef 100644
--- a/sdk/voicelive/azure-ai-voicelive/README.md
+++ b/sdk/voicelive/azure-ai-voicelive/README.md
@@ -126,6 +126,7 @@ The following sections provide code snippets for common scenarios:
* [Handle event types](#handle-event-types)
* [Voice configuration](#voice-configuration)
* [Function calling](#function-calling)
+* [Telemetry and tracing](#telemetry-and-tracing)
* [Complete voice assistant with microphone](#complete-voice-assistant-with-microphone)
### Focused Sample Files
@@ -166,6 +167,12 @@ For easier learning, explore these focused samples in order:
- Execute functions locally and return results
- Continue conversation with function results
+7. **TelemetrySample.java** - OpenTelemetry tracing integration
+ - Automatic tracing via GlobalOpenTelemetry (zero-config)
+ - Explicit OpenTelemetry instance via builder
+ - Span structure and session-level attributes
+ - Azure Monitor integration example
+
> **Note:** To run audio samples (AudioPlaybackSample, MicrophoneInputSample, VoiceAssistantSample, FunctionCallingSample):
> ```bash
> mvn exec:java -Dexec.mainClass=com.azure.ai.voicelive.FunctionCallingSample -Dexec.classpathScope=test
@@ -397,6 +404,74 @@ client.startSession("gpt-4o-realtime-preview")
* Results are sent back to continue the conversation
* See `FunctionCallingSample.java` for a complete working example
+### Telemetry and tracing
+
+The SDK has built-in [OpenTelemetry](https://opentelemetry.io/) tracing that emits spans for every WebSocket operation. When no OpenTelemetry SDK is present, all tracing calls are automatically no-op with zero performance impact.
+
+#### Automatic tracing (recommended)
+
+If the [OpenTelemetry Java agent](https://opentelemetry.io/docs/languages/java/automatic/) is attached, or `GlobalOpenTelemetry` is configured, tracing works automatically with no code changes:
+
+```java com.azure.ai.voicelive.tracing.automatic
+// No special configuration needed — tracing is picked up from GlobalOpenTelemetry
+VoiceLiveAsyncClient client = new VoiceLiveClientBuilder()
+ .endpoint(endpoint)
+ .credential(new AzureKeyCredential(apiKey))
+ .buildAsyncClient();
+```
+
+#### Explicit OpenTelemetry instance
+
+Provide your own `OpenTelemetry` instance to control trace export:
+
+```java com.azure.ai.voicelive.tracing.explicit
+VoiceLiveAsyncClient client = new VoiceLiveClientBuilder()
+ .endpoint(endpoint)
+ .credential(new AzureKeyCredential(apiKey))
+ .openTelemetry(otel)
+ .buildAsyncClient();
+```
+
+#### Span structure
+
+When tracing is active, the following span hierarchy is emitted for each voice session:
+
+```
+connect gpt-4o-realtime-preview ← session lifetime span
+├── send session.update ← one span per sent event
+├── send response.create
+├── recv session.created ← one span per received event
+├── recv response.audio.delta
+├── recv response.done ← includes token usage attributes
+└── close
+```
+
+**Session-level attributes** (on the connect span):
+- `gen_ai.system` — `openai`
+- `gen_ai.request.model` — Model name (e.g., `gpt-4o-realtime-preview`)
+- `server.address` — Service endpoint
+- `gen_ai.voice.session_id` — Voice session ID
+- `gen_ai.voice.turn_count` — Completed response turns
+- `gen_ai.voice.interruption_count` — User interruptions
+- `gen_ai.voice.audio_bytes_sent` / `audio_bytes_received` — Audio payload bytes
+- `gen_ai.voice.first_token_latency_ms` — Time to first audio response
+
+#### Content recording
+
+By default, message payloads are not recorded in spans for privacy. Enable content recording via the builder or environment variable:
+
+```java
+// Via builder
+new VoiceLiveClientBuilder()
+ .enableContentRecording(true)
+ // ...
+
+// Or via environment variable
+// AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED=true
+```
+
+> See `TelemetrySample.java` for complete tracing examples including Azure Monitor integration.
+
### Complete voice assistant with microphone
A full example demonstrating real-time microphone input and audio playback:
diff --git a/sdk/voicelive/azure-ai-voicelive/checkstyle-suppressions.xml b/sdk/voicelive/azure-ai-voicelive/checkstyle-suppressions.xml
index 6397e9e904dd..957cb8620c20 100644
--- a/sdk/voicelive/azure-ai-voicelive/checkstyle-suppressions.xml
+++ b/sdk/voicelive/azure-ai-voicelive/checkstyle-suppressions.xml
@@ -3,6 +3,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/sdk/voicelive/azure-ai-voicelive/pom.xml b/sdk/voicelive/azure-ai-voicelive/pom.xml
index 3eea97993716..bd5e20f8e99b 100644
--- a/sdk/voicelive/azure-ai-voicelive/pom.xml
+++ b/sdk/voicelive/azure-ai-voicelive/pom.xml
@@ -56,6 +56,11 @@ Code generated by Microsoft (R) TypeSpec Code Generator.
azure-core-http-netty
1.16.3
+    <dependency>
+      <groupId>io.opentelemetry</groupId>
+      <artifactId>opentelemetry-api</artifactId>
+      <version>1.58.0</version>
+    </dependency>
com.azure
azure-core-test
@@ -82,5 +87,42 @@ Code generated by Microsoft (R) TypeSpec Code Generator.
3.7.11
test
+    <dependency>
+      <groupId>io.opentelemetry</groupId>
+      <artifactId>opentelemetry-sdk</artifactId>
+      <version>1.58.0</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>io.opentelemetry</groupId>
+      <artifactId>opentelemetry-sdk-testing</artifactId>
+      <version>1.58.0</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>io.opentelemetry</groupId>
+      <artifactId>opentelemetry-exporter-logging</artifactId>
+      <version>1.58.0</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-enforcer-plugin</artifactId>
+        <version>3.6.1</version>
+        <configuration>
+          <rules>
+            <bannedDependencies>
+              <includes>
+                <include>io.opentelemetry:opentelemetry-api:[1.58.0]</include>
+              </includes>
+            </bannedDependencies>
+          </rules>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveAsyncClient.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveAsyncClient.java
index 000fcf98a5cd..ee9ae084b547 100644
--- a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveAsyncClient.java
+++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveAsyncClient.java
@@ -18,6 +18,7 @@
import com.azure.core.http.HttpHeaderName;
import com.azure.core.http.HttpHeaders;
import com.azure.core.util.logging.ClientLogger;
+import io.opentelemetry.api.trace.Tracer;
import reactor.core.publisher.Mono;
@@ -34,6 +35,8 @@ public final class VoiceLiveAsyncClient {
private final TokenCredential tokenCredential;
private final String apiVersion;
private final HttpHeaders additionalHeaders;
+ private final Tracer tracer;
+ private final Boolean enableContentRecording;
/**
* Creates a VoiceLiveAsyncClient with API key authentication.
@@ -44,11 +47,28 @@ public final class VoiceLiveAsyncClient {
* @param additionalHeaders Additional headers to include in requests.
*/
VoiceLiveAsyncClient(URI endpoint, KeyCredential keyCredential, String apiVersion, HttpHeaders additionalHeaders) {
+ this(endpoint, keyCredential, apiVersion, additionalHeaders, null, null);
+ }
+
+ /**
+ * Creates a VoiceLiveAsyncClient with API key authentication and tracing.
+ *
+ * @param endpoint The service endpoint.
+ * @param keyCredential The API key credential.
+ * @param apiVersion The API version.
+ * @param additionalHeaders Additional headers to include in requests.
+ * @param tracer The OpenTelemetry Tracer for instrumentation (may be a no-op tracer).
+ * @param enableContentRecording Override for content recording, or null to use env var.
+ */
+ VoiceLiveAsyncClient(URI endpoint, KeyCredential keyCredential, String apiVersion, HttpHeaders additionalHeaders,
+ Tracer tracer, Boolean enableContentRecording) {
this.endpoint = Objects.requireNonNull(endpoint, "'endpoint' cannot be null");
this.keyCredential = Objects.requireNonNull(keyCredential, "'keyCredential' cannot be null");
this.tokenCredential = null;
this.apiVersion = Objects.requireNonNull(apiVersion, "'apiVersion' cannot be null");
this.additionalHeaders = additionalHeaders != null ? additionalHeaders : new HttpHeaders();
+ this.tracer = tracer;
+ this.enableContentRecording = enableContentRecording;
}
/**
@@ -61,11 +81,28 @@ public final class VoiceLiveAsyncClient {
*/
VoiceLiveAsyncClient(URI endpoint, TokenCredential tokenCredential, String apiVersion,
HttpHeaders additionalHeaders) {
+ this(endpoint, tokenCredential, apiVersion, additionalHeaders, null, null);
+ }
+
+ /**
+ * Creates a VoiceLiveAsyncClient with token authentication and tracing.
+ *
+ * @param endpoint The service endpoint.
+ * @param tokenCredential The token credential.
+ * @param apiVersion The API version.
+ * @param additionalHeaders Additional headers to include in requests.
+ * @param tracer The OpenTelemetry Tracer for instrumentation (may be a no-op tracer).
+ * @param enableContentRecording Override for content recording, or null to use env var.
+ */
+ VoiceLiveAsyncClient(URI endpoint, TokenCredential tokenCredential, String apiVersion,
+ HttpHeaders additionalHeaders, Tracer tracer, Boolean enableContentRecording) {
this.endpoint = Objects.requireNonNull(endpoint, "'endpoint' cannot be null");
this.keyCredential = null;
this.tokenCredential = Objects.requireNonNull(tokenCredential, "'tokenCredential' cannot be null");
this.apiVersion = Objects.requireNonNull(apiVersion, "'apiVersion' cannot be null");
this.additionalHeaders = additionalHeaders != null ? additionalHeaders : new HttpHeaders();
+ this.tracer = tracer;
+ this.enableContentRecording = enableContentRecording;
}
/**
@@ -79,12 +116,7 @@ public Mono startSession(String model) {
Objects.requireNonNull(model, "'model' cannot be null");
return Mono.fromCallable(() -> convertToWebSocketEndpoint(endpoint, model)).flatMap(wsEndpoint -> {
- VoiceLiveSessionAsyncClient session;
- if (keyCredential != null) {
- session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential);
- } else {
- session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential);
- }
+ VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, model);
return session.connect(additionalHeaders).thenReturn(session);
});
}
@@ -97,12 +129,7 @@ public Mono startSession(String model) {
*/
public Mono startSession() {
return Mono.fromCallable(() -> convertToWebSocketEndpoint(endpoint, null)).flatMap(wsEndpoint -> {
- VoiceLiveSessionAsyncClient session;
- if (keyCredential != null) {
- session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential);
- } else {
- session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential);
- }
+ VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, null);
return session.connect(additionalHeaders).thenReturn(session);
});
}
@@ -122,12 +149,7 @@ public Mono startSession(String model, VoiceLiveReq
return Mono
.fromCallable(() -> convertToWebSocketEndpoint(endpoint, model, requestOptions.getCustomQueryParameters()))
.flatMap(wsEndpoint -> {
- VoiceLiveSessionAsyncClient session;
- if (keyCredential != null) {
- session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential);
- } else {
- session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential);
- }
+ VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, model);
// Merge additional headers with custom headers from requestOptions
HttpHeaders mergedHeaders = mergeHeaders(additionalHeaders, requestOptions.getCustomHeaders());
return session.connect(mergedHeaders).thenReturn(session);
@@ -148,12 +170,7 @@ public Mono startSession(VoiceLiveRequestOptions re
return Mono
.fromCallable(() -> convertToWebSocketEndpoint(endpoint, null, requestOptions.getCustomQueryParameters()))
.flatMap(wsEndpoint -> {
- VoiceLiveSessionAsyncClient session;
- if (keyCredential != null) {
- session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential);
- } else {
- session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential);
- }
+ VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, null);
// Merge additional headers with custom headers from requestOptions
HttpHeaders mergedHeaders = mergeHeaders(additionalHeaders, requestOptions.getCustomHeaders());
return session.connect(mergedHeaders).thenReturn(session);
@@ -176,12 +193,7 @@ public Mono startSession(AgentSessionConfig agentCo
return Mono.fromCallable(() -> convertToWebSocketEndpoint(endpoint, null, agentConfig.toQueryParameters()))
.flatMap(wsEndpoint -> {
- VoiceLiveSessionAsyncClient session;
- if (keyCredential != null) {
- session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential);
- } else {
- session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential);
- }
+ VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, null);
return session.connect(additionalHeaders).thenReturn(session);
});
}
@@ -211,18 +223,28 @@ public Mono startSession(AgentSessionConfig agentCo
}
return Mono.fromCallable(() -> convertToWebSocketEndpoint(endpoint, null, mergedParams)).flatMap(wsEndpoint -> {
- VoiceLiveSessionAsyncClient session;
- if (keyCredential != null) {
- session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential);
- } else {
- session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential);
- }
+ VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, null);
// Merge additional headers with custom headers from requestOptions
HttpHeaders mergedHeaders = mergeHeaders(additionalHeaders, requestOptions.getCustomHeaders());
return session.connect(mergedHeaders).thenReturn(session);
});
}
+ /**
+ * Creates a VoiceLiveSessionAsyncClient with the appropriate credentials and optional tracing.
+ *
+ * @param wsEndpoint The WebSocket endpoint URI.
+ * @param model The model name, used for tracing span names.
+ * @return A new VoiceLiveSessionAsyncClient instance.
+ */
+ private VoiceLiveSessionAsyncClient createSessionClient(URI wsEndpoint, String model) {
+ if (keyCredential != null) {
+ return new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential, tracer, model, enableContentRecording);
+ } else {
+ return new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential, tracer, model, enableContentRecording);
+ }
+ }
+
/**
* Merges two HttpHeaders objects, with custom headers taking precedence.
*
diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java
index e655bde45d91..41ff285b4f1e 100644
--- a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java
+++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java
@@ -17,6 +17,9 @@
import com.azure.core.util.ClientOptions;
import com.azure.core.util.CoreUtils;
import com.azure.core.util.logging.ClientLogger;
+import io.opentelemetry.api.GlobalOpenTelemetry;
+import io.opentelemetry.api.OpenTelemetry;
+import io.opentelemetry.api.trace.Tracer;
/**
* Builder for creating instances of {@link VoiceLiveAsyncClient}.
@@ -31,6 +34,8 @@ public final class VoiceLiveClientBuilder implements TokenCredentialTrait
+ * If not set, defaults to {@link GlobalOpenTelemetry#getOrNoop()}, which automatically
+ * uses the OpenTelemetry instance installed by the Java agent (if present), or a no-op
+ * implementation that has zero performance impact.
+ *
+ * When an OpenTelemetry SDK is configured (either globally or via this method), the SDK
+ * automatically emits spans for connect, send, recv, and close operations with voice-specific
+ * attributes and session-level counters following GenAI Semantic Conventions.
+ *
+ * @param openTelemetry The OpenTelemetry instance.
+ * @return The updated VoiceLiveClientBuilder instance.
+ * @throws NullPointerException if {@code openTelemetry} is null.
+ */
+ public VoiceLiveClientBuilder openTelemetry(OpenTelemetry openTelemetry) {
+ this.openTelemetry = Objects.requireNonNull(openTelemetry, "'openTelemetry' cannot be null");
+ return this;
+ }
+
+ /**
+ * Enables or disables content recording in trace spans.
+ *
+ * When enabled, full JSON payloads (including audio data) will be captured in span events.
+ * This is off by default for privacy. Can also be controlled via the
+ * {@code AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED} environment variable.
+ *
+ * @param enableContentRecording true to enable content recording in spans.
+ * @return The updated VoiceLiveClientBuilder instance.
+ */
+ public VoiceLiveClientBuilder enableContentRecording(boolean enableContentRecording) {
+ this.enableContentRecording = enableContentRecording;
+ return this;
+ }
+
/**
* Builds a {@link VoiceLiveAsyncClient} instance with the configured options.
*
@@ -125,10 +165,15 @@ public VoiceLiveAsyncClient buildAsyncClient() {
VoiceLiveServiceVersion version = serviceVersion != null ? serviceVersion : VoiceLiveServiceVersion.getLatest();
HttpHeaders additionalHeaders = CoreUtils.createHttpHeadersFromClientOptions(clientOptions);
+ OpenTelemetry otel = openTelemetry != null ? openTelemetry : GlobalOpenTelemetry.getOrNoop();
+ Tracer tracer = otel.getTracer("azure-ai-voicelive", "1.0.0-beta.6");
+
if (keyCredential != null) {
- return new VoiceLiveAsyncClient(endpoint, keyCredential, version.getVersion(), additionalHeaders);
+ return new VoiceLiveAsyncClient(endpoint, keyCredential, version.getVersion(), additionalHeaders, tracer,
+ enableContentRecording);
} else {
- return new VoiceLiveAsyncClient(endpoint, tokenCredential, version.getVersion(), additionalHeaders);
+ return new VoiceLiveAsyncClient(endpoint, tokenCredential, version.getVersion(), additionalHeaders, tracer,
+ enableContentRecording);
}
}
}
diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveSessionAsyncClient.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveSessionAsyncClient.java
index d1bc2f098e03..4622391b9208 100644
--- a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveSessionAsyncClient.java
+++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveSessionAsyncClient.java
@@ -39,6 +39,7 @@
import io.netty.buffer.Unpooled;
import io.netty.handler.codec.http.websocketx.TextWebSocketFrame;
import io.netty.handler.codec.http.websocketx.WebSocketFrame;
+import io.opentelemetry.api.trace.Tracer;
import reactor.core.Disposable;
import reactor.core.publisher.BufferOverflowStrategy;
import reactor.core.publisher.Flux;
@@ -95,6 +96,7 @@ public final class VoiceLiveSessionAsyncClient implements AsyncCloseable, AutoCl
private final KeyCredential keyCredential;
private final TokenCredential tokenCredential;
private final SerializerAdapter serializer;
+ private final VoiceLiveTracer voiceLiveTracer;
private final AtomicBoolean isConnected = new AtomicBoolean(false);
private final AtomicBoolean isClosed = new AtomicBoolean(false);
@@ -122,10 +124,26 @@ public final class VoiceLiveSessionAsyncClient implements AsyncCloseable, AutoCl
* @param keyCredential The API key credential.
*/
VoiceLiveSessionAsyncClient(URI endpoint, KeyCredential keyCredential) {
+ this(endpoint, keyCredential, null, null, null);
+ }
+
+ /**
+ * Creates a new VoiceLiveSessionAsyncClient with API key authentication and tracing.
+ *
+ * @param endpoint The WebSocket endpoint.
+ * @param keyCredential The API key credential.
+ * @param tracer The OpenTelemetry Tracer (may be a no-op tracer).
+ * @param model The model name for span naming.
+ * @param enableContentRecording Override for content recording, or null to use env var.
+ */
+ VoiceLiveSessionAsyncClient(URI endpoint, KeyCredential keyCredential, Tracer tracer, String model,
+ Boolean enableContentRecording) {
this.endpoint = Objects.requireNonNull(endpoint, "'endpoint' cannot be null");
this.keyCredential = Objects.requireNonNull(keyCredential, "'keyCredential' cannot be null");
this.tokenCredential = null;
this.serializer = JacksonAdapter.createDefaultSerializerAdapter();
+ this.voiceLiveTracer
+ = tracer != null ? new VoiceLiveTracer(tracer, endpoint, model, enableContentRecording) : null;
}
/**
@@ -135,10 +153,26 @@ public final class VoiceLiveSessionAsyncClient implements AsyncCloseable, AutoCl
* @param tokenCredential The token credential.
*/
VoiceLiveSessionAsyncClient(URI endpoint, TokenCredential tokenCredential) {
+ this(endpoint, tokenCredential, null, null, null);
+ }
+
+ /**
+ * Creates a new VoiceLiveSessionAsyncClient with token authentication and tracing.
+ *
+ * @param endpoint The WebSocket endpoint.
+ * @param tokenCredential The token credential.
+ * @param tracer The OpenTelemetry Tracer (may be a no-op tracer).
+ * @param model The model name for span naming.
+ * @param enableContentRecording Override for content recording, or null to use env var.
+ */
+ VoiceLiveSessionAsyncClient(URI endpoint, TokenCredential tokenCredential, Tracer tracer, String model,
+ Boolean enableContentRecording) {
this.endpoint = Objects.requireNonNull(endpoint, "'endpoint' cannot be null");
this.keyCredential = null;
this.tokenCredential = Objects.requireNonNull(tokenCredential, "'tokenCredential' cannot be null");
this.serializer = JacksonAdapter.createDefaultSerializerAdapter();
+ this.voiceLiveTracer
+ = tracer != null ? new VoiceLiveTracer(tracer, endpoint, model, enableContentRecording) : null;
}
/**
@@ -160,6 +194,11 @@ Mono connect(HttpHeaders additionalHeaders) {
return Mono.error(new IllegalStateException("Session lifecycle already active"));
}
+ // Start the connect span (session lifetime)
+ if (voiceLiveTracer != null) {
+ voiceLiveTracer.startConnectSpan();
+ }
+
Sinks.One readySink = Sinks.one();
Sinks.One closeSignal = Sinks.one();
connectionCloseSignalRef.set(closeSignal);
@@ -263,6 +302,11 @@ Mono connect(HttpHeaders additionalHeaders) {
readySink.tryEmitError(error);
connectionCloseSignalRef.compareAndSet(closeSignal, null);
disposeLifecycleSubscription();
+
+ // End the connect span on error
+ if (voiceLiveTracer != null) {
+ voiceLiveTracer.endConnectSpan(error);
+ }
}, () -> {
LOGGER.info("WebSocket handler completed");
connectionCloseSignalRef.compareAndSet(closeSignal, null);
@@ -281,6 +325,13 @@ Mono connect(HttpHeaders additionalHeaders) {
public Mono closeAsync() {
if (isClosed.compareAndSet(false, true)) {
LOGGER.info("Closing VoiceLive session");
+
+ // Trace the close operation and end the connect span
+ if (voiceLiveTracer != null) {
+ voiceLiveTracer.traceClose();
+ voiceLiveTracer.endConnectSpan(null);
+ }
+
sendSink.tryEmitComplete();
Sinks.One closeSignal = connectionCloseSignalRef.getAndSet(null);
@@ -351,6 +402,12 @@ public Mono sendEvent(ClientEvent event) {
return Mono.fromCallable(() -> {
try {
String json = serializer.serialize(event, SerializerEncoding.JSON);
+
+ // Trace the send operation
+ if (voiceLiveTracer != null) {
+ voiceLiveTracer.traceSend(event, json);
+ }
+
return BinaryData.fromString(json);
} catch (IOException e) {
throw LOGGER.logExceptionAsError(new RuntimeException("Failed to serialize event", e));
@@ -401,7 +458,15 @@ public Flux receiveEvents() {
.onBackpressureBuffer(INBOUND_BUFFER_CAPACITY,
dropped -> LOGGER.error("Inbound buffer overflow; dropped {} bytes", dropped.toBytes().length),
BufferOverflowStrategy.ERROR)
- .flatMap(this::parseToSessionUpdate)
+ .flatMap(data -> {
+ String rawPayload = data.toString();
+ return parseToSessionUpdate(data).doOnNext(update -> {
+ // Trace the recv operation
+ if (voiceLiveTracer != null) {
+ voiceLiveTracer.traceRecv(update, rawPayload);
+ }
+ });
+ })
.doOnError(error -> LOGGER.error("Failed to parse session update", error))
.onErrorResume(error -> {
LOGGER.warning("Skipping unrecognized server event: {}", error.getMessage());
diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveTracer.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveTracer.java
new file mode 100644
index 000000000000..983ca8eef213
--- /dev/null
+++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveTracer.java
@@ -0,0 +1,500 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.voicelive;
+
+import com.azure.ai.voicelive.models.ClientEvent;
+import com.azure.ai.voicelive.models.ClientEventInputAudioBufferAppend;
+import com.azure.ai.voicelive.models.ClientEventResponseCancel;
+import com.azure.ai.voicelive.models.ClientEventResponseCreate;
+import com.azure.ai.voicelive.models.ClientEventSessionUpdate;
+import com.azure.ai.voicelive.models.ResponseTokenStatistics;
+import com.azure.ai.voicelive.models.SessionResponse;
+import com.azure.ai.voicelive.models.SessionUpdate;
+import com.azure.ai.voicelive.models.SessionUpdateError;
+import com.azure.ai.voicelive.models.SessionUpdateErrorDetails;
+import com.azure.ai.voicelive.models.SessionUpdateResponseAudioDelta;
+import com.azure.ai.voicelive.models.SessionUpdateResponseDone;
+import com.azure.ai.voicelive.models.SessionUpdateSessionCreated;
+import com.azure.ai.voicelive.models.SessionUpdateSessionUpdated;
+import com.azure.ai.voicelive.models.VoiceLiveSessionOptions;
+import com.azure.ai.voicelive.models.VoiceLiveSessionResponse;
+import com.azure.core.util.Configuration;
+import com.azure.core.util.ConfigurationProperty;
+import com.azure.core.util.ConfigurationPropertyBuilder;
+import com.azure.core.util.logging.ClientLogger;
+import io.opentelemetry.api.common.AttributeKey;
+import io.opentelemetry.api.common.Attributes;
+import io.opentelemetry.api.common.AttributesBuilder;
+import io.opentelemetry.api.trace.Span;
+import io.opentelemetry.api.trace.SpanBuilder;
+import io.opentelemetry.api.trace.SpanKind;
+import io.opentelemetry.api.trace.StatusCode;
+import io.opentelemetry.api.trace.Tracer;
+import io.opentelemetry.context.Context;
+
+import java.net.URI;
+import java.util.Base64;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+
+/**
+ * Tracer for VoiceLive WebSocket sessions using the OpenTelemetry API.
+ *
+ * Manages a parent "connect" span for the session lifetime, with child spans for
+ * send, recv, and close operations. Tracks session-level counters for audio bytes,
+ * turn counts, interruptions, and first-token latency.
+ *
+ */
+final class VoiceLiveTracer {
+
+ private static final ClientLogger LOGGER = new ClientLogger(VoiceLiveTracer.class);
+
+ // GenAI semantic convention attribute keys
+ static final AttributeKey GEN_AI_SYSTEM = AttributeKey.stringKey("gen_ai.system");
+ static final String GEN_AI_SYSTEM_VALUE = "az.ai.voicelive";
+ static final AttributeKey GEN_AI_OPERATION_NAME = AttributeKey.stringKey("gen_ai.operation.name");
+ static final AttributeKey GEN_AI_REQUEST_MODEL = AttributeKey.stringKey("gen_ai.request.model");
+ static final AttributeKey AZ_NAMESPACE = AttributeKey.stringKey("az.namespace");
+ static final String AZ_NAMESPACE_VALUE = "Microsoft.CognitiveServices";
+ static final AttributeKey SERVER_ADDRESS = AttributeKey.stringKey("server.address");
+ static final AttributeKey SERVER_PORT = AttributeKey.longKey("server.port");
+
+ // Voice-specific attribute keys
+ static final AttributeKey GEN_AI_VOICE_SESSION_ID = AttributeKey.stringKey("gen_ai.voice.session_id");
+ static final AttributeKey GEN_AI_VOICE_INPUT_AUDIO_FORMAT
+ = AttributeKey.stringKey("gen_ai.voice.input_audio_format");
+ static final AttributeKey GEN_AI_VOICE_OUTPUT_AUDIO_FORMAT
+ = AttributeKey.stringKey("gen_ai.voice.output_audio_format");
+ static final AttributeKey GEN_AI_VOICE_TURN_COUNT = AttributeKey.longKey("gen_ai.voice.turn_count");
+ static final AttributeKey GEN_AI_VOICE_INTERRUPTION_COUNT
+ = AttributeKey.longKey("gen_ai.voice.interruption_count");
+ static final AttributeKey GEN_AI_VOICE_AUDIO_BYTES_SENT
+ = AttributeKey.longKey("gen_ai.voice.audio_bytes_sent");
+ static final AttributeKey GEN_AI_VOICE_AUDIO_BYTES_RECEIVED
+ = AttributeKey.longKey("gen_ai.voice.audio_bytes_received");
+ static final AttributeKey GEN_AI_VOICE_FIRST_TOKEN_LATENCY_MS
+ = AttributeKey.longKey("gen_ai.voice.first_token_latency_ms");
+ static final AttributeKey GEN_AI_VOICE_EVENT_TYPE = AttributeKey.stringKey("gen_ai.voice.event_type");
+ static final AttributeKey GEN_AI_VOICE_MESSAGE_SIZE = AttributeKey.longKey("gen_ai.voice.message_size");
+ static final AttributeKey GEN_AI_USAGE_INPUT_TOKENS = AttributeKey.longKey("gen_ai.usage.input_tokens");
+ static final AttributeKey GEN_AI_USAGE_OUTPUT_TOKENS = AttributeKey.longKey("gen_ai.usage.output_tokens");
+ static final AttributeKey ERROR_TYPE = AttributeKey.stringKey("error.type");
+
+ // Span event names
+ static final String GEN_AI_INPUT_MESSAGES = "gen_ai.input.messages";
+ static final String GEN_AI_OUTPUT_MESSAGES = "gen_ai.output.messages";
+ static final String GEN_AI_VOICE_ERROR = "gen_ai.voice.error";
+
+ // Event attribute keys
+ private static final AttributeKey EVENT_CONTENT = AttributeKey.stringKey("gen_ai.event.content");
+ private static final AttributeKey ERROR_CODE = AttributeKey.stringKey("error.code");
+ private static final AttributeKey ERROR_MESSAGE = AttributeKey.stringKey("error.message");
+
+ // Content recording configuration
+ private static final ConfigurationProperty CAPTURE_MESSAGE_CONTENT
+ = ConfigurationPropertyBuilder.ofBoolean("azure.tracing.gen_ai.content_recording_enabled")
+ .environmentVariableName("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED")
+ .systemPropertyName("azure.tracing.gen_ai.content_recording_enabled")
+ .shared(true)
+ .defaultValue(false)
+ .build();
+
+ private final Tracer tracer;
+ private final boolean captureContent;
+ private final String serverAddress;
+ private final int serverPort;
+ private final String model;
+
+ // Session lifetime span and its OTel context (for parenting child spans)
+ private final AtomicReference connectSpan = new AtomicReference<>();
+ private final AtomicReference connectContext = new AtomicReference<>();
+
+ // Session-level counters (thread-safe)
+ private final AtomicLong turnCount = new AtomicLong(0);
+ private final AtomicLong interruptionCount = new AtomicLong(0);
+ private final AtomicLong audioBytesSent = new AtomicLong(0);
+ private final AtomicLong audioBytesReceived = new AtomicLong(0);
+
+ // First-token latency tracking
+ private final AtomicLong responseCreateTimestampNanos = new AtomicLong(0);
+ private final AtomicLong firstTokenLatencyMs = new AtomicLong(-1);
+
+ // Session attributes discovered during the session
+ private final AtomicReference sessionId = new AtomicReference<>();
+ private final AtomicReference inputAudioFormat = new AtomicReference<>();
+ private final AtomicReference outputAudioFormat = new AtomicReference<>();
+
+ /**
+ * Creates a VoiceLiveTracer.
+ *
+ * @param tracer The OpenTelemetry Tracer instance (may be a no-op tracer).
+ * @param endpoint The WebSocket endpoint URI.
+ * @param model The model name.
+ * @param captureContentOverride Optional override for content recording (null = use env var).
+ */
+ VoiceLiveTracer(Tracer tracer, URI endpoint, String model, Boolean captureContentOverride) {
+ this.tracer = tracer;
+ this.model = model;
+
+ if (endpoint != null) {
+ this.serverAddress = endpoint.getHost();
+ this.serverPort
+ = endpoint.getPort() == -1 ? ("wss".equals(endpoint.getScheme()) ? 443 : 80) : endpoint.getPort();
+ } else {
+ this.serverAddress = null;
+ this.serverPort = -1;
+ }
+
+ if (captureContentOverride != null) {
+ this.captureContent = captureContentOverride;
+ } else {
+ this.captureContent = Configuration.getGlobalConfiguration().get(CAPTURE_MESSAGE_CONTENT);
+ }
+ }
+
+ // ============================================================================
+ // Connect Span (session lifetime)
+ // ============================================================================
+
+ /**
+ * Starts the parent "connect" span for the session lifetime.
+ */
+ void startConnectSpan() {
+ String spanName = model != null ? "connect " + model : "connect";
+
+ SpanBuilder spanBuilder = tracer.spanBuilder(spanName)
+ .setSpanKind(SpanKind.CLIENT)
+ .setAttribute(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE)
+ .setAttribute(GEN_AI_OPERATION_NAME, "connect")
+ .setAttribute(AZ_NAMESPACE, AZ_NAMESPACE_VALUE);
+
+ if (model != null) {
+ spanBuilder.setAttribute(GEN_AI_REQUEST_MODEL, model);
+ }
+ if (serverAddress != null) {
+ spanBuilder.setAttribute(SERVER_ADDRESS, serverAddress);
+ if (serverPort != 443 && serverPort != -1) {
+ spanBuilder.setAttribute(SERVER_PORT, (long) serverPort);
+ }
+ }
+
+ Span span = spanBuilder.startSpan();
+ Context ctx = Context.current().with(span);
+ connectSpan.set(span);
+ connectContext.set(ctx);
+ }
+
+ /**
+ * Ends the connect span, flushing session-level counters as attributes.
+ *
+ * @param error The error that caused the session to close, or null.
+ */
+ void endConnectSpan(Throwable error) {
+ Span span = connectSpan.getAndSet(null);
+ connectContext.set(null);
+ if (span == null) {
+ return;
+ }
+
+ // Flush session-level counters
+ String sid = sessionId.get();
+ if (sid != null) {
+ span.setAttribute(GEN_AI_VOICE_SESSION_ID, sid);
+ }
+ String inFormat = inputAudioFormat.get();
+ if (inFormat != null) {
+ span.setAttribute(GEN_AI_VOICE_INPUT_AUDIO_FORMAT, inFormat);
+ }
+ String outFormat = outputAudioFormat.get();
+ if (outFormat != null) {
+ span.setAttribute(GEN_AI_VOICE_OUTPUT_AUDIO_FORMAT, outFormat);
+ }
+ span.setAttribute(GEN_AI_VOICE_TURN_COUNT, turnCount.get());
+ span.setAttribute(GEN_AI_VOICE_INTERRUPTION_COUNT, interruptionCount.get());
+ span.setAttribute(GEN_AI_VOICE_AUDIO_BYTES_SENT, audioBytesSent.get());
+ span.setAttribute(GEN_AI_VOICE_AUDIO_BYTES_RECEIVED, audioBytesReceived.get());
+
+ long latency = firstTokenLatencyMs.get();
+ if (latency >= 0) {
+ span.setAttribute(GEN_AI_VOICE_FIRST_TOKEN_LATENCY_MS, latency);
+ }
+
+ if (error != null) {
+ span.setStatus(StatusCode.ERROR, error.getMessage());
+ span.recordException(error);
+ span.setAttribute(ERROR_TYPE, error.getClass().getCanonicalName());
+ }
+
+ span.end();
+ }
+
+ // ============================================================================
+ // Send Span
+ // ============================================================================
+
+ /**
+ * Traces a send operation. Creates a child span of the connect span.
+ *
+ * @param event The client event being sent.
+ * @param jsonPayload The serialized JSON payload.
+ */
+ void traceSend(ClientEvent event, String jsonPayload) {
+ Context parentCtx = connectContext.get();
+ if (parentCtx == null) {
+ return;
+ }
+
+ String eventType = event.getType() != null ? event.getType().toString() : "unknown";
+ String spanName = "send " + eventType;
+
+ Span span = tracer.spanBuilder(spanName)
+ .setSpanKind(SpanKind.CLIENT)
+ .setParent(parentCtx)
+ .setAttribute(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE)
+ .setAttribute(GEN_AI_OPERATION_NAME, "send")
+ .setAttribute(AZ_NAMESPACE, AZ_NAMESPACE_VALUE)
+ .setAttribute(GEN_AI_VOICE_EVENT_TYPE, eventType)
+ .startSpan();
+
+ try {
+ if (span.isRecording()) {
+ if (jsonPayload != null) {
+ span.setAttribute(GEN_AI_VOICE_MESSAGE_SIZE, (long) jsonPayload.length());
+ }
+
+ // Add span event
+ AttributesBuilder eventAttrs = Attributes.builder()
+ .put(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE)
+ .put(GEN_AI_VOICE_EVENT_TYPE, eventType);
+ if (captureContent && jsonPayload != null) {
+ eventAttrs.put(EVENT_CONTENT, jsonPayload);
+ }
+ span.addEvent(GEN_AI_INPUT_MESSAGES, eventAttrs.build());
+
+ // Track session-level counters from sent events
+ trackSendCounters(event);
+ }
+ } finally {
+ span.end();
+ }
+ }
+
+ // ============================================================================
+ // Recv Span
+ // ============================================================================
+
+ /**
+ * Traces a recv operation. Creates a child span of the connect span.
+ *
+ * @param update The parsed session update event.
+ * @param rawPayload The raw JSON payload string (for message size and content recording).
+ */
+ void traceRecv(SessionUpdate update, String rawPayload) {
+ Context parentCtx = connectContext.get();
+ if (parentCtx == null) {
+ return;
+ }
+
+ String eventType = update.getType() != null ? update.getType().toString() : "unknown";
+ String spanName = "recv " + eventType;
+
+ Span span = tracer.spanBuilder(spanName)
+ .setSpanKind(SpanKind.CLIENT)
+ .setParent(parentCtx)
+ .setAttribute(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE)
+ .setAttribute(GEN_AI_OPERATION_NAME, "recv")
+ .setAttribute(AZ_NAMESPACE, AZ_NAMESPACE_VALUE)
+ .setAttribute(GEN_AI_VOICE_EVENT_TYPE, eventType)
+ .startSpan();
+
+ try {
+ if (span.isRecording()) {
+ if (rawPayload != null) {
+ span.setAttribute(GEN_AI_VOICE_MESSAGE_SIZE, (long) rawPayload.length());
+ }
+
+ // Track per-message token usage from response.done
+ trackRecvTokenUsage(update, span);
+
+ // Add span event for output messages
+ AttributesBuilder eventAttrs = Attributes.builder()
+ .put(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE)
+ .put(GEN_AI_VOICE_EVENT_TYPE, eventType);
+ if (captureContent && rawPayload != null) {
+ eventAttrs.put(EVENT_CONTENT, rawPayload);
+ }
+ span.addEvent(GEN_AI_OUTPUT_MESSAGES, eventAttrs.build());
+
+ // Track error events — set error status on span
+ trackErrorEvents(update, span);
+
+ // Track session-level counters from received events
+ trackRecvCounters(update);
+ }
+ } finally {
+ span.end();
+ }
+ }
+
+ // ============================================================================
+ // Close Span
+ // ============================================================================
+
+ /**
+ * Traces the close operation.
+ */
+ void traceClose() {
+ Context parentCtx = connectContext.get();
+ if (parentCtx == null) {
+ return;
+ }
+
+ Span span = tracer.spanBuilder("close")
+ .setSpanKind(SpanKind.CLIENT)
+ .setParent(parentCtx)
+ .setAttribute(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE)
+ .setAttribute(GEN_AI_OPERATION_NAME, "close")
+ .setAttribute(AZ_NAMESPACE, AZ_NAMESPACE_VALUE)
+ .startSpan();
+ span.end();
+ }
+
+ // ============================================================================
+ // Counter Tracking
+ // ============================================================================
+
+ private void trackSendCounters(ClientEvent event) {
+ // Track audio bytes sent from input_audio_buffer.append
+ if (event instanceof ClientEventInputAudioBufferAppend) {
+ ClientEventInputAudioBufferAppend appendEvent = (ClientEventInputAudioBufferAppend) event;
+ String audio = appendEvent.getAudio();
+ if (audio != null) {
+ try {
+ byte[] decoded = Base64.getDecoder().decode(audio);
+ audioBytesSent.addAndGet(decoded.length);
+ } catch (IllegalArgumentException e) {
+ LOGGER.atVerbose().log("Failed to decode audio for byte counting", e);
+ }
+ }
+ }
+
+ // Track response.create for first-token latency
+ if (event instanceof ClientEventResponseCreate) {
+ responseCreateTimestampNanos.set(System.nanoTime());
+ firstTokenLatencyMs.set(-1); // Reset for this response
+ }
+
+ // Track interruptions from response.cancel
+ if (event instanceof ClientEventResponseCancel) {
+ interruptionCount.incrementAndGet();
+ }
+
+ // Track audio format from session.update
+ if (event instanceof ClientEventSessionUpdate) {
+ ClientEventSessionUpdate sessionUpdate = (ClientEventSessionUpdate) event;
+ VoiceLiveSessionOptions session = sessionUpdate.getSession();
+ if (session != null) {
+ if (session.getInputAudioFormat() != null) {
+ inputAudioFormat.set(session.getInputAudioFormat().toString());
+ }
+ if (session.getOutputAudioFormat() != null) {
+ outputAudioFormat.set(session.getOutputAudioFormat().toString());
+ }
+ }
+ }
+ }
+
+ private void trackRecvCounters(SessionUpdate update) {
+ // Track session ID from session.created / session.updated
+ if (update instanceof SessionUpdateSessionCreated) {
+ VoiceLiveSessionResponse session = ((SessionUpdateSessionCreated) update).getSession();
+ if (session != null && session.getId() != null) {
+ sessionId.set(session.getId());
+ }
+ }
+ if (update instanceof SessionUpdateSessionUpdated) {
+ VoiceLiveSessionResponse session = ((SessionUpdateSessionUpdated) update).getSession();
+ if (session != null && session.getId() != null) {
+ sessionId.set(session.getId());
+ }
+ }
+
+ // Track audio format from session.created / session.updated responses
+ if (update instanceof SessionUpdateSessionCreated) {
+ updateAudioFormatsFromResponse(((SessionUpdateSessionCreated) update).getSession());
+ }
+ if (update instanceof SessionUpdateSessionUpdated) {
+ updateAudioFormatsFromResponse(((SessionUpdateSessionUpdated) update).getSession());
+ }
+
+ // Track audio bytes received from response.audio.delta
+ if (update instanceof SessionUpdateResponseAudioDelta) {
+ SessionUpdateResponseAudioDelta audioDelta = (SessionUpdateResponseAudioDelta) update;
+ byte[] delta = audioDelta.getDelta();
+ if (delta != null) {
+ audioBytesReceived.addAndGet(delta.length);
+ }
+
+ // First-token latency: measure from response.create to first audio delta
+ long createTs = responseCreateTimestampNanos.get();
+ if (createTs > 0 && firstTokenLatencyMs.get() < 0) {
+ long elapsed = (System.nanoTime() - createTs) / 1_000_000;
+ firstTokenLatencyMs.compareAndSet(-1, elapsed);
+ }
+ }
+
+ // Track turn count from response.done
+ if (update instanceof SessionUpdateResponseDone) {
+ turnCount.incrementAndGet();
+ }
+ }
+
+ private void trackRecvTokenUsage(SessionUpdate update, Span span) {
+ if (update instanceof SessionUpdateResponseDone) {
+ SessionResponse response = ((SessionUpdateResponseDone) update).getResponse();
+ if (response != null) {
+ ResponseTokenStatistics usage = response.getUsage();
+ if (usage != null) {
+ span.setAttribute(GEN_AI_USAGE_INPUT_TOKENS, (long) usage.getInputTokens());
+ span.setAttribute(GEN_AI_USAGE_OUTPUT_TOKENS, (long) usage.getOutputTokens());
+ }
+ }
+ }
+ }
+
+ private void trackErrorEvents(SessionUpdate update, Span span) {
+ if (update instanceof SessionUpdateError) {
+ SessionUpdateError errorUpdate = (SessionUpdateError) update;
+ SessionUpdateErrorDetails errorDetails = errorUpdate.getError();
+ if (errorDetails != null) {
+ // Set error status on the span
+ String message = errorDetails.getMessage() != null ? errorDetails.getMessage() : "Unknown error";
+ span.setStatus(StatusCode.ERROR, message);
+ span.setAttribute(ERROR_TYPE, errorDetails.getType() != null ? errorDetails.getType() : "server_error");
+
+ // Add error event with details
+ AttributesBuilder errorAttrs = Attributes.builder().put(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE);
+ if (errorDetails.getCode() != null) {
+ errorAttrs.put(ERROR_CODE, errorDetails.getCode());
+ }
+ if (errorDetails.getMessage() != null) {
+ errorAttrs.put(ERROR_MESSAGE, errorDetails.getMessage());
+ }
+ span.addEvent(GEN_AI_VOICE_ERROR, errorAttrs.build());
+ }
+ }
+ }
+
+ private void updateAudioFormatsFromResponse(VoiceLiveSessionResponse session) {
+ if (session == null) {
+ return;
+ }
+ if (session.getInputAudioFormat() != null) {
+ inputAudioFormat.set(session.getInputAudioFormat().toString());
+ }
+ if (session.getOutputAudioFormat() != null) {
+ outputAudioFormat.set(session.getOutputAudioFormat().toString());
+ }
+ }
+}
diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/module-info.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/module-info.java
index 6e615f120e95..76aab10bf839 100644
--- a/sdk/voicelive/azure-ai-voicelive/src/main/java/module-info.java
+++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/module-info.java
@@ -9,6 +9,8 @@
requires io.netty.buffer;
requires io.netty.codec.http;
requires io.netty.resolver;
+ requires io.opentelemetry.api;
+ requires io.opentelemetry.context;
exports com.azure.ai.voicelive;
exports com.azure.ai.voicelive.models;
diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/ReadmeSamples.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/ReadmeSamples.java
index b006cc42c402..39af028010e8 100644
--- a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/ReadmeSamples.java
+++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/ReadmeSamples.java
@@ -38,6 +38,7 @@
import com.azure.identity.AzureCliCredentialBuilder;
import com.azure.identity.DefaultAzureCredentialBuilder;
import com.fasterxml.jackson.databind.ObjectMapper;
+import io.opentelemetry.api.OpenTelemetry;
import reactor.core.publisher.Mono;
import java.io.IOException;
@@ -402,6 +403,34 @@ public void functionCalling() {
// END: com.azure.ai.voicelive.functioncalling
}
+    /**
+     * Tracing: automatic via GlobalOpenTelemetry.
+     *
+     * <p>When the OpenTelemetry Java agent is attached, or the application has configured
+     * {@code GlobalOpenTelemetry}, spans are emitted without any builder configuration;
+     * otherwise the client falls back to a zero-cost no-op.</p>
+     */
+    public void tracingAutomatic() {
+        // BEGIN: com.azure.ai.voicelive.tracing.automatic
+        // No special configuration needed — tracing is picked up from GlobalOpenTelemetry
+        VoiceLiveAsyncClient client = new VoiceLiveClientBuilder()
+            .endpoint(endpoint)
+            .credential(new AzureKeyCredential(apiKey))
+            .buildAsyncClient();
+        // END: com.azure.ai.voicelive.tracing.automatic
+    }
+
+    /**
+     * Tracing: explicit OpenTelemetry instance.
+     *
+     * <p>Pass a configured {@code OpenTelemetry} instance (for example an
+     * {@code OpenTelemetrySdk} wired to your exporter) to the builder when you do not
+     * want to rely on the global instance.</p>
+     */
+    public void tracingExplicit() {
+        OpenTelemetry otel = OpenTelemetry.noop(); // Replace with your configured OpenTelemetry SDK instance
+
+        // BEGIN: com.azure.ai.voicelive.tracing.explicit
+        VoiceLiveAsyncClient client = new VoiceLiveClientBuilder()
+            .endpoint(endpoint)
+            .credential(new AzureKeyCredential(apiKey))
+            .openTelemetry(otel)
+            .buildAsyncClient();
+        // END: com.azure.ai.voicelive.tracing.explicit
+    }
+
// Helper methods
private Object parametersSchema = new Object();
diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/TelemetrySample.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/TelemetrySample.java
new file mode 100644
index 000000000000..6a3e43edbdd7
--- /dev/null
+++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/TelemetrySample.java
@@ -0,0 +1,152 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.voicelive;
+
+import com.azure.ai.voicelive.models.ClientEventSessionUpdate;
+import com.azure.ai.voicelive.models.InteractionModality;
+import com.azure.ai.voicelive.models.SessionUpdateResponseDone;
+import com.azure.ai.voicelive.models.VoiceLiveSessionOptions;
+import com.azure.core.credential.KeyCredential;
+import io.opentelemetry.api.OpenTelemetry;
+import io.opentelemetry.exporter.logging.LoggingSpanExporter;
+import io.opentelemetry.sdk.OpenTelemetrySdk;
+import io.opentelemetry.sdk.trace.SdkTracerProvider;
+import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor;
+import reactor.core.publisher.Mono;
+
+import java.util.Arrays;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Sample demonstrating how to enable OpenTelemetry tracing for VoiceLive sessions.
+ *
+ * <p>This runnable sample shows how to configure OpenTelemetry tracing so that every
+ * connect, send, recv, and close operation emits spans with voice-specific attributes.</p>
+ *
+ * <p><strong>Environment Variables Required:</strong></p>
+ * <ul>
+ *   <li>{@code AZURE_VOICELIVE_ENDPOINT} — The VoiceLive service endpoint URL</li>
+ *   <li>{@code AZURE_VOICELIVE_API_KEY} — The API key for authentication</li>
+ * </ul>
+ *
+ * <p><strong>How to Run:</strong></p>
+ * <pre>{@code
+ * mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.TelemetrySample" -Dexec.classpathScope=test
+ * }</pre>
+ *
+ * <p><strong>Span Structure:</strong>
+ * When tracing is enabled, the following span hierarchy is emitted:</p>
+ * <pre>
+ * connect gpt-4o-realtime-preview   ← session lifetime
+ * ├── send session.update           ← send spans include event type
+ * ├── send response.create
+ * ├── recv session.created          ← recv spans include event type
+ * ├── recv response.audio.delta
+ * ├── recv response.done            ← includes token usage
+ * └── close
+ * </pre>
+ *
+ * <p><strong>Session-level Attributes (on connect span):</strong></p>
+ * <ul>
+ *   <li>{@code gen_ai.voice.session_id} — Voice session ID</li>
+ *   <li>{@code gen_ai.voice.turn_count} — Completed response turns</li>
+ *   <li>{@code gen_ai.voice.interruption_count} — User interruptions</li>
+ *   <li>{@code gen_ai.voice.audio_bytes_sent} — Total audio payload bytes sent</li>
+ *   <li>{@code gen_ai.voice.audio_bytes_received} — Total audio payload bytes received</li>
+ *   <li>{@code gen_ai.voice.first_token_latency_ms} — Time to first response</li>
+ * </ul>
+ *
+ * <p><strong>Alternative: Automatic tracing via GlobalOpenTelemetry</strong>
+ * If the OpenTelemetry Java agent is attached or {@code GlobalOpenTelemetry} is configured,
+ * tracing works automatically with no builder configuration needed. The client defaults to
+ * {@code GlobalOpenTelemetry.getOrNoop()}, which is a zero-cost no-op when no SDK is present.</p>
+ *
+ * <p><strong>Alternative: Azure Monitor Integration</strong></p>
+ * <pre>{@code
+ * // Replace LoggingSpanExporter with azure-monitor-opentelemetry-exporter:
+ * AzureMonitorExporterBuilder exporter = new AzureMonitorExporterBuilder()
+ *     .connectionString(System.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING"));
+ * SdkTracerProvider tracerProvider = SdkTracerProvider.builder()
+ *     .addSpanProcessor(SimpleSpanProcessor.create(exporter.buildTraceExporter()))
+ *     .build();
+ * }</pre>
+ */
+public final class TelemetrySample {
+
+    /**
+     * Main method to run the telemetry sample.
+     *
+     * @param args Unused command line arguments
+     * @throws InterruptedException if the thread is interrupted while waiting
+     */
+    public static void main(String[] args) throws InterruptedException {
+        String endpoint = System.getenv("AZURE_VOICELIVE_ENDPOINT");
+        String apiKey = System.getenv("AZURE_VOICELIVE_API_KEY");
+
+        if (endpoint == null || apiKey == null) {
+            System.err.println("Please set AZURE_VOICELIVE_ENDPOINT and AZURE_VOICELIVE_API_KEY environment variables");
+            return;
+        }
+
+        // 1. Set up OpenTelemetry with a console exporter that prints spans to stdout.
+        //    In production, replace LoggingSpanExporter with OtlpGrpcSpanExporter
+        //    or the Azure Monitor exporter.
+        SdkTracerProvider tracerProvider = SdkTracerProvider.builder()
+            .addSpanProcessor(SimpleSpanProcessor.create(LoggingSpanExporter.create()))
+            .build();
+        OpenTelemetry otel = OpenTelemetrySdk.builder()
+            .setTracerProvider(tracerProvider)
+            .build();
+
+        // 2. Build client with the explicit OpenTelemetry instance.
+        //    Alternatively, omit .openTelemetry() to use GlobalOpenTelemetry automatically.
+        VoiceLiveAsyncClient client = new VoiceLiveClientBuilder()
+            .endpoint(endpoint)
+            .credential(new KeyCredential(apiKey))
+            .openTelemetry(otel)
+            .enableContentRecording(false) // Set true to capture JSON payloads in spans
+            .buildAsyncClient();
+
+        System.out.println("Starting traced voice session...");
+
+        CountDownLatch done = new CountDownLatch(1);
+
+        // 3. Run a short text-mode conversation — all operations are traced automatically.
+        client.startSession("gpt-4o-realtime-preview")
+            .flatMap(session -> {
+                VoiceLiveSessionOptions options = new VoiceLiveSessionOptions()
+                    .setModalities(Arrays.asList(InteractionModality.TEXT))
+                    .setInstructions("You are a helpful assistant. Be concise.");
+
+                session.receiveEvents()
+                    .subscribe(
+                        event -> {
+                            System.out.println("Event: " + event.getType());
+                            if (event instanceof SessionUpdateResponseDone) {
+                                // First full response completes the demo
+                                session.closeAsync().subscribe();
+                                done.countDown();
+                            }
+                        },
+                        error -> {
+                            System.err.println("Error: " + error.getMessage());
+                            done.countDown();
+                        }
+                    );
+
+                return session.sendEvent(new ClientEventSessionUpdate(options))
+                    .then(session.startResponse())
+                    .then(Mono.empty());
+            })
+            .subscribe();
+
+        done.await(30, TimeUnit.SECONDS); // best-effort wait; falls through after the timeout
+
+        // 4. Shut down the tracer provider to flush remaining spans to console.
+        tracerProvider.close();
+    }
+
+    private TelemetrySample() {
+    }
+}
diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/VoiceAssistantSample.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/VoiceAssistantSample.java
index 133b4ae21f32..9470b79edff9 100644
--- a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/VoiceAssistantSample.java
+++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/VoiceAssistantSample.java
@@ -25,6 +25,11 @@
import com.azure.core.credential.TokenCredential;
import com.azure.core.util.BinaryData;
import com.azure.identity.AzureCliCredentialBuilder;
+import io.opentelemetry.api.OpenTelemetry;
+import io.opentelemetry.exporter.logging.LoggingSpanExporter;
+import io.opentelemetry.sdk.OpenTelemetrySdk;
+import io.opentelemetry.sdk.trace.SdkTracerProvider;
+import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor;
import reactor.core.publisher.Mono;
@@ -84,6 +89,9 @@
*
* # With Token Credential:
* mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.VoiceAssistantSample" -Dexec.classpathScope=test -Dexec.args="--use-token-credential"
+ *
+ * # With OpenTelemetry tracing enabled:
+ * mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.VoiceAssistantSample" -Dexec.classpathScope=test -Dexec.args="--enable-tracing"
* }
*/
public final class VoiceAssistantSample {
@@ -361,10 +369,12 @@ void shutdown() {
public static void main(String[] args) {
// Parse command line arguments
boolean useTokenCredential = false;
+ boolean enableTracing = false;
for (String arg : args) {
if ("--use-token-credential".equals(arg)) {
useTokenCredential = true;
- break;
+ } else if ("--enable-tracing".equals(arg)) {
+ enableTracing = true;
}
}
@@ -389,6 +399,19 @@ public static void main(String[] args) {
return;
}
+ // Set up OpenTelemetry tracing if enabled
+ SdkTracerProvider tracerProvider = null;
+ OpenTelemetry otel = null;
+ if (enableTracing) {
+ tracerProvider = SdkTracerProvider.builder()
+ .addSpanProcessor(SimpleSpanProcessor.create(LoggingSpanExporter.create()))
+ .build();
+ otel = OpenTelemetrySdk.builder()
+ .setTracerProvider(tracerProvider)
+ .build();
+ System.out.println("📊 OpenTelemetry tracing enabled (console exporter)");
+ }
+
System.out.println("🎙️ Starting Voice Assistant...");
try {
@@ -397,16 +420,20 @@ public static void main(String[] args) {
System.out.println("🔑 Using Token Credential authentication (Azure CLI)");
System.out.println(" Make sure you have run 'az login' before running this sample");
TokenCredential credential = new AzureCliCredentialBuilder().build();
- runVoiceAssistant(endpoint, credential);
+ runVoiceAssistant(endpoint, credential, otel);
} else {
// Use API Key authentication
System.out.println("🔑 Using API Key authentication");
- runVoiceAssistant(endpoint, new KeyCredential(apiKey));
+ runVoiceAssistant(endpoint, new KeyCredential(apiKey), otel);
}
System.out.println("✓ Voice Assistant completed successfully");
} catch (Exception e) {
System.err.println("❌ Voice Assistant failed: " + e.getMessage());
e.printStackTrace();
+ } finally {
+ if (tracerProvider != null) {
+ tracerProvider.close();
+ }
}
}
@@ -449,6 +476,7 @@ private static void printUsage() {
System.err.println(" " + ENV_API_KEY + "= (required if not using --use-token-credential)");
System.err.println("\nOptional:");
System.err.println(" Use --use-token-credential flag to authenticate with Azure CLI (requires 'az login')");
+ System.err.println(" Use --enable-tracing flag to enable OpenTelemetry tracing (console exporter)");
}
/**
@@ -456,17 +484,21 @@ private static void printUsage() {
*
* @param endpoint The VoiceLive service endpoint
* @param credential The API key credential
+ * @param otel The OpenTelemetry instance, or null to disable tracing
*/
- private static void runVoiceAssistant(String endpoint, KeyCredential credential) {
+ private static void runVoiceAssistant(String endpoint, KeyCredential credential, OpenTelemetry otel) {
System.out.println("🔧 Initializing VoiceLive client:");
System.out.println(" Endpoint: " + endpoint);
// Create the VoiceLive client
- VoiceLiveAsyncClient client = new VoiceLiveClientBuilder()
+ VoiceLiveClientBuilder builder = new VoiceLiveClientBuilder()
.endpoint(endpoint)
.credential(credential)
- .serviceVersion(VoiceLiveServiceVersion.V2025_10_01)
- .buildAsyncClient();
+ .serviceVersion(VoiceLiveServiceVersion.V2025_10_01);
+ if (otel != null) {
+ builder.openTelemetry(otel);
+ }
+ VoiceLiveAsyncClient client = builder.buildAsyncClient();
runVoiceAssistantWithClient(client);
}
@@ -476,17 +508,21 @@ private static void runVoiceAssistant(String endpoint, KeyCredential credential)
*
* @param endpoint The VoiceLive service endpoint
* @param credential The token credential
+ * @param otel The OpenTelemetry instance, or null to disable tracing
*/
- private static void runVoiceAssistant(String endpoint, TokenCredential credential) {
+ private static void runVoiceAssistant(String endpoint, TokenCredential credential, OpenTelemetry otel) {
System.out.println("🔧 Initializing VoiceLive client:");
System.out.println(" Endpoint: " + endpoint);
// Create the VoiceLive client
- VoiceLiveAsyncClient client = new VoiceLiveClientBuilder()
+ VoiceLiveClientBuilder builder = new VoiceLiveClientBuilder()
.endpoint(endpoint)
.credential(credential)
- .serviceVersion(VoiceLiveServiceVersion.V2025_10_01)
- .buildAsyncClient();
+ .serviceVersion(VoiceLiveServiceVersion.V2025_10_01);
+ if (otel != null) {
+ builder.openTelemetry(otel);
+ }
+ VoiceLiveAsyncClient client = builder.buildAsyncClient();
runVoiceAssistantWithClient(client);
}
diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveClientBuilderTest.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveClientBuilderTest.java
index 5055c71b91e5..5bb5499dfb59 100644
--- a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveClientBuilderTest.java
+++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveClientBuilderTest.java
@@ -6,6 +6,7 @@
import com.azure.core.credential.KeyCredential;
import com.azure.core.credential.TokenCredential;
import com.azure.core.test.utils.MockTokenCredential;
+import io.opentelemetry.api.OpenTelemetry;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -203,4 +204,56 @@ void testBuilderReturnsBuilder() {
assertSame(clientBuilder, clientBuilder.credential(mockKeyCredential));
assertSame(clientBuilder, clientBuilder.serviceVersion(VoiceLiveServiceVersion.V2026_01_01_PREVIEW));
}
+
+ @Test
+ void testBuilderWithExplicitOpenTelemetry() {
+ String endpoint = "https://test.cognitiveservices.azure.com";
+
+ assertDoesNotThrow(() -> {
+ VoiceLiveAsyncClient client = clientBuilder.endpoint(endpoint)
+ .credential(mockKeyCredential)
+ .openTelemetry(OpenTelemetry.noop())
+ .buildAsyncClient();
+
+ assertNotNull(client);
+ });
+ }
+
+ @Test
+ void testBuilderWithNullOpenTelemetryThrows() {
+ assertThrows(NullPointerException.class, () -> clientBuilder.openTelemetry(null));
+ }
+
+ @Test
+ void testBuilderDefaultsToGlobalOpenTelemetry() {
+ // When no explicit OpenTelemetry is set, builder should use GlobalOpenTelemetry.getOrNoop()
+ String endpoint = "https://test.cognitiveservices.azure.com";
+
+ assertDoesNotThrow(() -> {
+ VoiceLiveAsyncClient client
+ = clientBuilder.endpoint(endpoint).credential(mockKeyCredential).buildAsyncClient();
+
+ assertNotNull(client);
+ });
+ }
+
+ @Test
+ void testBuilderOpenTelemetryReturnsBuilder() {
+ assertSame(clientBuilder, clientBuilder.openTelemetry(OpenTelemetry.noop()));
+ }
+
+ @Test
+ void testBuilderWithOpenTelemetryAndContentRecording() {
+ String endpoint = "https://test.cognitiveservices.azure.com";
+
+ assertDoesNotThrow(() -> {
+ VoiceLiveAsyncClient client = clientBuilder.endpoint(endpoint)
+ .credential(mockKeyCredential)
+ .openTelemetry(OpenTelemetry.noop())
+ .enableContentRecording(true)
+ .buildAsyncClient();
+
+ assertNotNull(client);
+ });
+ }
}
diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveTracerTest.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveTracerTest.java
new file mode 100644
index 000000000000..dd020015ef6f
--- /dev/null
+++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveTracerTest.java
@@ -0,0 +1,379 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.voicelive;
+
+import com.azure.ai.voicelive.models.ClientEventInputAudioBufferAppend;
+import com.azure.ai.voicelive.models.ClientEventResponseCancel;
+import com.azure.ai.voicelive.models.ClientEventResponseCreate;
+import com.azure.ai.voicelive.models.SessionUpdate;
+import io.opentelemetry.api.common.AttributeKey;
+import io.opentelemetry.api.trace.SpanKind;
+import io.opentelemetry.api.trace.StatusCode;
+import io.opentelemetry.api.trace.Tracer;
+import io.opentelemetry.sdk.testing.exporter.InMemorySpanExporter;
+import io.opentelemetry.sdk.trace.SdkTracerProvider;
+import io.opentelemetry.sdk.trace.data.EventData;
+import io.opentelemetry.sdk.trace.data.SpanData;
+import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.net.URI;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Unit tests for {@link VoiceLiveTracer} using OpenTelemetry SDK testing utilities.
+ */
+class VoiceLiveTracerTest {
+
+ private InMemorySpanExporter spanExporter;
+ private SdkTracerProvider tracerProvider;
+ private Tracer tracer;
+ private VoiceLiveTracer voiceLiveTracer;
+
+ @BeforeEach
+ void setUp() throws Exception {
+ spanExporter = InMemorySpanExporter.create();
+ tracerProvider = SdkTracerProvider.builder().addSpanProcessor(SimpleSpanProcessor.create(spanExporter)).build();
+ tracer = tracerProvider.get("azure-ai-voicelive", "1.0.0-beta.6");
+ URI endpoint = new URI("wss://test.cognitiveservices.azure.com/voice-live/realtime");
+ voiceLiveTracer = new VoiceLiveTracer(tracer, endpoint, "gpt-4o-realtime-preview", null);
+ }
+
+ @AfterEach
+ void tearDown() {
+ tracerProvider.close();
+ }
+
+ @Test
+ void testConnectSpanCreated() {
+ voiceLiveTracer.startConnectSpan();
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ assertEquals(1, spans.size());
+ SpanData span = spans.get(0);
+ assertEquals("connect gpt-4o-realtime-preview", span.getName());
+ assertEquals(SpanKind.CLIENT, span.getKind());
+ assertEquals("az.ai.voicelive", span.getAttributes().get(AttributeKey.stringKey("gen_ai.system")));
+ assertEquals("connect", span.getAttributes().get(AttributeKey.stringKey("gen_ai.operation.name")));
+ assertEquals("Microsoft.CognitiveServices", span.getAttributes().get(AttributeKey.stringKey("az.namespace")));
+ assertEquals("gpt-4o-realtime-preview",
+ span.getAttributes().get(AttributeKey.stringKey("gen_ai.request.model")));
+ assertEquals("test.cognitiveservices.azure.com",
+ span.getAttributes().get(AttributeKey.stringKey("server.address")));
+ }
+
+ @Test
+ void testSendSpanCreated() {
+ voiceLiveTracer.startConnectSpan();
+
+ ClientEventResponseCreate event = new ClientEventResponseCreate();
+ voiceLiveTracer.traceSend(event, "{\"type\":\"response.create\"}");
+
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ assertEquals(2, spans.size());
+
+ // Send span finishes first
+ SpanData sendSpan = spans.get(0);
+ assertEquals("send response.create", sendSpan.getName());
+ assertEquals(SpanKind.CLIENT, sendSpan.getKind());
+ assertEquals("send", sendSpan.getAttributes().get(AttributeKey.stringKey("gen_ai.operation.name")));
+ assertEquals("response.create",
+ sendSpan.getAttributes().get(AttributeKey.stringKey("gen_ai.voice.event_type")));
+
+ // Send span should be a child of the connect span
+ SpanData connectSpan = spans.get(1);
+ assertEquals(connectSpan.getSpanContext().getTraceId(), sendSpan.getSpanContext().getTraceId());
+ assertEquals(connectSpan.getSpanContext().getSpanId(), sendSpan.getParentSpanId());
+ }
+
+ @Test
+ void testRecvSpanCreated() throws Exception {
+ voiceLiveTracer.startConnectSpan();
+
+ String json = "{\"type\":\"session.created\",\"event_id\":\"evt1\","
+ + "\"session\":{\"id\":\"sess_123\",\"model\":\"gpt-4o\"}}";
+ SessionUpdate update = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(json));
+ voiceLiveTracer.traceRecv(update, json);
+
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ assertEquals(2, spans.size());
+
+ SpanData recvSpan = spans.get(0);
+ assertEquals("recv session.created", recvSpan.getName());
+ assertEquals("recv", recvSpan.getAttributes().get(AttributeKey.stringKey("gen_ai.operation.name")));
+
+ // Verify parent-child relationship
+ SpanData connectSpan = spans.get(1);
+ assertEquals(connectSpan.getSpanContext().getSpanId(), recvSpan.getParentSpanId());
+ }
+
+ @Test
+ void testCloseSpanCreated() {
+ voiceLiveTracer.startConnectSpan();
+ voiceLiveTracer.traceClose();
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ assertEquals(2, spans.size());
+
+ SpanData closeSpan = spans.get(0);
+ assertEquals("close", closeSpan.getName());
+ assertEquals("close", closeSpan.getAttributes().get(AttributeKey.stringKey("gen_ai.operation.name")));
+ }
+
+ @Test
+ void testSessionCountersOnEndConnectSpan() {
+ voiceLiveTracer.startConnectSpan();
+
+ // Simulate a response.cancel (interruption)
+ ClientEventResponseCancel cancelEvent = new ClientEventResponseCancel();
+ voiceLiveTracer.traceSend(cancelEvent, "{\"type\":\"response.cancel\"}");
+
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ // Find the connect span (last to finish)
+ SpanData connectSpan = spans.get(spans.size() - 1);
+ assertEquals(1L,
+ connectSpan.getAttributes().get(AttributeKey.longKey("gen_ai.voice.interruption_count")).longValue());
+ }
+
+ @Test
+ void testResponseCreateTracksLatency() throws Exception {
+ voiceLiveTracer.startConnectSpan();
+
+ // Send response.create
+ ClientEventResponseCreate createEvent = new ClientEventResponseCreate();
+ voiceLiveTracer.traceSend(createEvent, "{\"type\":\"response.create\"}");
+
+ // Simulate small delay and receive audio delta
+ Thread.sleep(10);
+ String audioJson = "{\"type\":\"response.audio.delta\",\"response_id\":\"r1\","
+ + "\"item_id\":\"i1\",\"output_index\":0,\"content_index\":0,\"delta\":\"AQID\"}";
+ SessionUpdate audioDelta = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(audioJson));
+ voiceLiveTracer.traceRecv(audioDelta, audioJson);
+
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ SpanData connectSpan = spans.get(spans.size() - 1);
+ Long latency = connectSpan.getAttributes().get(AttributeKey.longKey("gen_ai.voice.first_token_latency_ms"));
+ assertNotNull(latency);
+ assertTrue(latency >= 0, "Latency should be >= 0, was: " + latency);
+ }
+
+ @Test
+ void testAudioBytesTracking() throws Exception {
+ voiceLiveTracer.startConnectSpan();
+
+ // Send audio (base64 of 3 bytes: [1,2,3] = "AQID")
+ ClientEventInputAudioBufferAppend appendEvent = new ClientEventInputAudioBufferAppend("AQID");
+ voiceLiveTracer.traceSend(appendEvent, "{\"type\":\"input_audio_buffer.append\",\"audio\":\"AQID\"}");
+
+ // Receive audio delta (base64 "AQIDBA==" = 4 bytes)
+ String audioJson = "{\"type\":\"response.audio.delta\",\"response_id\":\"r1\","
+ + "\"item_id\":\"i1\",\"output_index\":0,\"content_index\":0,\"delta\":\"AQIDBA==\"}";
+ SessionUpdate audioDelta = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(audioJson));
+ voiceLiveTracer.traceRecv(audioDelta, audioJson);
+
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ SpanData connectSpan = spans.get(spans.size() - 1);
+ assertEquals(3L,
+ connectSpan.getAttributes().get(AttributeKey.longKey("gen_ai.voice.audio_bytes_sent")).longValue());
+ assertEquals(4L,
+ connectSpan.getAttributes().get(AttributeKey.longKey("gen_ai.voice.audio_bytes_received")).longValue());
+ }
+
+ @Test
+ void testTurnCountTracking() throws Exception {
+ voiceLiveTracer.startConnectSpan();
+
+ String doneJson = "{\"type\":\"response.done\",\"event_id\":\"evt1\","
+ + "\"response\":{\"id\":\"r1\",\"status\":\"completed\",\"output\":[]}}";
+ SessionUpdate responseDone = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(doneJson));
+ voiceLiveTracer.traceRecv(responseDone, doneJson);
+
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ SpanData connectSpan = spans.get(spans.size() - 1);
+ assertEquals(1L, connectSpan.getAttributes().get(AttributeKey.longKey("gen_ai.voice.turn_count")).longValue());
+ }
+
+ @Test
+ void testTokenUsageOnResponseDone() throws Exception {
+ voiceLiveTracer.startConnectSpan();
+
+ String doneJson = "{\"type\":\"response.done\",\"event_id\":\"evt1\","
+ + "\"response\":{\"id\":\"r1\",\"status\":\"completed\",\"output\":[],"
+ + "\"usage\":{\"total_tokens\":150,\"input_tokens\":100,\"output_tokens\":50,"
+ + "\"input_token_details\":{\"cached_tokens\":0,\"text_tokens\":50,\"audio_tokens\":50},"
+ + "\"output_token_details\":{\"text_tokens\":25,\"audio_tokens\":25}}}}";
+ SessionUpdate responseDone = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(doneJson));
+ voiceLiveTracer.traceRecv(responseDone, doneJson);
+
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ // Recv span is first to finish
+ SpanData recvSpan = spans.get(0);
+ assertEquals(100L, recvSpan.getAttributes().get(AttributeKey.longKey("gen_ai.usage.input_tokens")).longValue());
+ assertEquals(50L, recvSpan.getAttributes().get(AttributeKey.longKey("gen_ai.usage.output_tokens")).longValue());
+ }
+
+ @Test
+ void testErrorEventTracking() throws Exception {
+ voiceLiveTracer.startConnectSpan();
+
+ String errorJson = "{\"type\":\"error\",\"event_id\":\"evt1\","
+ + "\"error\":{\"type\":\"server_error\",\"code\":\"500\",\"message\":\"Internal error\"}}";
+ SessionUpdate errorUpdate = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(errorJson));
+ voiceLiveTracer.traceRecv(errorUpdate, errorJson);
+
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ SpanData recvSpan = spans.get(0);
+
+ // Verify error status is set on the span
+ assertEquals(StatusCode.ERROR, recvSpan.getStatus().getStatusCode());
+ assertEquals("Internal error", recvSpan.getStatus().getDescription());
+ assertEquals("server_error", recvSpan.getAttributes().get(AttributeKey.stringKey("error.type")));
+
+ // Verify error event was added
+ List<EventData> events = recvSpan.getEvents();
+ EventData errorEvent = events.stream()
+ .filter(e -> "gen_ai.voice.error".equals(e.getName()))
+ .findFirst()
+ .orElseThrow(() -> new AssertionError("Expected gen_ai.voice.error event"));
+ assertEquals("500", errorEvent.getAttributes().get(AttributeKey.stringKey("error.code")));
+ assertEquals("Internal error", errorEvent.getAttributes().get(AttributeKey.stringKey("error.message")));
+ }
+
+ @Test
+ void testConnectSpanErrorStatus() {
+ voiceLiveTracer.startConnectSpan();
+ voiceLiveTracer.endConnectSpan(new RuntimeException("Connection lost"));
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ SpanData connectSpan = spans.get(0);
+ assertEquals(StatusCode.ERROR, connectSpan.getStatus().getStatusCode());
+ assertEquals("Connection lost", connectSpan.getStatus().getDescription());
+ assertEquals("java.lang.RuntimeException",
+ connectSpan.getAttributes().get(AttributeKey.stringKey("error.type")));
+
+ // Verify exception was recorded
+ assertTrue(connectSpan.getEvents().stream().anyMatch(e -> "exception".equals(e.getName())));
+ }
+
+ @Test
+ void testSessionIdFromSessionCreated() throws Exception {
+ voiceLiveTracer.startConnectSpan();
+
+ String json = "{\"type\":\"session.created\",\"event_id\":\"evt1\","
+ + "\"session\":{\"id\":\"sess_abc123\",\"model\":\"gpt-4o\"}}";
+ SessionUpdate update = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(json));
+ voiceLiveTracer.traceRecv(update, json);
+
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ SpanData connectSpan = spans.get(spans.size() - 1);
+ assertEquals("sess_abc123", connectSpan.getAttributes().get(AttributeKey.stringKey("gen_ai.voice.session_id")));
+ }
+
+ @Test
+ void testConnectSpanWithoutModel() throws Exception {
+ VoiceLiveTracer tracerNoModel
+ = new VoiceLiveTracer(tracer, new URI("wss://test.cognitiveservices.azure.com"), null, null);
+
+ tracerNoModel.startConnectSpan();
+ tracerNoModel.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ SpanData span = spans.get(0);
+ assertEquals("connect", span.getName());
+ assertFalse(span.getAttributes().asMap().containsKey(AttributeKey.stringKey("gen_ai.request.model")));
+ }
+
+ @Test
+ void testParentChildSpanHierarchy() throws Exception {
+ voiceLiveTracer.startConnectSpan();
+
+ // Send
+ ClientEventResponseCreate event = new ClientEventResponseCreate();
+ voiceLiveTracer.traceSend(event, "{\"type\":\"response.create\"}");
+
+ // Recv
+ String json = "{\"type\":\"session.created\",\"event_id\":\"evt1\","
+ + "\"session\":{\"id\":\"sess_123\",\"model\":\"gpt-4o\"}}";
+ SessionUpdate update = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(json));
+ voiceLiveTracer.traceRecv(update, json);
+
+ // Close
+ voiceLiveTracer.traceClose();
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ assertEquals(4, spans.size());
+
+ // All child spans share the same trace ID and reference the connect span as parent
+ SpanData connectSpan = spans.get(spans.size() - 1);
+ String traceId = connectSpan.getSpanContext().getTraceId();
+ String connectSpanId = connectSpan.getSpanContext().getSpanId();
+
+ for (int i = 0; i < spans.size() - 1; i++) {
+ SpanData child = spans.get(i);
+ assertEquals(traceId, child.getSpanContext().getTraceId(), "Child span should be in same trace");
+ assertEquals(connectSpanId, child.getParentSpanId(), "Child span should have connect as parent");
+ }
+ }
+
+ @Test
+ void testBuilderWithOpenTelemetry() {
+ VoiceLiveAsyncClient client = new VoiceLiveClientBuilder().endpoint("https://test.cognitiveservices.azure.com")
+ .credential(new com.azure.core.credential.KeyCredential("fake"))
+ .openTelemetry(io.opentelemetry.api.OpenTelemetry.noop())
+ .buildAsyncClient();
+
+ assertNotNull(client);
+ }
+
+ @Test
+ void testSpanEventsContainAttributes() {
+ voiceLiveTracer.startConnectSpan();
+
+ ClientEventResponseCreate event = new ClientEventResponseCreate();
+ voiceLiveTracer.traceSend(event, "{\"type\":\"response.create\"}");
+
+ voiceLiveTracer.endConnectSpan(null);
+
+ List<SpanData> spans = spanExporter.getFinishedSpanItems();
+ SpanData sendSpan = spans.get(0);
+ List<EventData> events = sendSpan.getEvents();
+ assertFalse(events.isEmpty());
+
+ EventData inputEvent = events.stream()
+ .filter(e -> "gen_ai.input.messages".equals(e.getName()))
+ .findFirst()
+ .orElseThrow(() -> new AssertionError("Expected gen_ai.input.messages event"));
+ assertEquals("az.ai.voicelive", inputEvent.getAttributes().get(AttributeKey.stringKey("gen_ai.system")));
+ assertEquals("response.create",
+ inputEvent.getAttributes().get(AttributeKey.stringKey("gen_ai.voice.event_type")));
+ }
+}
From 914e9472988ecbe6c62158a7bb6e7aa05303fabb Mon Sep 17 00:00:00 2001
From: xitzhang
Date: Wed, 25 Mar 2026 15:41:36 -0700
Subject: [PATCH 2/3] Update sdk/voicelive/azure-ai-voicelive/README.md
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
sdk/voicelive/azure-ai-voicelive/README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/sdk/voicelive/azure-ai-voicelive/README.md b/sdk/voicelive/azure-ai-voicelive/README.md
index e305a6ebabef..30455e0cfa77 100644
--- a/sdk/voicelive/azure-ai-voicelive/README.md
+++ b/sdk/voicelive/azure-ai-voicelive/README.md
@@ -447,13 +447,13 @@ connect gpt-4o-realtime-preview ← session lifetime span
```
**Session-level attributes** (on the connect span):
-- `gen_ai.system` — `openai`
+- `gen_ai.system` — `az.ai.voicelive`
- `gen_ai.request.model` — Model name (e.g., `gpt-4o-realtime-preview`)
- `server.address` — Service endpoint
- `gen_ai.voice.session_id` — Voice session ID
- `gen_ai.voice.turn_count` — Completed response turns
- `gen_ai.voice.interruption_count` — User interruptions
-- `gen_ai.voice.audio_bytes_sent` / `audio_bytes_received` — Audio payload bytes
+- `gen_ai.voice.audio_bytes_sent` / `gen_ai.voice.audio_bytes_received` — Audio payload bytes
- `gen_ai.voice.first_token_latency_ms` — Time to first audio response
#### Content recording
From f88e342d2ef7a1e3b918decbae8b083d92a1fe6e Mon Sep 17 00:00:00 2001
From: Xiting Zhang
Date: Wed, 25 Mar 2026 15:50:55 -0700
Subject: [PATCH 3/3] Derive tracer version from azure-ai-voicelive.properties
at runtime
---
.../com/azure/ai/voicelive/VoiceLiveClientBuilder.java | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java
index 41ff285b4f1e..9b6098a96190 100644
--- a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java
+++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java
@@ -21,6 +21,8 @@
import io.opentelemetry.api.OpenTelemetry;
import io.opentelemetry.api.trace.Tracer;
+import java.util.Map;
+
/**
* Builder for creating instances of {@link VoiceLiveAsyncClient}.
*/
@@ -28,6 +30,9 @@
public final class VoiceLiveClientBuilder implements TokenCredentialTrait<VoiceLiveClientBuilder>,
KeyCredentialTrait<VoiceLiveClientBuilder>, EndpointTrait<VoiceLiveClientBuilder> {
private static final ClientLogger LOGGER = new ClientLogger(VoiceLiveClientBuilder.class);
+ private static final Map<String, String> PROPERTIES = CoreUtils.getProperties("azure-ai-voicelive.properties");
+ private static final String SDK_NAME = "azure-ai-voicelive";
+ private static final String SDK_VERSION = PROPERTIES.getOrDefault("version", "unknown");
private URI endpoint;
private KeyCredential keyCredential;
@@ -166,7 +171,7 @@ public VoiceLiveAsyncClient buildAsyncClient() {
HttpHeaders additionalHeaders = CoreUtils.createHttpHeadersFromClientOptions(clientOptions);
OpenTelemetry otel = openTelemetry != null ? openTelemetry : GlobalOpenTelemetry.getOrNoop();
- Tracer tracer = otel.getTracer("azure-ai-voicelive", "1.0.0-beta.6");
+ Tracer tracer = otel.getTracer(SDK_NAME, SDK_VERSION);
if (keyCredential != null) {
return new VoiceLiveAsyncClient(endpoint, keyCredential, version.getVersion(), additionalHeaders, tracer,