diff --git a/.changeset/config.json b/.changeset/config.json index af66336b2..29b38eb85 100644 --- a/.changeset/config.json +++ b/.changeset/config.json @@ -8,13 +8,7 @@ ], "commit": false, "ignore": ["livekit-agents-examples"], - "fixed": [ - [ - "@livekit/agents", - "@livekit/agents-plugin-*", - "@livekit/agents-plugins-test" - ] - ], + "fixed": [["@livekit/agents", "@livekit/agents-plugin-*", "@livekit/agents-plugins-test"]], "access": "public", "baseBranch": "main", "updateInternalDependencies": "patch", diff --git a/.changeset/metal-teeth-buy.md b/.changeset/metal-teeth-buy.md new file mode 100644 index 000000000..ef1520c3e --- /dev/null +++ b/.changeset/metal-teeth-buy.md @@ -0,0 +1,6 @@ +--- +'@livekit/agents': minor +--- + +- Add adaptive interruption handling +- Add remote session event handler diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f5a577688..b4472c81b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -46,11 +46,11 @@ jobs: - name: Test agents if: steps.filter.outputs.agents-or-tests == 'true' || github.event_name == 'push' run: pnpm test agents - - name: Test examples - if: (steps.filter.outputs.examples == 'true' || github.event_name == 'push') && secrets.OPENAI_API_KEY != '' - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: pnpm test:examples + # - name: Test examples + # if: (steps.filter.outputs.examples == 'true' || github.event_name == 'push') + # env: + # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + # run: pnpm test:examples # TODO (AJS-83) Re-enable once plugins are refactored with abort controllers # - name: Test all plugins # if: steps.filter.outputs.agents-or-tests == 'true' || github.event_name != 'pull_request' diff --git a/agents/package.json b/agents/package.json index ebf94a7bb..c1bf3e5e7 100644 --- a/agents/package.json +++ b/agents/package.json @@ -46,9 +46,10 @@ "zod": "^3.25.76" }, "dependencies": { + "@bufbuild/protobuf": "^1.10.0", 
"@ffmpeg-installer/ffmpeg": "^1.1.0", "@livekit/mutex": "^1.1.1", - "@livekit/protocol": "^1.43.0", + "@livekit/protocol": "^1.45.1", "@livekit/typed-emitter": "^3.0.0", "@opentelemetry/api": "^1.9.0", "@opentelemetry/api-logs": "^0.54.0", @@ -69,6 +70,7 @@ "heap-js": "^2.6.0", "json-schema": "^0.4.0", "livekit-server-sdk": "^2.14.1", + "ofetch": "^1.5.1", "openai": "^6.8.1", "pidusage": "^4.0.1", "pino": "^8.19.0", diff --git a/agents/src/cli.ts b/agents/src/cli.ts index 1e53c16c0..058456e82 100644 --- a/agents/src/cli.ts +++ b/agents/src/cli.ts @@ -148,6 +148,7 @@ export const runApp = (opts: ServerOptions) => { opts.apiSecret = globalOptions.apiSecret || opts.apiSecret; opts.logLevel = commandOptions.logLevel; opts.workerToken = globalOptions.workerToken || opts.workerToken; + process.env.LIVEKIT_DEV_MODE = '1'; runServer({ opts, production: false, @@ -169,6 +170,7 @@ export const runApp = (opts: ServerOptions) => { opts.apiSecret = globalOptions.apiSecret || opts.apiSecret; opts.logLevel = commandOptions.logLevel; opts.workerToken = globalOptions.workerToken || opts.workerToken; + process.env.LIVEKIT_DEV_MODE = '1'; runServer({ opts, production: false, diff --git a/agents/src/constants.ts b/agents/src/constants.ts index 86ead5b4c..3da61af46 100644 --- a/agents/src/constants.ts +++ b/agents/src/constants.ts @@ -7,3 +7,17 @@ export const TOPIC_TRANSCRIPTION = 'lk.transcription'; export const ATTRIBUTE_TRANSCRIPTION_SEGMENT_ID = 'lk.segment_id'; export const ATTRIBUTE_PUBLISH_ON_BEHALF = 'lk.publish_on_behalf'; export const TOPIC_CHAT = 'lk.chat'; + +export const ATTRIBUTE_AGENT_STATE = 'lk.agent.state'; +export const ATTRIBUTE_AGENT_NAME = 'lk.agent.name'; + +// TODO(eval): export const ATTRIBUTE_SIMULATOR = 'lk.simulator'; + +export const TOPIC_CLIENT_EVENTS = 'lk.agent.events'; +export const RPC_GET_SESSION_STATE = 'lk.agent.get_session_state'; +export const RPC_GET_CHAT_HISTORY = 'lk.agent.get_chat_history'; +export const RPC_GET_AGENT_INFO = 
'lk.agent.get_agent_info'; +export const RPC_SEND_MESSAGE = 'lk.agent.send_message'; +export const TOPIC_AGENT_REQUEST = 'lk.agent.request'; +export const TOPIC_AGENT_RESPONSE = 'lk.agent.response'; +export const TOPIC_SESSION_MESSAGES = 'lk.agent.session'; diff --git a/agents/src/inference/interruption/defaults.ts b/agents/src/inference/interruption/defaults.ts new file mode 100644 index 000000000..01d7b9290 --- /dev/null +++ b/agents/src/inference/interruption/defaults.ts @@ -0,0 +1,51 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +import type { ApiConnectOptions } from './interruption_stream.js'; +import type { InterruptionOptions } from './types.js'; + +export const MIN_INTERRUPTION_DURATION_IN_S = 0.025 * 2; // 25ms per frame, 2 consecutive frames +export const THRESHOLD = 0.5; +export const MAX_AUDIO_DURATION_IN_S = 3.0; +export const AUDIO_PREFIX_DURATION_IN_S = 0.5; +export const DETECTION_INTERVAL_IN_S = 0.1; +export const REMOTE_INFERENCE_TIMEOUT_IN_S = 0.7; +export const SAMPLE_RATE = 16000; +export const FRAMES_PER_SECOND = 40; +export const FRAME_DURATION_IN_S = 0.025; // 25ms per frame + +export const apiConnectDefaults: ApiConnectOptions = { + maxRetries: 3, + retryInterval: 2_000, + timeout: 10_000, +} as const; + +/** + * Calculate the retry interval using exponential backoff with jitter. + * Matches the Python implementation's _interval_for_retry behavior. 
+ */ +export function intervalForRetry( + attempt: number, + baseInterval: number = apiConnectDefaults.retryInterval, +): number { + // Exponential backoff: baseInterval * 2^attempt with some jitter + const exponentialDelay = baseInterval * Math.pow(2, attempt); + // Add jitter (0-25% of the delay) + const jitter = exponentialDelay * Math.random() * 0.25; + return exponentialDelay + jitter; +} + +// baseUrl and useProxy are resolved dynamically in the constructor +// to respect LIVEKIT_REMOTE_EOT_URL environment variable +export const interruptionOptionDefaults: Omit<InterruptionOptions, 'baseUrl' | 'useProxy'> = { + sampleRate: SAMPLE_RATE, + threshold: THRESHOLD, + minFrames: Math.ceil(MIN_INTERRUPTION_DURATION_IN_S * FRAMES_PER_SECOND), + maxAudioDurationInS: MAX_AUDIO_DURATION_IN_S, + audioPrefixDurationInS: AUDIO_PREFIX_DURATION_IN_S, + detectionIntervalInS: DETECTION_INTERVAL_IN_S, + inferenceTimeout: REMOTE_INFERENCE_TIMEOUT_IN_S * 1_000, + apiKey: process.env.LIVEKIT_API_KEY || '', + apiSecret: process.env.LIVEKIT_API_SECRET || '', + minInterruptionDurationInS: MIN_INTERRUPTION_DURATION_IN_S, +} as const; diff --git a/agents/src/inference/interruption/errors.ts b/agents/src/inference/interruption/errors.ts new file mode 100644 index 000000000..5b5f6d370 --- /dev/null +++ b/agents/src/inference/interruption/errors.ts @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +/** + * Error thrown during interruption detection.
+ */ +export class InterruptionDetectionError extends Error { + readonly type = 'interruption_detection_error' as const; + + readonly timestamp: number; + readonly label: string; + readonly recoverable: boolean; + + constructor(message: string, timestamp: number, label: string, recoverable: boolean) { + super(message); + this.name = 'InterruptionDetectionError'; + this.timestamp = timestamp; + this.label = label; + this.recoverable = recoverable; + } + + toString(): string { + return `${this.name}: ${this.message} (label=${this.label}, timestamp=${this.timestamp}, recoverable=${this.recoverable})`; + } +} diff --git a/agents/src/inference/interruption/http_transport.ts b/agents/src/inference/interruption/http_transport.ts new file mode 100644 index 000000000..6d4cafbf5 --- /dev/null +++ b/agents/src/inference/interruption/http_transport.ts @@ -0,0 +1,206 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +import { FetchError, ofetch } from 'ofetch'; +import { TransformStream } from 'stream/web'; +import { z } from 'zod'; +import { APIConnectionError, APIError, APIStatusError, isAPIError } from '../../_exceptions.js'; +import { log } from '../../log.js'; +import { createAccessToken } from '../utils.js'; +import { InterruptionCacheEntry } from './interruption_cache_entry.js'; +import type { OverlappingSpeechEvent } from './types.js'; +import type { BoundedCache } from './utils.js'; + +export interface PostOptions { + baseUrl: string; + token: string; + signal?: AbortSignal; + timeout?: number; + maxRetries?: number; +} + +export interface PredictOptions { + threshold: number; + minFrames: number; +} + +export const predictEndpointResponseSchema = z.object({ + created_at: z.number(), + is_bargein: z.boolean(), + probabilities: z.array(z.number()), +}); + +export type PredictEndpointResponse = z.infer<typeof predictEndpointResponseSchema>; + +export interface PredictResponse { + createdAt: number; + isBargein: boolean; + probabilities: number[]; +
predictionDurationInS: number; +} + +export async function predictHTTP( + data: Int16Array, + predictOptions: PredictOptions, + options: PostOptions, +): Promise { + const createdAt = performance.now(); + const url = new URL(`/bargein`, options.baseUrl); + url.searchParams.append('threshold', predictOptions.threshold.toString()); + url.searchParams.append('min_frames', predictOptions.minFrames.toFixed()); + url.searchParams.append('created_at', createdAt.toFixed()); + + try { + const response = await ofetch(url.toString(), { + retry: 0, + headers: { + 'Content-Type': 'application/octet-stream', + Authorization: `Bearer ${options.token}`, + }, + signal: options.signal, + timeout: options.timeout, + method: 'POST', + body: data, + }); + const { created_at, is_bargein, probabilities } = predictEndpointResponseSchema.parse(response); + + return { + createdAt: created_at, + isBargein: is_bargein, + probabilities, + predictionDurationInS: (performance.now() - createdAt) / 1000, + }; + } catch (err) { + if (isAPIError(err)) throw err; + if (err instanceof FetchError) { + if (err.statusCode) { + throw new APIStatusError({ + message: `error during interruption prediction: ${err.message}`, + options: { statusCode: err.statusCode, body: err.data }, + }); + } + if ( + err.cause instanceof Error && + (err.cause.name === 'TimeoutError' || err.cause.name === 'AbortError') + ) { + throw new APIStatusError({ + message: `interruption inference timeout: ${err.message}`, + options: { statusCode: 408, retryable: false }, + }); + } + throw new APIConnectionError({ + message: `interruption inference connection error: ${err.message}`, + }); + } + throw new APIError(`error during interruption prediction: ${err}`); + } +} + +export interface HttpTransportOptions { + baseUrl: string; + apiKey: string; + apiSecret: string; + threshold: number; + minFrames: number; + timeout: number; + maxRetries?: number; +} + +export interface HttpTransportState { + overlapSpeechStarted: boolean; + 
overlapSpeechStartedAt: number | undefined; + cache: BoundedCache; +} + +/** + * Creates an HTTP transport TransformStream for interruption detection. + * + * This transport receives Int16Array audio slices and outputs InterruptionEvents. + * Each audio slice triggers an HTTP POST request. + * + * @param options - Transport options object. This is read on each request, so mutations + * to threshold/minFrames will be picked up dynamically. + */ +export function createHttpTransport( + options: HttpTransportOptions, + getState: () => HttpTransportState, + setState: (partial: Partial) => void, + updateUserSpeakingSpan?: (entry: InterruptionCacheEntry) => void, + getAndResetNumRequests?: () => number, +): TransformStream { + const logger = log(); + + return new TransformStream( + { + async transform(chunk, controller) { + if (!(chunk instanceof Int16Array)) { + controller.enqueue(chunk); + return; + } + + const state = getState(); + const overlapSpeechStartedAt = state.overlapSpeechStartedAt; + if (overlapSpeechStartedAt === undefined || !state.overlapSpeechStarted) return; + + try { + const resp = await predictHTTP( + chunk, + { threshold: options.threshold, minFrames: options.minFrames }, + { + baseUrl: options.baseUrl, + timeout: options.timeout, + maxRetries: options.maxRetries, + token: await createAccessToken(options.apiKey, options.apiSecret), + }, + ); + + const { createdAt, isBargein, probabilities, predictionDurationInS } = resp; + const entry = state.cache.setOrUpdate( + createdAt, + () => new InterruptionCacheEntry({ createdAt }), + { + probabilities, + isInterruption: isBargein, + speechInput: chunk, + totalDurationInS: (performance.now() - createdAt) / 1000, + detectionDelayInS: (Date.now() - overlapSpeechStartedAt) / 1000, + predictionDurationInS, + }, + ); + + if (state.overlapSpeechStarted && entry.isInterruption) { + if (updateUserSpeakingSpan) { + updateUserSpeakingSpan(entry); + } + const event: OverlappingSpeechEvent = { + type: 
'overlapping_speech', + detectedAt: Date.now(), + overlapStartedAt: overlapSpeechStartedAt, + isInterruption: entry.isInterruption, + speechInput: entry.speechInput, + probabilities: entry.probabilities, + totalDurationInS: entry.totalDurationInS, + predictionDurationInS: entry.predictionDurationInS, + detectionDelayInS: entry.detectionDelayInS, + probability: entry.probability, + numRequests: getAndResetNumRequests?.() ?? 0, + }; + logger.debug( + { + detectionDelayInS: entry.detectionDelayInS, + totalDurationInS: entry.totalDurationInS, + }, + 'interruption detected', + ); + setState({ overlapSpeechStarted: false }); + controller.enqueue(event); + } + } catch (err) { + controller.error(err); + } + }, + }, + { highWaterMark: 2 }, + { highWaterMark: 2 }, + ); +} diff --git a/agents/src/inference/interruption/interruption_cache_entry.ts b/agents/src/inference/interruption/interruption_cache_entry.ts new file mode 100644 index 000000000..f318a04f5 --- /dev/null +++ b/agents/src/inference/interruption/interruption_cache_entry.ts @@ -0,0 +1,50 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +import { estimateProbability } from './utils.js'; + +/** + * Typed cache entry for interruption inference results. + * Mutable to support setOrUpdate pattern from Python's _BoundedCache. 
+ */ +export class InterruptionCacheEntry { + createdAt: number; + requestStartedAt?: number; + totalDurationInS: number; + predictionDurationInS: number; + detectionDelayInS: number; + speechInput?: Int16Array; + probabilities?: number[]; + isInterruption?: boolean; + + constructor(params: { + createdAt: number; + requestStartedAt?: number; + speechInput?: Int16Array; + totalDurationInS?: number; + predictionDurationInS?: number; + detectionDelayInS?: number; + probabilities?: number[]; + isInterruption?: boolean; + }) { + this.createdAt = params.createdAt; + this.requestStartedAt = params.requestStartedAt; + this.totalDurationInS = params.totalDurationInS ?? 0; + this.predictionDurationInS = params.predictionDurationInS ?? 0; + this.detectionDelayInS = params.detectionDelayInS ?? 0; + this.speechInput = params.speechInput; + this.probabilities = params.probabilities; + this.isInterruption = params.isInterruption; + } + + /** + * The conservative estimated probability of the interruption event. + */ + get probability(): number { + return this.probabilities ? estimateProbability(this.probabilities) : 0; + } + + static default(): InterruptionCacheEntry { + return new InterruptionCacheEntry({ createdAt: 0 }); + } +} diff --git a/agents/src/inference/interruption/interruption_detector.ts b/agents/src/inference/interruption/interruption_detector.ts new file mode 100644 index 000000000..d115918f6 --- /dev/null +++ b/agents/src/inference/interruption/interruption_detector.ts @@ -0,0 +1,204 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +import type { TypedEventEmitter } from '@livekit/typed-emitter'; +import EventEmitter from 'events'; +import { log } from '../../log.js'; +import type { InterruptionMetrics } from '../../metrics/base.js'; +import { DEFAULT_INFERENCE_URL, STAGING_INFERENCE_URL, getDefaultInferenceUrl } from '../utils.js'; +import { FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults } from './defaults.js'; +import { InterruptionDetectionError } from './errors.js'; +import { InterruptionStreamBase } from './interruption_stream.js'; +import type { InterruptionOptions, OverlappingSpeechEvent } from './types.js'; + +type InterruptionCallbacks = { + overlapping_speech: (event: OverlappingSpeechEvent) => void; + metrics_collected: (metrics: InterruptionMetrics) => void; + error: (error: InterruptionDetectionError) => void; +}; + +export type AdaptiveInterruptionDetectorOptions = Omit<Partial<InterruptionOptions>, 'useProxy'>; + +export class AdaptiveInterruptionDetector extends (EventEmitter as new () => TypedEventEmitter<InterruptionCallbacks>) { + options: InterruptionOptions; + private readonly _label: string; + private logger = log(); + // Use Set instead of WeakSet to allow iteration for propagating option updates + private streams: Set<InterruptionStreamBase> = new Set(); + + constructor(options: AdaptiveInterruptionDetectorOptions = {}) { + super(); + + const { + maxAudioDurationInS, + baseUrl, + apiKey, + apiSecret, + audioPrefixDurationInS, + threshold, + detectionIntervalInS, + inferenceTimeout, + minInterruptionDurationInS, + } = { ...interruptionOptionDefaults, ...options }; + + if (maxAudioDurationInS > 3.0) { + throw new RangeError('maxAudioDurationInS must be less than or equal to 3.0 seconds'); + } + + const lkBaseUrl = baseUrl ?? process.env.LIVEKIT_REMOTE_EOT_URL ?? getDefaultInferenceUrl(); + let lkApiKey = apiKey ?? ''; + let lkApiSecret = apiSecret ??
''; + let useProxy: boolean; + + // Use LiveKit credentials if using the inference service (production or staging) + const isInferenceUrl = + lkBaseUrl === DEFAULT_INFERENCE_URL || lkBaseUrl === STAGING_INFERENCE_URL; + if (isInferenceUrl) { + lkApiKey = + apiKey ?? process.env.LIVEKIT_INFERENCE_API_KEY ?? process.env.LIVEKIT_API_KEY ?? ''; + if (!lkApiKey) { + throw new TypeError( + 'apiKey is required, either as argument or set LIVEKIT_API_KEY environmental variable', + ); + } + + lkApiSecret = + apiSecret ?? + process.env.LIVEKIT_INFERENCE_API_SECRET ?? + process.env.LIVEKIT_API_SECRET ?? + ''; + if (!lkApiSecret) { + throw new TypeError( + 'apiSecret is required, either as argument or set LIVEKIT_API_SECRET environmental variable', + ); + } + useProxy = true; + } else { + useProxy = false; + } + const transport = useProxy ? 'websocket' : 'http'; + this.logger.debug( + { + baseUrl: lkBaseUrl, + useProxy, + transport, + }, + '=== Resolved interruption detector transport configuration', + ); + + this.options = { + sampleRate: SAMPLE_RATE, + threshold, + minFrames: Math.ceil(minInterruptionDurationInS * FRAMES_PER_SECOND), + maxAudioDurationInS, + audioPrefixDurationInS, + detectionIntervalInS, + inferenceTimeout, + baseUrl: lkBaseUrl, + apiKey: lkApiKey, + apiSecret: lkApiSecret, + useProxy, + minInterruptionDurationInS, + }; + + this._label = `${this.constructor.name}`; + + this.logger.debug( + { + baseUrl: this.options.baseUrl, + detectionIntervalInS: this.options.detectionIntervalInS, + audioPrefixDurationInS: this.options.audioPrefixDurationInS, + maxAudioDurationInS: this.options.maxAudioDurationInS, + minFrames: this.options.minFrames, + threshold: this.options.threshold, + inferenceTimeout: this.options.inferenceTimeout, + useProxy: this.options.useProxy, + transport, + }, + '=== Adaptive interruption detector initialized', + ); + } + + /** + * The model identifier for this detector. 
+ */ + get model(): string { + return 'adaptive interruption'; + } + + /** + * The provider identifier for this detector. + */ + get provider(): string { + return 'livekit'; + } + + /** + * The label for this detector instance. + */ + get label(): string { + return this._label; + } + + /** + * The sample rate used for audio processing. + */ + get sampleRate(): number { + return this.options.sampleRate; + } + + /** + * Emit an error event from the detector. + */ + emitError(error: InterruptionDetectionError): void { + this.emit('error', error); + } + + /** + * Creates a new InterruptionStreamBase for internal use. + * The stream can receive audio frames and sentinels via pushFrame(). + * Use this when you need direct access to the stream for pushing frames. + */ + createStream(): InterruptionStreamBase { + try { + const streamBase = new InterruptionStreamBase(this, {}); + this.streams.add(streamBase); + return streamBase; + } catch (e) { + const cause = e instanceof Error ? e : new Error(String(e)); + this.emitError(new InterruptionDetectionError(cause.message, Date.now(), this._label, false)); + throw e; + } + } + + /** + * Remove a stream from tracking (called when stream is closed). + */ + removeStream(stream: InterruptionStreamBase): void { + this.streams.delete(stream); + } + + /** + * Update options for the detector and propagate to all active streams. + * For WebSocket streams, this triggers a reconnection with new settings. 
+ */ + async updateOptions(options: { + threshold?: number; + minInterruptionDurationInS?: number; + }): Promise<void> { + if (options.threshold !== undefined) { + this.options.threshold = options.threshold; + } + if (options.minInterruptionDurationInS !== undefined) { + this.options.minInterruptionDurationInS = options.minInterruptionDurationInS; + this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND); + } + + // Propagate option updates to all active streams (matching Python behavior) + const updatePromises: Promise<void>[] = []; + for (const stream of this.streams) { + updatePromises.push(stream.updateOptions(options)); + } + await Promise.all(updatePromises); + } +} diff --git a/agents/src/inference/interruption/interruption_stream.ts b/agents/src/inference/interruption/interruption_stream.ts new file mode 100644 index 000000000..df6162aae --- /dev/null +++ b/agents/src/inference/interruption/interruption_stream.ts @@ -0,0 +1,467 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+// +// SPDX-License-Identifier: Apache-2.0 +import { AudioFrame, AudioResampler } from '@livekit/rtc-node'; +import type { Span } from '@opentelemetry/api'; +import { type ReadableStream, TransformStream } from 'stream/web'; +import { log } from '../../log.js'; +import type { InterruptionMetrics } from '../../metrics/base.js'; +import { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js'; +import { traceTypes } from '../../telemetry/index.js'; +import { FRAMES_PER_SECOND, apiConnectDefaults } from './defaults.js'; +import type { InterruptionDetectionError } from './errors.js'; +import { createHttpTransport } from './http_transport.js'; +import { InterruptionCacheEntry } from './interruption_cache_entry.js'; +import type { AdaptiveInterruptionDetector } from './interruption_detector.js'; +import { + type AgentSpeechEnded, + type AgentSpeechStarted, + type ApiConnectOptions, + type Flush, + type InterruptionOptions, + type InterruptionSentinel, + type OverlapSpeechEnded, + type OverlapSpeechStarted, + type OverlappingSpeechEvent, +} from './types.js'; +import { BoundedCache } from './utils.js'; +import { createWsTransport } from './ws_transport.js'; + +// Re-export sentinel types for backwards compatibility +export type { + AgentSpeechEnded, + AgentSpeechStarted, + ApiConnectOptions, + Flush, + InterruptionSentinel, + OverlapSpeechEnded, + OverlapSpeechStarted, +}; + +export class InterruptionStreamSentinel { + static agentSpeechStarted(): AgentSpeechStarted { + return { type: 'agent-speech-started' }; + } + + static agentSpeechEnded(): AgentSpeechEnded { + return { type: 'agent-speech-ended' }; + } + + static overlapSpeechStarted( + speechDuration: number, + startedAt: number, + userSpeakingSpan?: Span, + ): OverlapSpeechStarted { + return { type: 'overlap-speech-started', speechDuration, startedAt, userSpeakingSpan }; + } + + static overlapSpeechEnded(endedAt: number): OverlapSpeechEnded { + return { type: 'overlap-speech-ended', 
endedAt }; + } + + static flush(): Flush { + return { type: 'flush' }; + } +} + +function updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) { + span.setAttribute( + traceTypes.ATTR_IS_INTERRUPTION, + (entry.isInterruption ?? false).toString().toLowerCase(), + ); + span.setAttribute(traceTypes.ATTR_INTERRUPTION_PROBABILITY, entry.probability); + span.setAttribute(traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDurationInS); + span.setAttribute(traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDurationInS); + span.setAttribute(traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelayInS); +} + +export class InterruptionStreamBase { + private inputStream: StreamChannel; + + private eventStream: ReadableStream; + + private resampler?: AudioResampler; + + private numRequests = 0; + + private userSpeakingSpan: Span | undefined; + + private overlapSpeechStartedAt: number | undefined; + + private options: InterruptionOptions; + + private apiOptions: ApiConnectOptions; + + private model: AdaptiveInterruptionDetector; + + private logger = log(); + + // Store reconnect function for WebSocket transport + private wsReconnect?: () => Promise; + + // Mutable transport options that can be updated via updateOptions() + private transportOptions: { + baseUrl: string; + apiKey: string; + apiSecret: string; + sampleRate: number; + threshold: number; + minFrames: number; + timeout: number; + maxRetries: number; + }; + + constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial) { + this.inputStream = createStreamChannel< + InterruptionSentinel | AudioFrame, + InterruptionDetectionError + >(); + + this.model = model; + this.options = { ...model.options }; + this.apiOptions = { ...apiConnectDefaults, ...apiOptions }; + + // Initialize mutable transport options + this.transportOptions = { + baseUrl: this.options.baseUrl, + apiKey: this.options.apiKey, + apiSecret: this.options.apiSecret, + sampleRate: this.options.sampleRate, + 
threshold: this.options.threshold, + minFrames: this.options.minFrames, + timeout: this.options.inferenceTimeout, + maxRetries: this.apiOptions.maxRetries, + }; + + this.eventStream = this.setupTransform(); + } + + /** + * Update stream options. For WebSocket transport, this triggers a reconnection. + */ + async updateOptions(options: { + threshold?: number; + minInterruptionDurationInS?: number; + }): Promise { + if (options.threshold !== undefined) { + this.options.threshold = options.threshold; + this.transportOptions.threshold = options.threshold; + } + if (options.minInterruptionDurationInS !== undefined) { + this.options.minInterruptionDurationInS = options.minInterruptionDurationInS; + this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND); + this.transportOptions.minFrames = this.options.minFrames; + } + // Trigger WebSocket reconnection if using proxy (WebSocket transport) + if (this.options.useProxy && this.wsReconnect) { + await this.wsReconnect(); + } + } + + private setupTransform(): ReadableStream { + let agentSpeechStarted = false; + let startIdx = 0; + let accumulatedSamples = 0; + let overlapSpeechStarted = false; + let overlapCount = 0; + const cache = new BoundedCache(10); + const inferenceS16Data = new Int16Array( + Math.ceil(this.options.maxAudioDurationInS * this.options.sampleRate), + ).fill(0); + + // State accessors for transport + const getState = () => ({ + overlapSpeechStarted, + overlapSpeechStartedAt: this.overlapSpeechStartedAt, + cache, + overlapCount, + }); + const setState = (partial: { overlapSpeechStarted?: boolean }) => { + if (partial.overlapSpeechStarted !== undefined) { + overlapSpeechStarted = partial.overlapSpeechStarted; + } + }; + const handleSpanUpdate = (entry: InterruptionCacheEntry) => { + if (this.userSpeakingSpan) { + updateUserSpeakingSpan(this.userSpeakingSpan, entry); + this.userSpeakingSpan = undefined; + } + }; + + const onRequestSent = () => { + this.numRequests++; + }; + + 
const getAndResetNumRequests = (): number => { + const n = this.numRequests; + this.numRequests = 0; + return n; + }; + + // First transform: process input frames/sentinels and output audio slices or events + const audioTransformer = new TransformStream< + InterruptionSentinel | AudioFrame, + Int16Array | OverlappingSpeechEvent + >( + { + transform: (chunk, controller) => { + if (chunk instanceof AudioFrame) { + if (!agentSpeechStarted) { + return; + } + if (this.options.sampleRate !== chunk.sampleRate) { + controller.error('the sample rate of the input frames must be consistent'); + this.logger.error('the sample rate of the input frames must be consistent'); + return; + } + const result = writeToInferenceS16Data( + chunk, + startIdx, + inferenceS16Data, + this.options.maxAudioDurationInS, + ); + startIdx = result.startIdx; + accumulatedSamples += result.samplesWritten; + + if ( + accumulatedSamples >= + Math.floor(this.options.detectionIntervalInS * this.options.sampleRate) && + overlapSpeechStarted + ) { + const audioSlice = inferenceS16Data.slice(0, startIdx); + accumulatedSamples = 0; + controller.enqueue(audioSlice); + } + } else if (chunk.type === 'agent-speech-started') { + this.logger.debug('agent speech started'); + agentSpeechStarted = true; + overlapSpeechStarted = false; + this.overlapSpeechStartedAt = undefined; + accumulatedSamples = 0; + overlapCount = 0; + startIdx = 0; + this.numRequests = 0; + cache.clear(); + } else if (chunk.type === 'agent-speech-ended') { + this.logger.debug('agent speech ended'); + agentSpeechStarted = false; + overlapSpeechStarted = false; + this.overlapSpeechStartedAt = undefined; + accumulatedSamples = 0; + overlapCount = 0; + startIdx = 0; + this.numRequests = 0; + cache.clear(); + } else if (chunk.type === 'overlap-speech-started' && agentSpeechStarted) { + this.overlapSpeechStartedAt = chunk.startedAt; + this.userSpeakingSpan = chunk.userSpeakingSpan; + this.logger.debug('overlap speech started, starting interruption 
inference'); + overlapSpeechStarted = true; + accumulatedSamples = 0; + overlapCount += 1; + if (overlapCount <= 1) { + const keepSize = + Math.round((chunk.speechDuration / 1000) * this.options.sampleRate) + + Math.round(this.options.audioPrefixDurationInS * this.options.sampleRate); + const shiftCount = Math.max(0, startIdx - keepSize); + inferenceS16Data.copyWithin(0, shiftCount, startIdx); + startIdx -= shiftCount; + } + cache.clear(); + } else if (chunk.type === 'overlap-speech-ended') { + this.logger.debug('overlap speech ended'); + if (overlapSpeechStarted) { + this.userSpeakingSpan = undefined; + let latestEntry = cache.pop( + (entry) => entry.totalDurationInS !== undefined && entry.totalDurationInS > 0, + ); + if (!latestEntry) { + this.logger.debug('no request made for overlap speech'); + latestEntry = InterruptionCacheEntry.default(); + } + const e = latestEntry ?? InterruptionCacheEntry.default(); + const event: OverlappingSpeechEvent = { + type: 'overlapping_speech', + detectedAt: chunk.endedAt, + isInterruption: false, + overlapStartedAt: this.overlapSpeechStartedAt, + speechInput: e.speechInput, + probabilities: e.probabilities, + totalDurationInS: e.totalDurationInS, + detectionDelayInS: e.detectionDelayInS, + predictionDurationInS: e.predictionDurationInS, + probability: e.probability, + numRequests: getAndResetNumRequests(), + }; + controller.enqueue(event); + overlapSpeechStarted = false; + accumulatedSamples = 0; + } + this.overlapSpeechStartedAt = undefined; + } else if (chunk.type === 'flush') { + // no-op + } + }, + }, + { highWaterMark: 32 }, + { highWaterMark: 32 }, + ); + + // Second transform: transport layer (HTTP or WebSocket based on useProxy) + const transportOptions = this.transportOptions; + + let transport: TransformStream; + if (this.options.useProxy) { + const wsResult = createWsTransport( + transportOptions, + getState, + setState, + handleSpanUpdate, + onRequestSent, + getAndResetNumRequests, + ); + transport = 
wsResult.transport; + this.wsReconnect = wsResult.reconnect; + } else { + transport = createHttpTransport( + transportOptions, + getState, + setState, + handleSpanUpdate, + getAndResetNumRequests, + ); + } + + const eventEmitter = new TransformStream({ + transform: (chunk, controller) => { + this.model.emit('overlapping_speech', chunk); + + const metrics: InterruptionMetrics = { + type: 'interruption_metrics', + timestamp: chunk.detectedAt, + totalDuration: chunk.totalDurationInS * 1000, + predictionDuration: chunk.predictionDurationInS * 1000, + detectionDelay: chunk.detectionDelayInS * 1000, + numInterruptions: chunk.isInterruption ? 1 : 0, + numBackchannels: chunk.isInterruption ? 0 : 1, + numRequests: chunk.numRequests, + metadata: { + modelProvider: this.model.provider, + modelName: this.model.model, + }, + }; + this.model.emit('metrics_collected', metrics); + + controller.enqueue(chunk); + }, + }); + + // Pipeline: input -> audioTransformer -> transport -> eventEmitter -> eventStream + return this.inputStream + .stream() + .pipeThrough(audioTransformer) + .pipeThrough(transport) + .pipeThrough(eventEmitter); + } + + private ensureInputNotEnded() { + if (this.inputStream.closed) { + throw new Error('input stream is closed'); + } + } + + private ensureStreamsNotEnded() { + this.ensureInputNotEnded(); + } + + private getResamplerFor(inputSampleRate: number): AudioResampler { + if (!this.resampler) { + this.resampler = new AudioResampler(inputSampleRate, this.options.sampleRate); + } + return this.resampler; + } + + stream(): ReadableStream { + return this.eventStream; + } + + async pushFrame(frame: InterruptionSentinel | AudioFrame): Promise { + this.ensureStreamsNotEnded(); + if (!(frame instanceof AudioFrame)) { + return this.inputStream.write(frame); + } else if (this.options.sampleRate !== frame.sampleRate) { + const resampler = this.getResamplerFor(frame.sampleRate); + if (resampler.inputRate !== frame.sampleRate) { + throw new Error('the sample rate of the 
input frames must be consistent'); + } + for (const resampledFrame of resampler.push(frame)) { + await this.inputStream.write(resampledFrame); + } + } else { + await this.inputStream.write(frame); + } + } + + async flush(): Promise { + this.ensureStreamsNotEnded(); + await this.inputStream.write(InterruptionStreamSentinel.flush()); + } + + async endInput(): Promise { + await this.flush(); + await this.inputStream.close(); + } + + async close(): Promise { + if (!this.inputStream.closed) await this.inputStream.close(); + this.model.removeStream(this); + } +} + +/** + * Write the audio frame to the output data array and return the new start index + * and the number of samples written. + */ +function writeToInferenceS16Data( + frame: AudioFrame, + startIdx: number, + outData: Int16Array, + maxAudioDuration: number, +): { startIdx: number; samplesWritten: number } { + const maxWindowSize = Math.floor(maxAudioDuration * frame.sampleRate); + + if (frame.samplesPerChannel > outData.length) { + throw new Error('frame samples are greater than the max window size'); + } + + // Shift the data to the left if the window would overflow + const shift = startIdx + frame.samplesPerChannel - maxWindowSize; + if (shift > 0) { + outData.copyWithin(0, shift, startIdx); + startIdx -= shift; + } + + // Get the frame data as Int16Array + const frameData = new Int16Array( + frame.data.buffer, + frame.data.byteOffset, + frame.samplesPerChannel * frame.channels, + ); + + if (frame.channels > 1) { + // Mix down multiple channels to mono by averaging + for (let i = 0; i < frame.samplesPerChannel; i++) { + let sum = 0; + for (let ch = 0; ch < frame.channels; ch++) { + sum += frameData[i * frame.channels + ch] ?? 
0; + } + outData[startIdx + i] = Math.floor(sum / frame.channels); + } + } else { + // Single channel - copy directly + outData.set(frameData, startIdx); + } + + startIdx += frame.samplesPerChannel; + return { startIdx, samplesWritten: frame.samplesPerChannel }; +} diff --git a/agents/src/inference/interruption/types.ts b/agents/src/inference/interruption/types.ts new file mode 100644 index 000000000..d3aae7b95 --- /dev/null +++ b/agents/src/inference/interruption/types.ts @@ -0,0 +1,84 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +import type { Span } from '@opentelemetry/api'; + +export interface OverlappingSpeechEvent { + type: 'overlapping_speech'; + detectedAt: number; + isInterruption: boolean; + totalDurationInS: number; + predictionDurationInS: number; + detectionDelayInS: number; + overlapStartedAt?: number; + speechInput?: Int16Array; + probabilities?: number[]; + probability: number; + numRequests: number; +} + +/** + * Configuration options for interruption detection. + */ +export interface InterruptionOptions { + sampleRate: number; + threshold: number; + minFrames: number; + maxAudioDurationInS: number; + audioPrefixDurationInS: number; + detectionIntervalInS: number; + inferenceTimeout: number; + minInterruptionDurationInS: number; + baseUrl: string; + apiKey: string; + apiSecret: string; + useProxy: boolean; +} + +/** + * API connection options for transport layers. + */ +export interface ApiConnectOptions { + maxRetries: number; + retryInterval: number; + timeout: number; +} + +// Sentinel types for stream control signals + +export interface AgentSpeechStarted { + type: 'agent-speech-started'; +} + +export interface AgentSpeechEnded { + type: 'agent-speech-ended'; +} + +export interface OverlapSpeechStarted { + type: 'overlap-speech-started'; + /** Duration of the speech segment in milliseconds (matches VADEvent.speechDuration units). 
*/ + speechDuration: number; + /** Absolute timestamp (ms) when overlap speech started, computed at call-site. */ + startedAt: number; + userSpeakingSpan?: Span; +} + +export interface OverlapSpeechEnded { + type: 'overlap-speech-ended'; + /** Absolute timestamp (ms) when overlap speech ended, used as the non-interruption event timestamp. */ + endedAt: number; +} + +export interface Flush { + type: 'flush'; +} + +/** + * Union type for all stream control signals. + */ +export type InterruptionSentinel = + | AgentSpeechStarted + | AgentSpeechEnded + | OverlapSpeechStarted + | OverlapSpeechEnded + | Flush; diff --git a/agents/src/inference/interruption/utils.test.ts b/agents/src/inference/interruption/utils.test.ts new file mode 100644 index 000000000..79b585fe6 --- /dev/null +++ b/agents/src/inference/interruption/utils.test.ts @@ -0,0 +1,132 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +import { describe, expect, it, vi } from 'vitest'; +import { BoundedCache } from './utils.js'; + +class Entry { + createdAt: number; + totalDurationInS: number | undefined = undefined; + predictionDurationInS: number | undefined = undefined; + note: string | undefined = undefined; + + constructor(createdAt: number, note?: string) { + this.createdAt = createdAt; + this.note = note; + } +} + +describe('BoundedCache', () => { + it('evicts oldest entry when maxLen is exceeded', () => { + const cache = new BoundedCache(2); + cache.set(1, new Entry(1)); + cache.set(2, new Entry(2)); + cache.set(3, new Entry(3)); + + expect(cache.size).toBe(2); + expect([...cache.keys()]).toEqual([2, 3]); + expect(cache.get(1)).toBeUndefined(); + expect(cache.get(2)!.createdAt).toBe(2); + expect(cache.get(3)!.createdAt).toBe(3); + }); + + it('setOrUpdate creates a value via factory when key is missing', () => { + const cache = new BoundedCache(10); + const factory = vi.fn(() => new Entry(100)); + + const value = cache.setOrUpdate(1, factory, { 
predictionDurationInS: 0.42 }); + + expect(factory).toHaveBeenCalledTimes(1); + expect(value.createdAt).toBe(100); + expect(value.predictionDurationInS).toBe(0.42); + expect(cache.get(1)?.predictionDurationInS).toBe(0.42); + }); + + it('setOrUpdate updates existing value and does not call factory', () => { + const cache = new BoundedCache(10); + cache.set(1, new Entry(1, 'before')); + const factory = vi.fn(() => new Entry(999)); + + const value = cache.setOrUpdate(1, factory, { note: 'after', totalDurationInS: 1.5 }); + + expect(factory).not.toHaveBeenCalled(); + expect(value.createdAt).toBe(1); + expect(value.note).toBe('after'); + expect(value.totalDurationInS).toBe(1.5); + }); + + it('updateValue returns undefined for missing key', () => { + const cache = new BoundedCache(10); + const result = cache.updateValue(404, { note: 'missing' }); + + expect(result).toBeUndefined(); + }); + + it('updateValue ignores undefined fields', () => { + const cache = new BoundedCache(10); + cache.set(1, new Entry(1, 'keep')); + + const result = cache.updateValue(1, { + note: undefined, + predictionDurationInS: 0.1, + }); + + expect(result?.createdAt).toBe(1); + expect(result?.note).toBe('keep'); + expect(result?.predictionDurationInS).toBe(0.1); + }); + + it('pop without predicate removes the oldest entry (python parity)', () => { + const cache = new BoundedCache(10); + cache.set(1, new Entry(1)); + cache.set(2, new Entry(2)); + cache.set(3, new Entry(3)); + + const popped = cache.pop(); + + expect(popped?.createdAt).toBe(1); + expect([...cache.keys()]).toEqual([2, 3]); + }); + + it('pop with predicate removes the most recent matching entry', () => { + const cache = new BoundedCache(10); + const e1 = new Entry(1); + e1.totalDurationInS = 0; + const e2 = new Entry(2); + e2.totalDurationInS = 1; + const e3 = new Entry(3); + e3.totalDurationInS = 2; + cache.set(1, e1); + cache.set(2, e2); + cache.set(3, e3); + + const popped = cache.pop((entry) => (entry.totalDurationInS ?? 
0) > 0); + + expect(popped?.createdAt).toBe(3); + expect(popped?.totalDurationInS).toBe(2); + expect([...cache.keys()]).toEqual([1, 2]); + }); + + it('pop with predicate returns undefined when no match exists', () => { + const cache = new BoundedCache(10); + const e1 = new Entry(1); + e1.totalDurationInS = 0; + cache.set(1, e1); + + const popped = cache.pop((entry) => (entry.totalDurationInS ?? 0) > 10); + + expect(popped).toBeUndefined(); + expect(cache.size).toBe(1); + }); + + it('clear removes all entries', () => { + const cache = new BoundedCache(10); + cache.set(1, new Entry(1)); + cache.set(2, new Entry(2)); + + cache.clear(); + + expect(cache.size).toBe(0); + expect([...cache.keys()]).toEqual([]); + }); +}); diff --git a/agents/src/inference/interruption/utils.ts b/agents/src/inference/interruption/utils.ts new file mode 100644 index 000000000..e614f3b6d --- /dev/null +++ b/agents/src/inference/interruption/utils.ts @@ -0,0 +1,137 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +import { FRAME_DURATION_IN_S, MIN_INTERRUPTION_DURATION_IN_S } from './defaults.js'; + +/** + * A bounded cache that automatically evicts the oldest entries when the cache exceeds max size. + * Uses FIFO eviction strategy. + */ +export class BoundedCache { + private cache: Map = new Map(); + private readonly maxLen: number; + + constructor(maxLen: number = 10) { + this.maxLen = maxLen; + } + + set(key: K, value: V): void { + this.cache.set(key, value); + if (this.cache.size > this.maxLen) { + // Remove the oldest entry (first inserted) + const firstKey = this.cache.keys().next().value as K; + this.cache.delete(firstKey); + } + } + + /** + * Update existing value fields if present and defined. + * Mirrors python BoundedDict.update_value behavior. 
+ */ + updateValue(key: K, fields: Partial): V | undefined { + const value = this.cache.get(key); + if (!value) return value; + + for (const [fieldName, fieldValue] of Object.entries(fields) as [keyof V, V[keyof V]][]) { + if (fieldValue === undefined) continue; + // Runtime field update parity with python's hasattr + setattr. + if (fieldName in (value as object)) { + (value as Record)[String(fieldName)] = fieldValue; + } + } + return value; + } + + /** + * Set a new value with factory when missing; otherwise update in place. + * Mirrors python BoundedDict.set_or_update behavior. + */ + setOrUpdate(key: K, factory: () => V, fields: Partial): V { + if (!this.cache.has(key)) { + this.set(key, factory()); + } + const result = this.updateValue(key, fields); + if (!result) { + throw new Error('setOrUpdate invariant failed: entry should exist after set'); + } + return result; + } + + get(key: K): V | undefined { + return this.cache.get(key); + } + + has(key: K): boolean { + return this.cache.has(key); + } + + delete(key: K): boolean { + return this.cache.delete(key); + } + + /** + * Pop an entry if it satisfies the predicate. 
+ * - No predicate: pop oldest (FIFO) + * - With predicate: search in reverse order and pop first match + */ + pop(predicate?: (value: V) => boolean): V | undefined { + if (predicate === undefined) { + const first = this.cache.entries().next().value as [K, V] | undefined; + if (!first) return undefined; + const [key, value] = first; + this.cache.delete(key); + return value; + } + + const keys = Array.from(this.cache.keys()); + for (let i = keys.length - 1; i >= 0; i--) { + const key = keys[i]!; + const value = this.cache.get(key)!; + if (predicate(value)) { + this.cache.delete(key); + return value; + } + } + return undefined; + } + + clear(): void { + this.cache.clear(); + } + + get size(): number { + return this.cache.size; + } + + values(): IterableIterator { + return this.cache.values(); + } + + keys(): IterableIterator { + return this.cache.keys(); + } + + entries(): IterableIterator<[K, V]> { + return this.cache.entries(); + } +} + +/** + * Estimate probability by finding the n-th maximum value in the probabilities array. + * The n-th position is determined by the window size (25ms per frame). + * Returns 0 if there are insufficient probabilities. + */ +export function estimateProbability( + probabilities: number[], + windowSizeInS: number = MIN_INTERRUPTION_DURATION_IN_S, +): number { + const nTh = Math.ceil(windowSizeInS / FRAME_DURATION_IN_S); + if (probabilities.length < nTh) { + return 0; + } + + // Find the n-th maximum value by sorting in descending order + // Create a copy to avoid mutating the original array + const sorted = [...probabilities].sort((a, b) => b - a); + return sorted[nTh - 1]!; +} diff --git a/agents/src/inference/interruption/ws_transport.ts b/agents/src/inference/interruption/ws_transport.ts new file mode 100644 index 000000000..ab708addd --- /dev/null +++ b/agents/src/inference/interruption/ws_transport.ts @@ -0,0 +1,406 @@ +// SPDX-FileCopyrightText: 2025 LiveKit, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +import { TransformStream } from 'stream/web'; +import WebSocket from 'ws'; +import { z } from 'zod'; +import { APIConnectionError, APIStatusError, APITimeoutError } from '../../_exceptions.js'; +import { log } from '../../log.js'; +import { createAccessToken } from '../utils.js'; +import { InterruptionCacheEntry } from './interruption_cache_entry.js'; +import type { OverlappingSpeechEvent } from './types.js'; +import type { BoundedCache } from './utils.js'; + +// WebSocket message types +const MSG_SESSION_CREATE = 'session.create'; +const MSG_SESSION_CLOSE = 'session.close'; +const MSG_SESSION_CREATED = 'session.created'; +const MSG_SESSION_CLOSED = 'session.closed'; +const MSG_INTERRUPTION_DETECTED = 'bargein_detected'; +const MSG_INFERENCE_DONE = 'inference_done'; +const MSG_ERROR = 'error'; + +export interface WsTransportOptions { + baseUrl: string; + apiKey: string; + apiSecret: string; + sampleRate: number; + threshold: number; + minFrames: number; + timeout: number; + maxRetries?: number; +} + +export interface WsTransportState { + overlapSpeechStarted: boolean; + overlapSpeechStartedAt: number | undefined; + cache: BoundedCache; +} + +const wsMessageSchema = z.discriminatedUnion('type', [ + z.object({ + type: z.literal(MSG_SESSION_CREATED), + }), + z.object({ + type: z.literal(MSG_SESSION_CLOSED), + }), + z.object({ + type: z.literal(MSG_INTERRUPTION_DETECTED), + created_at: z.number(), + probabilities: z.array(z.number()).default([]), + prediction_duration: z.number().default(0), + }), + z.object({ + type: z.literal(MSG_INFERENCE_DONE), + created_at: z.number(), + probabilities: z.array(z.number()).default([]), + prediction_duration: z.number().default(0), + is_bargein: z.boolean().optional(), + }), + z.object({ + type: z.literal(MSG_ERROR), + message: z.string(), + code: z.number().optional(), + session_id: z.string().optional(), + }), +]); + +type WsMessage = z.infer; + +/** + * Creates a WebSocket connection 
and waits for it to open. + */ +async function connectWebSocket(options: WsTransportOptions): Promise { + const baseUrl = options.baseUrl.replace(/^http/, 'ws'); + const token = await createAccessToken(options.apiKey, options.apiSecret); + const url = `${baseUrl}/bargein`; + + const ws = new WebSocket(url, { + headers: { Authorization: `Bearer ${token}` }, + }); + + await new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + ws.terminate(); + reject( + new APITimeoutError({ + message: 'WebSocket connection timeout', + options: { retryable: false }, + }), + ); + }, options.timeout); + ws.once('open', () => { + clearTimeout(timeout); + resolve(); + }); + ws.once('unexpected-response', (_req, res) => { + clearTimeout(timeout); + ws.terminate(); + const statusCode = res.statusCode ?? -1; + reject( + new APIStatusError({ + message: `WebSocket connection rejected with status ${statusCode}`, + options: { statusCode, retryable: false }, + }), + ); + }); + ws.once('error', (err: Error) => { + clearTimeout(timeout); + ws.terminate(); + reject(new APIConnectionError({ message: `WebSocket connection error: ${err.message}` })); + }); + }); + + return ws; +} + +export interface WsTransportResult { + transport: TransformStream; + reconnect: () => Promise; +} + +/** + * Creates a WebSocket transport TransformStream for interruption detection. + * + * This transport receives Int16Array audio slices and outputs InterruptionEvents. + * It maintains a persistent WebSocket connection with automatic retry on failure. + * Returns both the transport and a reconnect function for option updates. 
+ */ +export function createWsTransport( + options: WsTransportOptions, + getState: () => WsTransportState, + setState: (partial: Partial) => void, + updateUserSpeakingSpan?: (entry: InterruptionCacheEntry) => void, + onRequestSent?: () => void, + getAndResetNumRequests?: () => number, +): WsTransportResult { + const logger = log(); + let ws: WebSocket | null = null; + let outputController: TransformStreamDefaultController | null = null; + + function setupMessageHandler(socket: WebSocket): void { + socket.on('message', (data: WebSocket.Data) => { + try { + const message = wsMessageSchema.parse(JSON.parse(data.toString())); + handleMessage(message); + } catch { + logger.warn({ data: data.toString() }, 'Failed to parse WebSocket message'); + } + }); + + socket.on('error', (err: Error) => { + outputController?.error( + new APIConnectionError({ message: `WebSocket error: ${err.message}` }), + ); + }); + + socket.on('close', (code: number, reason: Buffer) => { + logger.debug({ code, reason: reason.toString() }, 'WebSocket closed'); + }); + } + + async function ensureConnection(): Promise { + if (ws && ws.readyState === WebSocket.OPEN) return; + + ws = await connectWebSocket(options); + setupMessageHandler(ws); + + const sessionCreateMsg = JSON.stringify({ + type: MSG_SESSION_CREATE, + settings: { + sample_rate: options.sampleRate, + num_channels: 1, + threshold: options.threshold, + min_frames: options.minFrames, + encoding: 's16le', + }, + }); + ws.send(sessionCreateMsg); + } + + function handleMessage(message: WsMessage): void { + const state = getState(); + + switch (message.type) { + case MSG_SESSION_CREATED: + logger.debug('WebSocket session created'); + break; + + case MSG_INTERRUPTION_DETECTED: { + const createdAt = message.created_at; + const overlapSpeechStartedAt = state.overlapSpeechStartedAt; + if (state.overlapSpeechStarted && overlapSpeechStartedAt !== undefined) { + const existing = state.cache.get(createdAt); + + const totalDurationInS = + 
existing?.requestStartedAt !== undefined + ? (performance.now() - existing.requestStartedAt) / 1000 + : (performance.now() - createdAt) / 1000; + + const entry = state.cache.setOrUpdate( + createdAt, + () => new InterruptionCacheEntry({ createdAt }), + { + speechInput: existing?.speechInput, + requestStartedAt: existing?.requestStartedAt, + totalDurationInS, + probabilities: message.probabilities, + isInterruption: true, + predictionDurationInS: message.prediction_duration, + detectionDelayInS: (Date.now() - overlapSpeechStartedAt) / 1000, + }, + ); + + if (updateUserSpeakingSpan) { + updateUserSpeakingSpan(entry); + } + + logger.debug( + { + totalDuration: entry.totalDurationInS, + predictionDuration: entry.predictionDurationInS, + detectionDelay: entry.detectionDelayInS, + probability: entry.probability, + }, + 'interruption detected', + ); + + const event: OverlappingSpeechEvent = { + type: 'overlapping_speech', + detectedAt: Date.now(), + isInterruption: true, + totalDurationInS: entry.totalDurationInS, + predictionDurationInS: entry.predictionDurationInS, + overlapStartedAt: overlapSpeechStartedAt, + speechInput: entry.speechInput, + probabilities: entry.probabilities, + detectionDelayInS: entry.detectionDelayInS, + probability: entry.probability, + numRequests: getAndResetNumRequests?.() ?? 0, + }; + + outputController?.enqueue(event); + setState({ overlapSpeechStarted: false }); + } + break; + } + + case MSG_INFERENCE_DONE: { + const createdAt = message.created_at; + const overlapSpeechStartedAt = state.overlapSpeechStartedAt; + if (state.overlapSpeechStarted && overlapSpeechStartedAt !== undefined) { + const existing = state.cache.get(createdAt); + const totalDurationInS = + existing?.requestStartedAt !== undefined + ? 
(performance.now() - existing.requestStartedAt) / 1000 + : (performance.now() - createdAt) / 1000; + const entry = state.cache.setOrUpdate( + createdAt, + () => new InterruptionCacheEntry({ createdAt }), + { + speechInput: existing?.speechInput, + requestStartedAt: existing?.requestStartedAt, + totalDurationInS, + predictionDurationInS: message.prediction_duration, + probabilities: message.probabilities, + isInterruption: message.is_bargein ?? false, + detectionDelayInS: (Date.now() - overlapSpeechStartedAt) / 1000, + }, + ); + + logger.debug( + { + totalDurationInS: entry.totalDurationInS, + predictionDurationInS: entry.predictionDurationInS, + }, + 'interruption inference done', + ); + } + break; + } + + case MSG_SESSION_CLOSED: + logger.debug('WebSocket session closed'); + break; + + case MSG_ERROR: + outputController?.error( + new APIStatusError({ + message: `LiveKit Adaptive Interruption error: ${message.message}`, + options: { statusCode: message.code ?? -1 }, + }), + ); + break; + } + } + + function sendAudioData(audioSlice: Int16Array): void { + if (!ws || ws.readyState !== WebSocket.OPEN) { + throw new APIConnectionError({ message: 'WebSocket not connected' }); + } + + const state = getState(); + const createdAt = Math.floor(performance.now()); + + state.cache.set( + createdAt, + new InterruptionCacheEntry({ + createdAt, + requestStartedAt: performance.now(), + speechInput: audioSlice, + }), + ); + + const header = new ArrayBuffer(8); + const view = new DataView(header); + view.setUint32(0, createdAt >>> 0, true); + view.setUint32(4, Math.floor(createdAt / 0x100000000) >>> 0, true); + + const audioBytes = new Uint8Array( + audioSlice.buffer, + audioSlice.byteOffset, + audioSlice.byteLength, + ); + const combined = new Uint8Array(8 + audioBytes.length); + combined.set(new Uint8Array(header), 0); + combined.set(audioBytes, 8); + + ws.send(combined); + onRequestSent?.(); + } + + function close(): void { + if (ws?.readyState === WebSocket.OPEN) { + const 
closeMsg = JSON.stringify({ type: MSG_SESSION_CLOSE }); + try { + ws.send(closeMsg); + } catch (e: unknown) { + logger.error(e, 'failed to send close message'); + } + } + ws?.close(1000); // signal normal websocket closure + ws = null; + } + + /** + * Reconnect the WebSocket with updated options. + * This is called when options are updated via updateOptions(). + */ + async function reconnect(): Promise { + close(); + } + + const transport = new TransformStream< + Int16Array | OverlappingSpeechEvent, + OverlappingSpeechEvent + >( + { + async start(controller) { + outputController = controller; + await ensureConnection(); + }, + + transform(chunk, controller) { + if (!(chunk instanceof Int16Array)) { + controller.enqueue(chunk); + return; + } + + // Only forwards buffered audio while overlap speech is actively on. + const state = getState(); + if (!state.overlapSpeechStartedAt || !state.overlapSpeechStarted) return; + + if (options.timeout > 0) { + const now = performance.now(); + for (const [, entry] of state.cache.entries()) { + if (entry.totalDurationInS !== 0) continue; + if (now - entry.createdAt > options.timeout) { + controller.error( + new APIStatusError({ + message: `interruption inference timed out after ${((now - entry.createdAt) / 1000).toFixed(1)}s (ws)`, + options: { statusCode: 408, retryable: false }, + }), + ); + return; + } + break; + } + } + + try { + sendAudioData(chunk); + } catch (err) { + controller.error(err); + } + }, + + flush() { + close(); + }, + }, + { highWaterMark: 2 }, + { highWaterMark: 2 }, + ); + + return { transport, reconnect }; +} diff --git a/agents/src/inference/llm.ts b/agents/src/inference/llm.ts index 312a97a0c..b731672d4 100644 --- a/agents/src/inference/llm.ts +++ b/agents/src/inference/llm.ts @@ -4,12 +4,10 @@ import OpenAI from 'openai'; import { APIConnectionError, APIStatusError, APITimeoutError } from '../_exceptions.js'; import * as llm from '../llm/index.js'; -import { DEFAULT_API_CONNECT_OPTIONS } from 
'../types.js'; import type { APIConnectOptions } from '../types.js'; +import { DEFAULT_API_CONNECT_OPTIONS } from '../types.js'; import { type Expand, toError } from '../utils.js'; -import { type AnyString, createAccessToken } from './utils.js'; - -const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1'; +import { type AnyString, createAccessToken, getDefaultInferenceUrl } from './utils.js'; export type OpenAIModels = | 'openai/gpt-5.4' @@ -124,7 +122,7 @@ export class LLM extends llm.LLM { strictToolSchema = false, } = opts; - const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL; + const lkBaseURL = baseURL || getDefaultInferenceUrl(); const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY; if (!lkApiKey) { throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY'); @@ -160,6 +158,10 @@ export class LLM extends llm.LLM { return this.opts.model; } + get provider(): string { + return 'livekit'; + } + static fromModelString(modelString: string): LLM { return new LLM({ model: modelString }); } diff --git a/agents/src/inference/stt.ts b/agents/src/inference/stt.ts index e8b7b666d..1da16e179 100644 --- a/agents/src/inference/stt.ts +++ b/agents/src/inference/stt.ts @@ -23,7 +23,7 @@ import { type SttTranscriptEvent, sttServerEventSchema, } from './api_protos.js'; -import { type AnyString, connectWs, createAccessToken } from './utils.js'; +import { type AnyString, connectWs, createAccessToken, getDefaultInferenceUrl } from './utils.js'; export type DeepgramModels = | 'deepgram/flux-general' @@ -152,7 +152,6 @@ export type STTEncoding = 'pcm_s16le'; const DEFAULT_ENCODING: STTEncoding = 'pcm_s16le'; const DEFAULT_SAMPLE_RATE = 16000; -const DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1'; const DEFAULT_CANCEL_TIMEOUT = 5000; export interface InferenceSTTOptions { @@ -204,7 +203,7 @@ export class STT extends BaseSTT { connOptions, } = opts || {}; - const lkBaseURL = 
baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL; + const lkBaseURL = baseURL || getDefaultInferenceUrl(); const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY; if (!lkApiKey) { throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY'); @@ -253,6 +252,14 @@ export class STT extends BaseSTT { return 'inference.STT'; } + get model(): string { + return this.opts.model ?? 'auto'; + } + + get provider(): string { + return 'livekit'; + } + static fromModelString(modelString: string): STT { const [model, language] = parseSTTModelString(modelString); return new STT({ model, language }); diff --git a/agents/src/inference/tts.ts b/agents/src/inference/tts.ts index 8c32672cb..2e83c619f 100644 --- a/agents/src/inference/tts.ts +++ b/agents/src/inference/tts.ts @@ -20,7 +20,7 @@ import { ttsClientEventSchema, ttsServerEventSchema, } from './api_protos.js'; -import { type AnyString, connectWs, createAccessToken } from './utils.js'; +import { type AnyString, connectWs, createAccessToken, getDefaultInferenceUrl } from './utils.js'; export type CartesiaModels = | 'cartesia/sonic-3' @@ -144,7 +144,6 @@ type TTSEncoding = 'pcm_s16le'; const DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le'; const DEFAULT_SAMPLE_RATE = 16000; -const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1'; const NUM_CHANNELS = 1; const DEFAULT_LANGUAGE = 'en'; @@ -201,7 +200,7 @@ export class TTS extends BaseTTS { connOptions, } = opts || {}; - const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL; + const lkBaseURL = baseURL || getDefaultInferenceUrl(); const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY; if (!lkApiKey) { throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY'); @@ -262,6 +261,14 @@ export class TTS extends BaseTTS { return 'inference.TTS'; } + get model(): string { + return this.opts.model ?? 
'unknown'; + } + + get provider(): string { + return 'livekit'; + } + static fromModelString(modelString: string): TTS { const [model, voice] = parseTTSModelString(modelString); return new TTS({ model, voice: voice || undefined }); diff --git a/agents/src/inference/utils.ts b/agents/src/inference/utils.ts index 801e89319..a80017c0b 100644 --- a/agents/src/inference/utils.ts +++ b/agents/src/inference/utils.ts @@ -7,6 +7,34 @@ import { APIConnectionError, APIStatusError } from '../_exceptions.js'; export type AnyString = string & NonNullable; +/** Default production inference URL */ +export const DEFAULT_INFERENCE_URL = 'https://agent-gateway.livekit.cloud/v1'; + +/** Staging inference URL */ +export const STAGING_INFERENCE_URL = 'https://agent-gateway.staging.livekit.cloud/v1'; + +/** + * Get the default inference URL based on the environment. + * + * Priority: + * 1. LIVEKIT_INFERENCE_URL if set + * 2. If LIVEKIT_URL contains '.staging.livekit.cloud', use staging gateway + * 3. Otherwise, use production gateway + */ +export function getDefaultInferenceUrl(): string { + const inferenceUrl = process.env.LIVEKIT_INFERENCE_URL; + if (inferenceUrl) { + return inferenceUrl; + } + + const livekitUrl = process.env.LIVEKIT_URL || ''; + if (livekitUrl.includes('.staging.livekit.cloud')) { + return STAGING_INFERENCE_URL; + } + + return DEFAULT_INFERENCE_URL; +} + export async function createAccessToken( apiKey: string, apiSecret: string, diff --git a/agents/src/job.ts b/agents/src/job.ts index e8ad3fac2..87e67195b 100644 --- a/agents/src/job.ts +++ b/agents/src/job.ts @@ -276,7 +276,7 @@ export class JobContext { jobId: this.job.id, roomId: this.job.room?.sid || '', room: this.job.room?.name || '', - options: targetSession.options, + options: targetSession.sessionOptions, events: targetSession._recordedEvents, enableRecording: targetSession._enableRecording, chatHistory: targetSession.history.copy(), diff --git a/agents/src/llm/chat_context.ts 
b/agents/src/llm/chat_context.ts index 5ac15c0c7..114b4ac75 100644 --- a/agents/src/llm/chat_context.ts +++ b/agents/src/llm/chat_context.ts @@ -81,6 +81,17 @@ export function createAudioContent(params: { }; } +export interface MetricsReport { + startedSpeakingAt?: number; + stoppedSpeakingAt?: number; + transcriptionDelay?: number; + endOfTurnDelay?: number; + onUserTurnCompletedDelay?: number; + llmNodeTtft?: number; + ttsNodeTtfb?: number; + e2eLatency?: number; +} + export class ChatMessage { readonly id: string; @@ -92,18 +103,24 @@ export class ChatMessage { interrupted: boolean; + transcriptConfidence?: number; + + extra: Record; + + metrics: MetricsReport; + hash?: Uint8Array; createdAt: number; - extra: Record; - constructor(params: { role: ChatRole; content: ChatContent[] | string; id?: string; interrupted?: boolean; createdAt?: number; + transcriptConfidence?: number; + metrics?: MetricsReport; extra?: Record; }) { const { @@ -112,6 +129,8 @@ export class ChatMessage { id = shortuuid('item_'), interrupted = false, createdAt = Date.now(), + transcriptConfidence, + metrics = {}, extra = {}, } = params; this.id = id; @@ -119,6 +138,8 @@ export class ChatMessage { this.content = Array.isArray(content) ? 
content : [content]; this.interrupted = interrupted; this.createdAt = createdAt; + this.transcriptConfidence = transcriptConfidence; + this.metrics = metrics; this.extra = extra; } @@ -128,6 +149,8 @@ export class ChatMessage { id?: string; interrupted?: boolean; createdAt?: number; + transcriptConfidence?: number; + metrics?: MetricsReport; extra?: Record; }) { return new ChatMessage(params); @@ -179,6 +202,16 @@ export class ChatMessage { result.createdAt = this.createdAt; } + if (this.transcriptConfidence !== undefined) { + result.transcriptConfidence = this.transcriptConfidence; + } + if (Object.keys(this.metrics).length > 0) { + result.metrics = { ...this.metrics }; + } + if (Object.keys(this.extra).length > 0) { + result.extra = this.extra as JSONValue; + } + return result; } } @@ -439,6 +472,8 @@ export class ChatContext { id?: string; interrupted?: boolean; createdAt?: number; + transcriptConfidence?: number; + metrics?: MetricsReport; extra?: Record; }): ChatMessage { const msg = new ChatMessage(params); @@ -623,6 +658,9 @@ export class ChatContext { id: item.id, interrupted: item.interrupted, createdAt: item.createdAt, + transcriptConfidence: item.transcriptConfidence, + metrics: item.metrics, + extra: item.extra, }); // Filter content based on options diff --git a/agents/src/llm/index.ts b/agents/src/llm/index.ts index 3eb5b2117..e68ee9380 100644 --- a/agents/src/llm/index.ts +++ b/agents/src/llm/index.ts @@ -30,6 +30,7 @@ export { type ChatItem, type ChatRole, type ImageContent, + type MetricsReport, } from './chat_context.js'; export type { ProviderFormat } from './provider_format/index.js'; diff --git a/agents/src/llm/llm.ts b/agents/src/llm/llm.ts index 624bea490..a71bd4714 100644 --- a/agents/src/llm/llm.ts +++ b/agents/src/llm/llm.ts @@ -65,6 +65,18 @@ export abstract class LLM extends (EventEmitter as new () => TypedEmitter { } return (usage?.completionTokens || 0) / (durationMs / 1000); })(), + metadata: { + modelProvider: this.#llm.provider, + 
modelName: this.#llm.model, + }, }; if (this.#llmRequestSpan) { diff --git a/agents/src/llm/realtime.ts b/agents/src/llm/realtime.ts index 5c132afd0..864e25d2d 100644 --- a/agents/src/llm/realtime.ts +++ b/agents/src/llm/realtime.ts @@ -73,6 +73,10 @@ export abstract class RealtimeModel { /** The model name/identifier used by this realtime model */ abstract get model(): string; + get provider(): string { + return 'unknown'; + } + abstract session(): RealtimeSession; abstract close(): Promise; diff --git a/agents/src/metrics/base.ts b/agents/src/metrics/base.ts index 7f6d6a0cc..1c9c317c1 100644 --- a/agents/src/metrics/base.ts +++ b/agents/src/metrics/base.ts @@ -2,13 +2,21 @@ // // SPDX-License-Identifier: Apache-2.0 +export type MetricsMetadata = { + /** The provider name (e.g., 'openai', 'anthropic'). */ + modelProvider?: string; + /** The model name (e.g., 'gpt-4o', 'claude-3-5-sonnet'). */ + modelName?: string; +}; + export type AgentMetrics = | STTMetrics | LLMMetrics | TTSMetrics | VADMetrics | EOUMetrics - | RealtimeModelMetrics; + | RealtimeModelMetrics + | InterruptionMetrics; export type LLMMetrics = { type: 'llm_metrics'; @@ -26,6 +34,8 @@ export type LLMMetrics = { totalTokens: number; tokensPerSecond: number; speechId?: string; + /** Metadata for model provider and name tracking. */ + metadata?: MetricsMetadata; }; export type STTMetrics = { @@ -41,10 +51,16 @@ export type STTMetrics = { * The duration of the pushed audio in milliseconds. */ audioDurationMs: number; + /** Input audio tokens (for token-based billing). */ + inputTokens?: number; + /** Output text tokens (for token-based billing). */ + outputTokens?: number; /** * Whether the STT is streaming (e.g using websocket). */ streamed: boolean; + /** Metadata for model provider and name tracking. */ + metadata?: MetricsMetadata; }; export type TTSMetrics = { @@ -59,10 +75,17 @@ export type TTSMetrics = { /** Generated audio duration in milliseconds. 
*/ audioDurationMs: number; cancelled: boolean; + /** Number of characters synthesized (for character-based billing). */ charactersCount: number; + /** Input text tokens (for token-based billing, e.g., OpenAI TTS). */ + inputTokens?: number; + /** Output audio tokens (for token-based billing, e.g., OpenAI TTS). */ + outputTokens?: number; streamed: boolean; segmentId?: string; speechId?: string; + /** Metadata for model provider and name tracking. */ + metadata?: MetricsMetadata; }; export type VADMetrics = { @@ -133,6 +156,10 @@ export type RealtimeModelMetrics = { * The duration of the response from created to done in milliseconds. */ durationMs: number; + /** + * The duration of the session connection in milliseconds (for session-based billing like xAI). + */ + sessionDurationMs?: number; /** * Time to first audio token in milliseconds. -1 if no audio token was sent. */ @@ -165,4 +192,24 @@ export type RealtimeModelMetrics = { * Details about the output tokens used in the Response. */ outputTokenDetails: RealtimeModelMetricsOutputTokenDetails; + /** Metadata for model provider and name tracking. */ + metadata?: MetricsMetadata; +}; + +export type InterruptionMetrics = { + type: 'interruption_metrics'; + timestamp: number; + /** Latest RTT time taken to perform inference, in milliseconds. */ + totalDuration: number; + /** Latest time taken by the model side, in milliseconds. */ + predictionDuration: number; + /** Latest total time from onset of speech to final prediction, in milliseconds. */ + detectionDelay: number; + /** Number of interruptions detected (incremental). */ + numInterruptions: number; + /** Number of backchannels detected (incremental). */ + numBackchannels: number; + /** Number of requests sent to the model (incremental). 
*/ + numRequests: number; + metadata?: MetricsMetadata; }; diff --git a/agents/src/metrics/index.ts b/agents/src/metrics/index.ts index f400a9638..f3cce796c 100644 --- a/agents/src/metrics/index.ts +++ b/agents/src/metrics/index.ts @@ -5,11 +5,22 @@ export type { AgentMetrics, EOUMetrics, + InterruptionMetrics, LLMMetrics, + MetricsMetadata, RealtimeModelMetrics, STTMetrics, TTSMetrics, VADMetrics, } from './base.js'; +export { + filterZeroValues, + ModelUsageCollector, + type InterruptionModelUsage, + type LLMModelUsage, + type ModelUsage, + type STTModelUsage, + type TTSModelUsage, +} from './model_usage.js'; export { UsageCollector, type UsageSummary } from './usage_collector.js'; export { logMetrics } from './utils.js'; diff --git a/agents/src/metrics/model_usage.test.ts b/agents/src/metrics/model_usage.test.ts new file mode 100644 index 000000000..d2f983beb --- /dev/null +++ b/agents/src/metrics/model_usage.test.ts @@ -0,0 +1,545 @@ +// SPDX-FileCopyrightText: 2024 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +import { beforeEach, describe, expect, it } from 'vitest'; +import type { LLMMetrics, RealtimeModelMetrics, STTMetrics, TTSMetrics } from './base.js'; +import { + type LLMModelUsage, + ModelUsageCollector, + type STTModelUsage, + type TTSModelUsage, + filterZeroValues, +} from './model_usage.js'; + +describe('model_usage', () => { + describe('filterZeroValues', () => { + it('should filter out zero values from LLMModelUsage', () => { + const usage: LLMModelUsage = { + type: 'llm_usage', + provider: 'openai', + model: 'gpt-4o', + inputTokens: 100, + inputCachedTokens: 0, + inputAudioTokens: 0, + inputCachedAudioTokens: 0, + inputTextTokens: 0, + inputCachedTextTokens: 0, + inputImageTokens: 0, + inputCachedImageTokens: 0, + outputTokens: 50, + outputAudioTokens: 0, + outputTextTokens: 0, + sessionDurationMs: 0, + }; + + const filtered = filterZeroValues(usage); + + expect(filtered.type).toBe('llm_usage'); + 
expect(filtered.provider).toBe('openai'); + expect(filtered.model).toBe('gpt-4o'); + expect(filtered.inputTokens).toBe(100); + expect(filtered.outputTokens).toBe(50); + // Zero values should be filtered out + expect(filtered.inputCachedTokens).toBeUndefined(); + expect(filtered.inputAudioTokens).toBeUndefined(); + expect(filtered.sessionDurationMs).toBeUndefined(); + }); + + it('should filter out zero values from TTSModelUsage', () => { + const usage: TTSModelUsage = { + type: 'tts_usage', + provider: 'elevenlabs', + model: 'eleven_turbo_v2', + inputTokens: 0, + outputTokens: 0, + charactersCount: 500, + audioDurationMs: 3000, + }; + + const filtered = filterZeroValues(usage); + + expect(filtered.type).toBe('tts_usage'); + expect(filtered.provider).toBe('elevenlabs'); + expect(filtered.charactersCount).toBe(500); + expect(filtered.audioDurationMs).toBe(3000); + expect(filtered.inputTokens).toBeUndefined(); + expect(filtered.outputTokens).toBeUndefined(); + }); + + it('should keep all values when none are zero', () => { + const usage: STTModelUsage = { + type: 'stt_usage', + provider: 'deepgram', + model: 'nova-2', + inputTokens: 10, + outputTokens: 20, + audioDurationMs: 5000, + }; + + const filtered = filterZeroValues(usage); + + expect(Object.keys(filtered)).toHaveLength(6); + expect(filtered).toEqual(usage); + }); + }); + + describe('ModelUsageCollector', () => { + let collector: ModelUsageCollector; + + beforeEach(() => { + collector = new ModelUsageCollector(); + }); + + describe('collect LLM metrics', () => { + it('should aggregate LLM metrics by provider and model', () => { + const metrics1: LLMMetrics = { + type: 'llm_metrics', + label: 'test', + requestId: 'req1', + timestamp: Date.now(), + durationMs: 100, + ttftMs: 50, + cancelled: false, + completionTokens: 100, + promptTokens: 200, + promptCachedTokens: 50, + totalTokens: 300, + tokensPerSecond: 10, + metadata: { + modelProvider: 'openai', + modelName: 'gpt-4o', + }, + }; + + const metrics2: LLMMetrics 
= { + type: 'llm_metrics', + label: 'test', + requestId: 'req2', + timestamp: Date.now(), + durationMs: 150, + ttftMs: 60, + cancelled: false, + completionTokens: 150, + promptTokens: 300, + promptCachedTokens: 75, + totalTokens: 450, + tokensPerSecond: 12, + metadata: { + modelProvider: 'openai', + modelName: 'gpt-4o', + }, + }; + + collector.collect(metrics1); + collector.collect(metrics2); + + const usage = collector.flatten(); + expect(usage).toHaveLength(1); + + const llmUsage = usage[0] as LLMModelUsage; + expect(llmUsage.type).toBe('llm_usage'); + expect(llmUsage.provider).toBe('openai'); + expect(llmUsage.model).toBe('gpt-4o'); + expect(llmUsage.inputTokens).toBe(500); // 200 + 300 + expect(llmUsage.inputCachedTokens).toBe(125); // 50 + 75 + expect(llmUsage.outputTokens).toBe(250); // 100 + 150 + }); + + it('should separate metrics by different providers', () => { + const openaiMetrics: LLMMetrics = { + type: 'llm_metrics', + label: 'test', + requestId: 'req1', + timestamp: Date.now(), + durationMs: 100, + ttftMs: 50, + cancelled: false, + completionTokens: 100, + promptTokens: 200, + promptCachedTokens: 0, + totalTokens: 300, + tokensPerSecond: 10, + metadata: { + modelProvider: 'openai', + modelName: 'gpt-4o', + }, + }; + + const anthropicMetrics: LLMMetrics = { + type: 'llm_metrics', + label: 'test', + requestId: 'req2', + timestamp: Date.now(), + durationMs: 120, + ttftMs: 55, + cancelled: false, + completionTokens: 80, + promptTokens: 150, + promptCachedTokens: 0, + totalTokens: 230, + tokensPerSecond: 8, + metadata: { + modelProvider: 'anthropic', + modelName: 'claude-3-5-sonnet', + }, + }; + + collector.collect(openaiMetrics); + collector.collect(anthropicMetrics); + + const usage = collector.flatten(); + expect(usage).toHaveLength(2); + + const openaiUsage = usage.find( + (u) => u.type === 'llm_usage' && u.provider === 'openai', + ) as LLMModelUsage; + const anthropicUsage = usage.find( + (u) => u.type === 'llm_usage' && u.provider === 'anthropic', 
+ ) as LLMModelUsage; + + expect(openaiUsage.inputTokens).toBe(200); + expect(openaiUsage.outputTokens).toBe(100); + expect(anthropicUsage.inputTokens).toBe(150); + expect(anthropicUsage.outputTokens).toBe(80); + }); + }); + + describe('collect TTS metrics', () => { + it('should aggregate TTS metrics by provider and model', () => { + const metrics1: TTSMetrics = { + type: 'tts_metrics', + label: 'test', + requestId: 'req1', + timestamp: Date.now(), + ttfbMs: 100, + durationMs: 500, + audioDurationMs: 3000, + cancelled: false, + charactersCount: 100, + inputTokens: 10, + outputTokens: 20, + streamed: true, + metadata: { + modelProvider: 'elevenlabs', + modelName: 'eleven_turbo_v2', + }, + }; + + const metrics2: TTSMetrics = { + type: 'tts_metrics', + label: 'test', + requestId: 'req2', + timestamp: Date.now(), + ttfbMs: 120, + durationMs: 600, + audioDurationMs: 4000, + cancelled: false, + charactersCount: 200, + inputTokens: 15, + outputTokens: 25, + streamed: true, + metadata: { + modelProvider: 'elevenlabs', + modelName: 'eleven_turbo_v2', + }, + }; + + collector.collect(metrics1); + collector.collect(metrics2); + + const usage = collector.flatten(); + expect(usage).toHaveLength(1); + + const ttsUsage = usage[0] as TTSModelUsage; + expect(ttsUsage.type).toBe('tts_usage'); + expect(ttsUsage.provider).toBe('elevenlabs'); + expect(ttsUsage.model).toBe('eleven_turbo_v2'); + expect(ttsUsage.charactersCount).toBe(300); // 100 + 200 + expect(ttsUsage.audioDurationMs).toBe(7000); // 3000 + 4000 + expect(ttsUsage.inputTokens).toBe(25); // 10 + 15 + expect(ttsUsage.outputTokens).toBe(45); // 20 + 25 + }); + }); + + describe('collect STT metrics', () => { + it('should aggregate STT metrics by provider and model', () => { + const metrics1: STTMetrics = { + type: 'stt_metrics', + label: 'test', + requestId: 'req1', + timestamp: Date.now(), + durationMs: 0, + audioDurationMs: 5000, + inputTokens: 50, + outputTokens: 100, + streamed: true, + metadata: { + modelProvider: 
'deepgram', + modelName: 'nova-2', + }, + }; + + const metrics2: STTMetrics = { + type: 'stt_metrics', + label: 'test', + requestId: 'req2', + timestamp: Date.now(), + durationMs: 0, + audioDurationMs: 3000, + inputTokens: 30, + outputTokens: 60, + streamed: true, + metadata: { + modelProvider: 'deepgram', + modelName: 'nova-2', + }, + }; + + collector.collect(metrics1); + collector.collect(metrics2); + + const usage = collector.flatten(); + expect(usage).toHaveLength(1); + + const sttUsage = usage[0] as STTModelUsage; + expect(sttUsage.type).toBe('stt_usage'); + expect(sttUsage.provider).toBe('deepgram'); + expect(sttUsage.model).toBe('nova-2'); + expect(sttUsage.audioDurationMs).toBe(8000); // 5000 + 3000 + expect(sttUsage.inputTokens).toBe(80); // 50 + 30 + expect(sttUsage.outputTokens).toBe(160); // 100 + 60 + }); + }); + + describe('collect realtime model metrics', () => { + it('should aggregate realtime model metrics with detailed token breakdown', () => { + const metrics: RealtimeModelMetrics = { + type: 'realtime_model_metrics', + label: 'test', + requestId: 'req1', + timestamp: Date.now(), + durationMs: 1000, + ttftMs: 100, + cancelled: false, + inputTokens: 500, + outputTokens: 300, + totalTokens: 800, + tokensPerSecond: 10, + sessionDurationMs: 5000, + inputTokenDetails: { + audioTokens: 200, + textTokens: 250, + imageTokens: 50, + cachedTokens: 100, + cachedTokensDetails: { + audioTokens: 30, + textTokens: 50, + imageTokens: 20, + }, + }, + outputTokenDetails: { + textTokens: 200, + audioTokens: 100, + imageTokens: 0, + }, + metadata: { + modelProvider: 'openai', + modelName: 'gpt-4o-realtime', + }, + }; + + collector.collect(metrics); + + const usage = collector.flatten(); + expect(usage).toHaveLength(1); + + const llmUsage = usage[0] as LLMModelUsage; + expect(llmUsage.type).toBe('llm_usage'); + expect(llmUsage.provider).toBe('openai'); + expect(llmUsage.model).toBe('gpt-4o-realtime'); + expect(llmUsage.inputTokens).toBe(500); + 
expect(llmUsage.inputCachedTokens).toBe(100); + expect(llmUsage.inputAudioTokens).toBe(200); + expect(llmUsage.inputCachedAudioTokens).toBe(30); + expect(llmUsage.inputTextTokens).toBe(250); + expect(llmUsage.inputCachedTextTokens).toBe(50); + expect(llmUsage.inputImageTokens).toBe(50); + expect(llmUsage.inputCachedImageTokens).toBe(20); + expect(llmUsage.outputTokens).toBe(300); + expect(llmUsage.outputTextTokens).toBe(200); + expect(llmUsage.outputAudioTokens).toBe(100); + expect(llmUsage.sessionDurationMs).toBe(5000); + }); + }); + + describe('mixed metrics collection', () => { + it('should collect and separate LLM, TTS, and STT metrics', () => { + const llmMetrics: LLMMetrics = { + type: 'llm_metrics', + label: 'test', + requestId: 'req1', + timestamp: Date.now(), + durationMs: 100, + ttftMs: 50, + cancelled: false, + completionTokens: 100, + promptTokens: 200, + promptCachedTokens: 0, + totalTokens: 300, + tokensPerSecond: 10, + metadata: { + modelProvider: 'openai', + modelName: 'gpt-4o', + }, + }; + + const ttsMetrics: TTSMetrics = { + type: 'tts_metrics', + label: 'test', + requestId: 'req2', + timestamp: Date.now(), + ttfbMs: 100, + durationMs: 500, + audioDurationMs: 3000, + cancelled: false, + charactersCount: 100, + streamed: true, + metadata: { + modelProvider: 'elevenlabs', + modelName: 'eleven_turbo_v2', + }, + }; + + const sttMetrics: STTMetrics = { + type: 'stt_metrics', + label: 'test', + requestId: 'req3', + timestamp: Date.now(), + durationMs: 0, + audioDurationMs: 5000, + streamed: true, + metadata: { + modelProvider: 'deepgram', + modelName: 'nova-2', + }, + }; + + collector.collect(llmMetrics); + collector.collect(ttsMetrics); + collector.collect(sttMetrics); + + const usage = collector.flatten(); + expect(usage).toHaveLength(3); + + const llmUsage = usage.find((u) => u.type === 'llm_usage'); + const ttsUsage = usage.find((u) => u.type === 'tts_usage'); + const sttUsage = usage.find((u) => u.type === 'stt_usage'); + + 
expect(llmUsage).toBeDefined(); + expect(ttsUsage).toBeDefined(); + expect(sttUsage).toBeDefined(); + }); + }); + + describe('flatten returns copies', () => { + it('should return deep copies of usage objects', () => { + const metrics: LLMMetrics = { + type: 'llm_metrics', + label: 'test', + requestId: 'req1', + timestamp: Date.now(), + durationMs: 100, + ttftMs: 50, + cancelled: false, + completionTokens: 100, + promptTokens: 200, + promptCachedTokens: 0, + totalTokens: 300, + tokensPerSecond: 10, + metadata: { + modelProvider: 'openai', + modelName: 'gpt-4o', + }, + }; + + collector.collect(metrics); + + const usage1 = collector.flatten(); + const usage2 = collector.flatten(); + + // Should be equal values + expect(usage1[0]).toEqual(usage2[0]); + + // But not the same object reference + expect(usage1[0]).not.toBe(usage2[0]); + + // Modifying one shouldn't affect the other + (usage1[0] as LLMModelUsage).inputTokens = 9999; + expect((usage2[0] as LLMModelUsage).inputTokens).toBe(200); + }); + }); + + describe('handles missing metadata', () => { + it('should use empty strings when metadata is missing', () => { + const metrics: LLMMetrics = { + type: 'llm_metrics', + label: 'test', + requestId: 'req1', + timestamp: Date.now(), + durationMs: 100, + ttftMs: 50, + cancelled: false, + completionTokens: 100, + promptTokens: 200, + promptCachedTokens: 0, + totalTokens: 300, + tokensPerSecond: 10, + // No metadata + }; + + collector.collect(metrics); + + const usage = collector.flatten(); + expect(usage).toHaveLength(1); + + const llmUsage = usage[0] as LLMModelUsage; + expect(llmUsage.provider).toBe(''); + expect(llmUsage.model).toBe(''); + }); + }); + + describe('ignores VAD and EOU metrics', () => { + it('should not collect VAD metrics', () => { + const vadMetrics = { + type: 'vad_metrics' as const, + label: 'test', + timestamp: Date.now(), + idleTimeMs: 100, + inferenceDurationTotalMs: 50, + inferenceCount: 10, + }; + + collector.collect(vadMetrics); + + const usage = 
collector.flatten(); + expect(usage).toHaveLength(0); + }); + + it('should not collect EOU metrics', () => { + const eouMetrics = { + type: 'eou_metrics' as const, + timestamp: Date.now(), + endOfUtteranceDelayMs: 100, + transcriptionDelayMs: 50, + onUserTurnCompletedDelayMs: 30, + lastSpeakingTimeMs: Date.now(), + }; + + collector.collect(eouMetrics); + + const usage = collector.flatten(); + expect(usage).toHaveLength(0); + }); + }); + }); +}); diff --git a/agents/src/metrics/model_usage.ts b/agents/src/metrics/model_usage.ts new file mode 100644 index 000000000..5e723fb51 --- /dev/null +++ b/agents/src/metrics/model_usage.ts @@ -0,0 +1,262 @@ +// SPDX-FileCopyrightText: 2024 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +import type { + AgentMetrics, + InterruptionMetrics, + LLMMetrics, + RealtimeModelMetrics, + STTMetrics, + TTSMetrics, +} from './base.js'; + +export type LLMModelUsage = { + type: 'llm_usage'; + /** The provider name (e.g., 'openai', 'anthropic'). */ + provider: string; + /** The model name (e.g., 'gpt-4o', 'claude-3-5-sonnet'). */ + model: string; + /** Total input tokens. */ + inputTokens: number; + /** Input tokens served from cache. */ + inputCachedTokens: number; + /** Input audio tokens (for multimodal models). */ + inputAudioTokens: number; + /** Cached input audio tokens. */ + inputCachedAudioTokens: number; + /** Input text tokens. */ + inputTextTokens: number; + /** Cached input text tokens. */ + inputCachedTextTokens: number; + /** Input image tokens (for multimodal models). */ + inputImageTokens: number; + /** Cached input image tokens. */ + inputCachedImageTokens: number; + /** Total output tokens. */ + outputTokens: number; + /** Output audio tokens (for multimodal models). */ + outputAudioTokens: number; + /** Output text tokens. */ + outputTextTokens: number; + /** Total session connection duration in milliseconds (for session-based billing like xAI). 
*/ + sessionDurationMs: number; +}; + +export type TTSModelUsage = { + type: 'tts_usage'; + /** The provider name (e.g., 'elevenlabs', 'cartesia'). */ + provider: string; + /** The model name (e.g., 'eleven_turbo_v2', 'sonic'). */ + model: string; + /** Input text tokens (for token-based TTS billing, e.g., OpenAI TTS). */ + inputTokens: number; + /** Output audio tokens (for token-based TTS billing, e.g., OpenAI TTS). */ + outputTokens: number; + /** Number of characters synthesized (for character-based TTS billing). */ + charactersCount: number; + /** + * Duration of generated audio in milliseconds. + */ + audioDurationMs: number; +}; + +export type STTModelUsage = { + type: 'stt_usage'; + /** The provider name (e.g., 'deepgram', 'assemblyai'). */ + provider: string; + /** The model name (e.g., 'nova-2', 'best'). */ + model: string; + /** Input audio tokens (for token-based STT billing). */ + inputTokens: number; + /** Output text tokens (for token-based STT billing). */ + outputTokens: number; + /** Duration of processed audio in milliseconds. */ + audioDurationMs: number; +}; + +export type InterruptionModelUsage = { + type: 'interruption_usage'; + /** The provider name (e.g., 'livekit'). */ + provider: string; + /** The model name (e.g., 'adaptive interruption'). */ + model: string; + /** Total number of requests sent. */ + totalRequests: number; +}; + +export type ModelUsage = LLMModelUsage | TTSModelUsage | STTModelUsage | InterruptionModelUsage; + +export function filterZeroValues(usage: T): Partial { + const result: Partial = {} as Partial; + for (const [key, value] of Object.entries(usage)) { + if (value !== 0 && value !== 0.0) { + (result as Record)[key] = value; + } + } + return result; +} + +export class ModelUsageCollector { + private llmUsage: Map = new Map(); + private ttsUsage: Map = new Map(); + private sttUsage: Map = new Map(); + + private interruptionUsage: Map = new Map(); + + /** Extract provider and model from metrics metadata. 
*/ + private extractProviderModel( + metrics: LLMMetrics | STTMetrics | TTSMetrics | RealtimeModelMetrics | InterruptionMetrics, + ): [string, string] { + let provider = ''; + let model = ''; + if (metrics.metadata) { + provider = metrics.metadata.modelProvider || ''; + model = metrics.metadata.modelName || ''; + } + return [provider, model]; + } + + /** Get or create an LLMModelUsage for the given provider/model combination. */ + private getLLMUsage(provider: string, model: string): LLMModelUsage { + const key = `${provider}:${model}`; + let usage = this.llmUsage.get(key); + if (!usage) { + usage = { + type: 'llm_usage', + provider, + model, + inputTokens: 0, + inputCachedTokens: 0, + inputAudioTokens: 0, + inputCachedAudioTokens: 0, + inputTextTokens: 0, + inputCachedTextTokens: 0, + inputImageTokens: 0, + inputCachedImageTokens: 0, + outputTokens: 0, + outputAudioTokens: 0, + outputTextTokens: 0, + sessionDurationMs: 0, + }; + this.llmUsage.set(key, usage); + } + return usage; + } + + /** Get or create a TTSModelUsage for the given provider/model combination. */ + private getTTSUsage(provider: string, model: string): TTSModelUsage { + const key = `${provider}:${model}`; + let usage = this.ttsUsage.get(key); + if (!usage) { + usage = { + type: 'tts_usage', + provider, + model, + inputTokens: 0, + outputTokens: 0, + charactersCount: 0, + audioDurationMs: 0, + }; + this.ttsUsage.set(key, usage); + } + return usage; + } + + /** Get or create an STTModelUsage for the given provider/model combination. 
*/ + private getSTTUsage(provider: string, model: string): STTModelUsage { + const key = `${provider}:${model}`; + let usage = this.sttUsage.get(key); + if (!usage) { + usage = { + type: 'stt_usage', + provider, + model, + inputTokens: 0, + outputTokens: 0, + audioDurationMs: 0, + }; + this.sttUsage.set(key, usage); + } + return usage; + } + + private getInterruptionUsage(provider: string, model: string): InterruptionModelUsage { + const key = `${provider}:${model}`; + let usage = this.interruptionUsage.get(key); + if (!usage) { + usage = { + type: 'interruption_usage', + provider, + model, + totalRequests: 0, + }; + this.interruptionUsage.set(key, usage); + } + return usage; + } + + /** Collect metrics and aggregate usage by model/provider. */ + collect(metrics: AgentMetrics): void { + if (metrics.type === 'llm_metrics') { + const [provider, model] = this.extractProviderModel(metrics); + const usage = this.getLLMUsage(provider, model); + usage.inputTokens += metrics.promptTokens; + usage.inputCachedTokens += metrics.promptCachedTokens; + usage.outputTokens += metrics.completionTokens; + } else if (metrics.type === 'realtime_model_metrics') { + const [provider, model] = this.extractProviderModel(metrics); + const usage = this.getLLMUsage(provider, model); + usage.inputTokens += metrics.inputTokens; + usage.inputCachedTokens += metrics.inputTokenDetails.cachedTokens; + + usage.inputTextTokens += metrics.inputTokenDetails.textTokens; + usage.inputCachedTextTokens += metrics.inputTokenDetails.cachedTokensDetails?.textTokens ?? 0; + usage.inputImageTokens += metrics.inputTokenDetails.imageTokens; + usage.inputCachedImageTokens += + metrics.inputTokenDetails.cachedTokensDetails?.imageTokens ?? 0; + usage.inputAudioTokens += metrics.inputTokenDetails.audioTokens; + usage.inputCachedAudioTokens += + metrics.inputTokenDetails.cachedTokensDetails?.audioTokens ?? 
0; + + usage.outputTextTokens += metrics.outputTokenDetails.textTokens; + usage.outputAudioTokens += metrics.outputTokenDetails.audioTokens; + usage.outputTokens += metrics.outputTokens; + usage.sessionDurationMs += metrics.sessionDurationMs ?? 0; + } else if (metrics.type === 'tts_metrics') { + const [provider, model] = this.extractProviderModel(metrics); + const ttsUsage = this.getTTSUsage(provider, model); + ttsUsage.inputTokens += metrics.inputTokens ?? 0; + ttsUsage.outputTokens += metrics.outputTokens ?? 0; + ttsUsage.charactersCount += metrics.charactersCount; + ttsUsage.audioDurationMs += metrics.audioDurationMs; + } else if (metrics.type === 'stt_metrics') { + const [provider, model] = this.extractProviderModel(metrics); + const sttUsage = this.getSTTUsage(provider, model); + sttUsage.inputTokens += metrics.inputTokens ?? 0; + sttUsage.outputTokens += metrics.outputTokens ?? 0; + sttUsage.audioDurationMs += metrics.audioDurationMs; + } else if (metrics.type === 'interruption_metrics') { + const [provider, model] = this.extractProviderModel(metrics); + const usage = this.getInterruptionUsage(provider, model); + usage.totalRequests += metrics.numRequests; + } + // VAD and EOU metrics are not aggregated for usage tracking. + } + + flatten(): ModelUsage[] { + const result: ModelUsage[] = []; + for (const u of this.llmUsage.values()) { + result.push({ ...u }); + } + for (const u of this.ttsUsage.values()) { + result.push({ ...u }); + } + for (const u of this.sttUsage.values()) { + result.push({ ...u }); + } + for (const u of this.interruptionUsage.values()) { + result.push({ ...u }); + } + return result; + } +} diff --git a/agents/src/metrics/usage_collector.ts b/agents/src/metrics/usage_collector.ts index c7f0e6c3d..c815c8394 100644 --- a/agents/src/metrics/usage_collector.ts +++ b/agents/src/metrics/usage_collector.ts @@ -1,8 +1,13 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
// // SPDX-License-Identifier: Apache-2.0 +import { log } from '../log.js'; import type { AgentMetrics } from './base.js'; +/** + * @deprecated Use LLMModelUsage, TTSModelUsage, or STTModelUsage instead. + * These new types provide per-model/provider usage aggregation for more detailed tracking. + */ export interface UsageSummary { llmPromptTokens: number; llmPromptCachedTokens: number; @@ -11,10 +16,16 @@ export interface UsageSummary { sttAudioDurationMs: number; } +/** + * @deprecated Use ModelUsageCollector instead. + * ModelUsageCollector provides per-model/provider usage aggregation for more detailed tracking. + */ export class UsageCollector { private summary: UsageSummary; + private logger = log(); constructor() { + this.logger.warn('UsageCollector is deprecated. Use ModelUsageCollector instead.'); this.summary = { llmPromptTokens: 0, llmPromptCachedTokens: 0, diff --git a/agents/src/metrics/utils.ts b/agents/src/metrics/utils.ts index cf98f8d1d..ced021e63 100644 --- a/agents/src/metrics/utils.ts +++ b/agents/src/metrics/utils.ts @@ -60,5 +60,16 @@ export const logMetrics = (metrics: AgentMetrics) => { audioDurationMs: Math.round(metrics.audioDurationMs), }) .info('STT metrics'); + } else if (metrics.type === 'interruption_metrics') { + logger + .child({ + totalDurationMs: roundTwoDecimals(metrics.totalDuration), + predictionDurationMs: roundTwoDecimals(metrics.predictionDuration), + detectionDelayMs: roundTwoDecimals(metrics.detectionDelay), + numInterruptions: metrics.numInterruptions, + numBackchannels: metrics.numBackchannels, + numRequests: metrics.numRequests, + }) + .info('Interruption metrics'); } }; diff --git a/agents/src/stream/multi_input_stream.test.ts b/agents/src/stream/multi_input_stream.test.ts index cda78b62b..fb648ff32 100644 --- a/agents/src/stream/multi_input_stream.test.ts +++ b/agents/src/stream/multi_input_stream.test.ts @@ -2,7 +2,8 @@ // // SPDX-License-Identifier: Apache-2.0 import { ReadableStream } from 'node:stream/web'; 
-import { describe, expect, it } from 'vitest'; +import { beforeAll, describe, expect, it } from 'vitest'; +import { initializeLogger } from '../log.js'; import { delay } from '../utils.js'; import { MultiInputStream } from './multi_input_stream.js'; @@ -16,6 +17,10 @@ function streamFrom(values: T[]): ReadableStream { } describe('MultiInputStream', () => { + beforeAll(() => { + initializeLogger({ pretty: false }); + }); + // --------------------------------------------------------------------------- // Basic functionality // --------------------------------------------------------------------------- diff --git a/agents/src/stream/stream_channel.ts b/agents/src/stream/stream_channel.ts index 1fb68bab2..edaeaa856 100644 --- a/agents/src/stream/stream_channel.ts +++ b/agents/src/stream/stream_channel.ts @@ -4,14 +4,16 @@ import type { ReadableStream } from 'node:stream/web'; import { IdentityTransform } from './identity_transform.js'; -export interface StreamChannel { +export interface StreamChannel { write(chunk: T): Promise; close(): Promise; stream(): ReadableStream; + abort(error: E): Promise; readonly closed: boolean; + addStreamInput(stream: ReadableStream): void; } -export function createStreamChannel(): StreamChannel { +export function createStreamChannel(): StreamChannel { const transform = new IdentityTransform(); const writer = transform.writable.getWriter(); let isClosed = false; @@ -19,6 +21,36 @@ export function createStreamChannel(): StreamChannel { return { write: (chunk: T) => writer.write(chunk), stream: () => transform.readable, + abort: async (error: E) => { + if (isClosed) return; + isClosed = true; + try { + await writer.abort(error); + } catch (e) { + if (e instanceof Error && e.name === 'TypeError') return; + throw e; + } + }, + addStreamInput: (newInputStream) => { + if (isClosed) return; + const reader = newInputStream.getReader(); + (async () => { + try { + while (!isClosed) { + const { done, value } = await reader.read(); + if (done) 
break; + await writer.write(value); + } + } catch (err) { + if (!isClosed) { + isClosed = true; + await writer.abort(err as E); + } + } finally { + reader.releaseLock(); + } + })().catch(() => {}); + }, close: async () => { try { const result = await writer.close(); diff --git a/agents/src/stt/stt.ts b/agents/src/stt/stt.ts index b7a4a7ea1..72cdb4a26 100644 --- a/agents/src/stt/stt.ts +++ b/agents/src/stt/stt.ts @@ -67,6 +67,10 @@ export interface SpeechData { export interface RecognitionUsage { /** Duration of the audio that was recognized in seconds. */ audioDuration: number; + /** Input audio tokens (for token-based STT billing). */ + inputTokens?: number; + /** Output text tokens (for token-based STT billing). */ + outputTokens?: number; } /** SpeechEvent is a packet of speech-to-text data. */ @@ -129,6 +133,30 @@ export abstract class STT extends (EventEmitter as new () => TypedEmitter { const startTime = process.hrtime.bigint(); @@ -142,6 +170,10 @@ export abstract class STT extends (EventEmitter as new () => TypedEmitter durationMs: 0, label: this.#stt.label, audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000), + inputTokens: event.recognitionUsage!.inputTokens ?? 0, + outputTokens: event.recognitionUsage!.outputTokens ?? 
0, streamed: true, + metadata: { + modelProvider: this.#stt.provider, + modelName: this.#stt.model, + }, }; this.#stt.emit('metrics_collected', metrics); } diff --git a/agents/src/telemetry/otel_http_exporter.ts b/agents/src/telemetry/otel_http_exporter.ts index ae6b6590e..43f01faea 100644 --- a/agents/src/telemetry/otel_http_exporter.ts +++ b/agents/src/telemetry/otel_http_exporter.ts @@ -58,6 +58,16 @@ export class SimpleOTLPHttpLogExporter { private readonly config: SimpleOTLPHttpLogExporterConfig; private jwt: string | null = null; + private static readonly FORCE_DOUBLE_KEYS = new Set([ + 'transcriptConfidence', + 'transcriptionDelay', + 'endOfTurnDelay', + 'onUserTurnCompletedDelay', + 'llmNodeTtft', + 'ttsNodeTtfb', + 'e2eLatency', + ]); + constructor(config: SimpleOTLPHttpLogExporterConfig) { this.config = config; } @@ -72,6 +82,7 @@ export class SimpleOTLPHttpLogExporter { const endpoint = `https://${this.config.cloudHostname}/observability/logs/otlp/v0`; const payload = this.buildPayload(records); + const payloadJson = JSON.stringify(payload); const response = await fetch(endpoint, { method: 'POST', @@ -79,7 +90,7 @@ export class SimpleOTLPHttpLogExporter { Authorization: `Bearer ${this.jwt}`, 'Content-Type': 'application/json', }, - body: JSON.stringify(payload), + body: payloadJson, }); if (!response.ok) { @@ -160,11 +171,11 @@ export class SimpleOTLPHttpLogExporter { ): Array<{ key: string; value: unknown }> { return Object.entries(attrs).map(([key, value]) => ({ key, - value: this.convertValue(value), + value: this.convertValue(value, key), })); } - private convertValue(value: unknown): unknown { + private convertValue(value: unknown, path: string = ''): unknown { if (value === null || value === undefined) { return { stringValue: '' }; } @@ -172,20 +183,32 @@ export class SimpleOTLPHttpLogExporter { return { stringValue: value }; } if (typeof value === 'number') { + const leafKey = + path + .split('.') + .pop() + ?.replace(/\[\d+\]$/, '') ?? 
path; + if (SimpleOTLPHttpLogExporter.FORCE_DOUBLE_KEYS.has(leafKey)) { + return { doubleValue: value }; + } return Number.isInteger(value) ? { intValue: String(value) } : { doubleValue: value }; } if (typeof value === 'boolean') { return { boolValue: value }; } if (Array.isArray(value)) { - return { arrayValue: { values: value.map((v) => this.convertValue(v)) } }; + return { + arrayValue: { + values: value.map((v, i) => this.convertValue(v, `${path}[${i}]`)), + }, + }; } if (typeof value === 'object') { return { kvlistValue: { values: Object.entries(value as Record).map(([k, v]) => ({ key: k, - value: this.convertValue(v), + value: this.convertValue(v, path ? `${path}.${k}` : k), })), }, }; diff --git a/agents/src/telemetry/trace_types.ts b/agents/src/telemetry/trace_types.ts index 1663bd75d..cc4d89443 100644 --- a/agents/src/telemetry/trace_types.ts +++ b/agents/src/telemetry/trace_types.ts @@ -33,6 +33,7 @@ export const ATTR_PROVIDER_TOOLS = 'lk.provider_tools'; export const ATTR_TOOL_SETS = 'lk.tool_sets'; export const ATTR_RESPONSE_TEXT = 'lk.response.text'; export const ATTR_RESPONSE_FUNCTION_CALLS = 'lk.response.function_calls'; +/** Time to first token in seconds. */ export const ATTR_RESPONSE_TTFT = 'lk.response.ttft'; // function tool @@ -46,6 +47,7 @@ export const ATTR_FUNCTION_TOOL_OUTPUT = 'lk.function_tool.output'; export const ATTR_TTS_INPUT_TEXT = 'lk.input_text'; export const ATTR_TTS_STREAMING = 'lk.tts.streaming'; export const ATTR_TTS_LABEL = 'lk.tts.label'; +/** Time to first byte in seconds. 
*/ export const ATTR_RESPONSE_TTFB = 'lk.response.ttfb'; // eou detection @@ -58,18 +60,26 @@ export const ATTR_TRANSCRIPT_CONFIDENCE = 'lk.transcript_confidence'; export const ATTR_TRANSCRIPTION_DELAY = 'lk.transcription_delay'; export const ATTR_END_OF_TURN_DELAY = 'lk.end_of_turn_delay'; +// Adaptive Interruption attributes +export const ATTR_IS_INTERRUPTION = 'lk.is_interruption'; +export const ATTR_INTERRUPTION_PROBABILITY = 'lk.interruption.probability'; +export const ATTR_INTERRUPTION_TOTAL_DURATION = 'lk.interruption.total_duration'; +export const ATTR_INTERRUPTION_PREDICTION_DURATION = 'lk.interruption.prediction_duration'; +export const ATTR_INTERRUPTION_DETECTION_DELAY = 'lk.interruption.detection_delay'; + // metrics export const ATTR_LLM_METRICS = 'lk.llm_metrics'; export const ATTR_TTS_METRICS = 'lk.tts_metrics'; export const ATTR_REALTIME_MODEL_METRICS = 'lk.realtime_model_metrics'; -// latency span attributes +/** End-to-end latency in seconds. */ export const ATTR_E2E_LATENCY = 'lk.e2e_latency'; // OpenTelemetry GenAI attributes // OpenTelemetry specification: https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/ export const ATTR_GEN_AI_OPERATION_NAME = 'gen_ai.operation.name'; export const ATTR_GEN_AI_REQUEST_MODEL = 'gen_ai.request.model'; +/** The provider name (e.g., 'openai', 'anthropic'). 
*/ export const ATTR_GEN_AI_PROVIDER_NAME = 'gen_ai.provider.name'; export const ATTR_GEN_AI_USAGE_INPUT_TOKENS = 'gen_ai.usage.input_tokens'; export const ATTR_GEN_AI_USAGE_OUTPUT_TOKENS = 'gen_ai.usage.output_tokens'; @@ -97,10 +107,3 @@ export const ATTR_EXCEPTION_MESSAGE = 'exception.message'; // Platform-specific attributes export const ATTR_LANGFUSE_COMPLETION_START_TIME = 'langfuse.observation.completion_start_time'; - -// Adaptive Interruption attributes -export const ATTR_IS_INTERRUPTION = 'lk.is_interruption'; -export const ATTR_INTERRUPTION_PROBABILITY = 'lk.interruption.probability'; -export const ATTR_INTERRUPTION_TOTAL_DURATION = 'lk.interruption.total_duration'; -export const ATTR_INTERRUPTION_PREDICTION_DURATION = 'lk.interruption.prediction_duration'; -export const ATTR_INTERRUPTION_DETECTION_DELAY = 'lk.interruption.detection_delay'; diff --git a/agents/src/telemetry/traces.ts b/agents/src/telemetry/traces.ts index 28ef4c746..8ee52e586 100644 --- a/agents/src/telemetry/traces.ts +++ b/agents/src/telemetry/traces.ts @@ -22,8 +22,9 @@ import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions'; import FormData from 'form-data'; import { AccessToken } from 'livekit-server-sdk'; import fs from 'node:fs/promises'; -import type { ChatContent, ChatItem } from '../llm/index.js'; +import type { ChatContent, ChatItem, ChatRole } from '../llm/index.js'; import { enableOtelLogging } from '../log.js'; +import { filterZeroValues } from '../metrics/model_usage.js'; import type { SessionReport } from '../voice/report.js'; import { type SimpleLogRecord, SimpleOTLPHttpLogExporter } from './otel_http_exporter.js'; import { flushPinoLogs, initPinoCloudExporter } from './pino_otel_transport.js'; @@ -285,24 +286,80 @@ export async function flushOtelLogs(): Promise { await flushPinoLogs(); } +/** Proto-compatible role enum values. 
*/ +type ProtoRole = 'DEVELOPER' | 'SYSTEM' | 'USER' | 'ASSISTANT'; + +const ROLE_MAP: Record = { + developer: 'DEVELOPER', + system: 'SYSTEM', + user: 'USER', + assistant: 'ASSISTANT', +}; + +interface ProtoMetricsReport { + startedSpeakingAt?: string; + stoppedSpeakingAt?: string; + transcriptionDelay?: number; + endOfTurnDelay?: number; + onUserTurnCompletedDelay?: number; + llmNodeTtft?: number; + ttsNodeTtfb?: number; + e2eLatency?: number; +} + +interface ProtoMessage { + id: string; + role: ProtoRole; + content: { text: ChatContent }[]; + createdAt: string; + interrupted?: boolean; + extra?: Record; + transcriptConfidence?: number; + metrics?: ProtoMetricsReport; +} + +interface ProtoFunctionCall { + id: string; + callId: string; + arguments: string | Record; + name: string; + createdAt: string; +} + +interface ProtoFunctionCallOutput { + id: string; + name: string; + callId: string; + output: string; + isError: boolean; + createdAt: string; +} + +interface ProtoAgentHandoff { + id: string; + newAgentId: string; + createdAt: string; + oldAgentId?: string; +} + +interface ProtoChatItem { + message?: ProtoMessage; + functionCall?: ProtoFunctionCall; + functionCallOutput?: ProtoFunctionCallOutput; + agentHandoff?: ProtoAgentHandoff; +} + /** * Convert ChatItem to proto-compatible dictionary format. * TODO: Use actual agent_session proto types once @livekit/protocol v1.43.1+ is published */ -function chatItemToProto(item: ChatItem): Record { - const itemDict: Record = {}; +function chatItemToProto(item: ChatItem): ProtoChatItem { + const itemDict: ProtoChatItem = {}; if (item.type === 'message') { - const roleMap: Record = { - developer: 'DEVELOPER', - system: 'SYSTEM', - user: 'USER', - assistant: 'ASSISTANT', - }; - - const msg: Record = { + const msg: ProtoMessage = { id: item.id, - role: roleMap[item.role] || item.role.toUpperCase(), + role: ROLE_MAP[item.role] ?? 
(item.role.toUpperCase() as ProtoRole), content: item.content.map((c: ChatContent) => ({ text: c })), createdAt: toRFC3339(item.createdAt), }; @@ -311,44 +368,43 @@ function chatItemToProto(item: ChatItem): Record { msg.interrupted = item.interrupted; } - // TODO(brian): Add extra and transcriptConfidence to ChatMessage - // if (item.extra && Object.keys(item.extra).length > 0) { - // msg.extra = item.extra; - // } - - // if (item.transcriptConfidence !== undefined && item.transcriptConfidence !== null) { - // msg.transcriptConfidence = item.transcriptConfidence; - // } - - // TODO(brian): Add metrics to ChatMessage - // const metrics = item.metrics || {}; - // if (Object.keys(metrics).length > 0) { - // msg.metrics = {}; - // if (metrics.started_speaking_at) { - // msg.metrics.startedSpeakingAt = toRFC3339(metrics.started_speaking_at); - // } - // if (metrics.stopped_speaking_at) { - // msg.metrics.stoppedSpeakingAt = toRFC3339(metrics.stopped_speaking_at); - // } - // if (metrics.transcription_delay !== undefined) { - // msg.metrics.transcriptionDelay = metrics.transcription_delay; - // } - // if (metrics.end_of_turn_delay !== undefined) { - // msg.metrics.endOfTurnDelay = metrics.end_of_turn_delay; - // } - // if (metrics.on_user_turn_completed_delay !== undefined) { - // msg.metrics.onUserTurnCompletedDelay = metrics.on_user_turn_completed_delay; - // } - // if (metrics.llm_node_ttft !== undefined) { - // msg.metrics.llmNodeTtft = metrics.llm_node_ttft; - // } - // if (metrics.tts_node_ttfb !== undefined) { - // msg.metrics.ttsNodeTtfb = metrics.tts_node_ttfb; - // } - // if (metrics.e2e_latency !== undefined) { - // msg.metrics.e2eLatency = metrics.e2e_latency; - // } - // } + if (item.extra && Object.keys(item.extra).length > 0) { + msg.extra = item.extra; + } + + if (item.transcriptConfidence !== undefined) { + msg.transcriptConfidence = item.transcriptConfidence; + } + + const metrics = item.metrics; + if (metrics && Object.keys(metrics).length > 0) { + 
const protoMetrics: ProtoMetricsReport = {}; + if (metrics.startedSpeakingAt !== undefined) { + protoMetrics.startedSpeakingAt = toRFC3339(metrics.startedSpeakingAt * 1000); + } + if (metrics.stoppedSpeakingAt !== undefined) { + protoMetrics.stoppedSpeakingAt = toRFC3339(metrics.stoppedSpeakingAt * 1000); + } + if (metrics.transcriptionDelay !== undefined) { + protoMetrics.transcriptionDelay = metrics.transcriptionDelay; + } + if (metrics.endOfTurnDelay !== undefined) { + protoMetrics.endOfTurnDelay = metrics.endOfTurnDelay; + } + if (metrics.onUserTurnCompletedDelay !== undefined) { + protoMetrics.onUserTurnCompletedDelay = metrics.onUserTurnCompletedDelay; + } + if (metrics.llmNodeTtft !== undefined) { + protoMetrics.llmNodeTtft = metrics.llmNodeTtft; + } + if (metrics.ttsNodeTtfb !== undefined) { + protoMetrics.ttsNodeTtfb = metrics.ttsNodeTtfb; + } + if (metrics.e2eLatency !== undefined) { + protoMetrics.e2eLatency = metrics.e2eLatency; + } + msg.metrics = protoMetrics; + } itemDict.message = msg; } else if (item.type === 'function_call') { @@ -369,7 +425,7 @@ function chatItemToProto(item: ChatItem): Record { createdAt: toRFC3339(item.createdAt), }; } else if (item.type === 'agent_handoff') { - const handoff: Record = { + const handoff: ProtoAgentHandoff = { id: item.id, newAgentId: item.newAgentId, createdAt: toRFC3339(item.createdAt), @@ -397,9 +453,7 @@ function chatItemToProto(item: ChatItem): Record { } /** - * Convert timestamp to RFC3339 format matching Python's _to_rfc3339. - * Note: TypeScript createdAt is in milliseconds (Date.now()), not seconds like Python. 
- * @internal + * Convert timestamp to RFC3339 format */ function toRFC3339(valueMs: number | Date): string { // valueMs is already in milliseconds (from Date.now()) @@ -445,6 +499,8 @@ export async function uploadSessionReport(options: { 'logger.name': 'chat_history', }; + const usage = report.modelUsage?.map(filterZeroValues) || null; + logRecords.push({ body: 'session report', timestampMs: report.startedAt || report.timestamp || 0, @@ -453,6 +509,7 @@ export async function uploadSessionReport(options: { 'session.options': report.options || {}, 'session.report_timestamp': report.timestamp, agent_name: agentName, + usage, }, }); diff --git a/agents/src/tts/tts.ts b/agents/src/tts/tts.ts index 8b8dcfda0..ab0477144 100644 --- a/agents/src/tts/tts.ts +++ b/agents/src/tts/tts.ts @@ -96,6 +96,30 @@ export abstract class TTS extends (EventEmitter as new () => TypedEmitter; #ttsRequestSpan?: Span; + #inputTokens = 0; + #outputTokens = 0; constructor(tts: TTS, connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS) { this.#tts = tts; @@ -284,6 +310,18 @@ export abstract class SynthesizeStream } } + /** + * Set token usage for token-based TTS billing (e.g., OpenAI TTS). + * Plugins should call this method to report token usage. 
+ */ + protected setTokenUsage({ + inputTokens = 0, + outputTokens = 0, + }: { inputTokens?: number; outputTokens?: number } = {}): void { + this.#inputTokens = inputTokens; + this.#outputTokens = outputTokens; + } + protected async monitorMetrics() { const startTime = process.hrtime.bigint(); let audioDurationMs = 0; @@ -305,12 +343,22 @@ export abstract class SynthesizeStream audioDurationMs: roundedAudioDurationMs, cancelled: this.abortController.signal.aborted, label: this.#tts.label, - streamed: false, + inputTokens: this.#inputTokens, + outputTokens: this.#outputTokens, + streamed: true, + metadata: { + modelProvider: this.#tts.provider, + modelName: this.#tts.model, + }, }; if (this.#ttsRequestSpan) { this.#ttsRequestSpan.setAttribute(traceTypes.ATTR_TTS_METRICS, JSON.stringify(metrics)); } this.#tts.emit('metrics_collected', metrics); + + // Reset token usage after emitting metrics for the next segment + this.#inputTokens = 0; + this.#outputTokens = 0; } }; @@ -434,6 +482,8 @@ export abstract class ChunkedStream implements AsyncIterableIterator { const hostname = url.hostname; return hostname.endsWith('.livekit.cloud') || hostname.endsWith('.livekit.run'); }; + +/** + * Whether the agent is running in development mode (launched via `dev` or `connect`). + */ +export const isDevMode = (): boolean => { + return process.env.LIVEKIT_DEV_MODE === '1'; +}; + +/** + * Whether the agent is hosted on LiveKit Cloud. 
+ */ +export const isHosted = (): boolean => { + return process.env.LIVEKIT_REMOTE_EOT_URL !== undefined; +}; diff --git a/agents/src/voice/agent.test.ts b/agents/src/voice/agent.test.ts index fd5f39183..8dd83fee3 100644 --- a/agents/src/voice/agent.test.ts +++ b/agents/src/voice/agent.test.ts @@ -7,7 +7,11 @@ import { tool } from '../llm/index.js'; import { initializeLogger } from '../log.js'; import { Task } from '../utils.js'; import { Agent, AgentTask, _setActivityTaskInfo } from './agent.js'; -import { agentActivityStorage } from './agent_activity.js'; +import { AgentActivity, agentActivityStorage } from './agent_activity.js'; +import { defaultEndpointingOptions } from './turn_config/endpointing.js'; +import { defaultInterruptionOptions } from './turn_config/interruption.js'; + +vi.mock('ofetch', () => ({ ofetch: vi.fn() })); initializeLogger({ pretty: false, level: 'error' }); @@ -215,4 +219,207 @@ describe('Agent', () => { await expect(wrapper.result).resolves.toBe('ok'); expect(closeOldActivity).toHaveBeenCalledTimes(1); }); + + describe('Agent constructor option migration', () => { + it('should set allowInterruptions to false via deprecated constructor field', () => { + const agent = new Agent({ instructions: 'test', allowInterruptions: false }); + expect(agent.turnHandling?.interruption?.enabled).toBe(false); + }); + + it('should not set derived properties when no compatibility fields are provided', () => { + const agent = new Agent({ instructions: 'test' }); + expect(agent.turnHandling).toBeUndefined(); + }); + + it('should expose minConsecutiveSpeechDelay', () => { + const agent = new Agent({ instructions: 'test', minConsecutiveSpeechDelay: 1.5 }); + expect(agent.minConsecutiveSpeechDelay).toBe(1.5); + }); + + it('should ignore deprecated constructor fields when turnHandling is provided', () => { + const agent = new Agent({ + instructions: 'test', + turnHandling: { + endpointing: { minDelay: 999 }, + interruption: {}, + turnDetection: 'vad', + }, + 
allowInterruptions: false, + }); + expect(agent.turnHandling?.endpointing?.minDelay).toBe(999); + expect(agent.turnHandling?.endpointing?.maxDelay).toBeUndefined(); + expect(agent.turnHandling?.interruption?.enabled).toBeUndefined(); + expect(agent.turnHandling?.turnDetection).toBe('vad'); + }); + + it('should let turnHandling override deprecated constructor fields on conflicts', () => { + const agent = new Agent({ + instructions: 'test', + turnHandling: { + endpointing: { minDelay: 999, maxDelay: 4000 }, + interruption: { enabled: true }, + turnDetection: 'vad', + }, + allowInterruptions: false, + turnDetection: 'stt', + }); + expect(agent.turnHandling?.endpointing?.minDelay).toBe(999); + expect(agent.turnHandling?.endpointing?.maxDelay).toBe(4000); + expect(agent.turnHandling?.interruption?.enabled).toBe(true); + expect(agent.turnHandling?.turnDetection).toBe('vad'); + }); + + it('should set interruptionDetection from turnHandling.interruption.mode', () => { + const agent = new Agent({ + instructions: 'test', + turnHandling: { + interruption: { mode: 'adaptive' }, + endpointing: {}, + turnDetection: undefined, + }, + }); + expect(agent.turnHandling?.interruption?.mode).toBe('adaptive'); + }); + + it('should let AgentActivity prefer agent-level overrides over session defaults', () => { + const agent = new Agent({ + instructions: 'test', + turnHandling: { + endpointing: { minDelay: 111, maxDelay: 222 }, + interruption: { enabled: false }, + turnDetection: 'manual', + }, + }); + const session = { + options: { + turnHandling: { + endpointing: defaultEndpointingOptions, + interruption: defaultInterruptionOptions, + }, + }, + turnDetection: 'stt', + useTtsAlignedTranscript: true, + vad: undefined, + stt: undefined, + llm: undefined, + tts: undefined, + interruptionDetection: undefined, + } as any; + + const activity = new AgentActivity(agent as any, session); + + expect(activity.allowInterruptions).toBe(false); + expect(activity.turnDetection).toBe('manual'); + 
expect(activity.turnHandling.endpointing?.minDelay).toBe(111); + expect(activity.turnHandling.endpointing?.maxDelay).toBe(222); + }); + + it('should disable adaptive interruption detection in default mode when prerequisites are missing', () => { + const previousRemoteEotUrl = process.env.LIVEKIT_REMOTE_EOT_URL; + process.env.LIVEKIT_REMOTE_EOT_URL = 'http://localhost:9999'; + + try { + const agent = new Agent({ instructions: 'test' }); + const session = { + options: { + turnHandling: { + endpointing: defaultEndpointingOptions, + interruption: defaultInterruptionOptions, + }, + }, + sessionOptions: { + turnHandling: { + endpointing: defaultEndpointingOptions, + interruption: defaultInterruptionOptions, + }, + }, + turnDetection: 'manual', + useTtsAlignedTranscript: true, + vad: {}, + stt: { + capabilities: { + alignedTranscript: true, + streaming: true, + }, + }, + llm: undefined, + tts: undefined, + interruptionDetection: undefined, + } as any; + + const activity = new AgentActivity(agent as any, session); + expect((activity as any).interruptionDetector).toBeUndefined(); + } finally { + if (previousRemoteEotUrl === undefined) { + delete process.env.LIVEKIT_REMOTE_EOT_URL; + } else { + process.env.LIVEKIT_REMOTE_EOT_URL = previousRemoteEotUrl; + } + } + }); + + it('should warn when session explicitly requests adaptive detection even if agent overrides it', () => { + const activity = Object.create(AgentActivity.prototype) as any; + activity.agent = { + turnHandling: { interruption: { mode: 'vad' } }, + turnDetection: undefined, + }; + activity.agentSession = { + interruptionDetection: 'adaptive', + turnDetection: 'manual', + }; + activity.logger = { warn: vi.fn() }; + + expect(activity.resolveInterruptionDetector()).toBeUndefined(); + expect(activity.logger.warn).toHaveBeenCalledWith( + "interruptionDetection is provided, but it's not compatible with the current configuration and will be disabled", + ); + }); + + it('should disable adaptive interruption detection 
when interruptions are disabled', () => { + const previousRemoteEotUrl = process.env.LIVEKIT_REMOTE_EOT_URL; + process.env.LIVEKIT_REMOTE_EOT_URL = 'http://localhost:9999'; + + try { + const activity = Object.create(AgentActivity.prototype) as any; + activity.agent = { + turnHandling: { + interruption: { enabled: false }, + }, + turnDetection: undefined, + stt: undefined, + vad: undefined, + llm: undefined, + }; + activity.agentSession = { + interruptionDetection: undefined, + turnDetection: 'stt', + sessionOptions: { + turnHandling: { + interruption: defaultInterruptionOptions, + endpointing: defaultEndpointingOptions, + }, + }, + stt: { + capabilities: { + alignedTranscript: true, + streaming: true, + }, + }, + vad: {}, + llm: undefined, + }; + activity.logger = { warn: vi.fn() }; + + expect(activity.resolveInterruptionDetector()).toBeUndefined(); + expect(activity.logger.warn).not.toHaveBeenCalled(); + } finally { + if (previousRemoteEotUrl === undefined) { + delete process.env.LIVEKIT_REMOTE_EOT_URL; + } else { + process.env.LIVEKIT_REMOTE_EOT_URL = previousRemoteEotUrl; + } + } + }); + }); }); diff --git a/agents/src/voice/agent.ts b/agents/src/voice/agent.ts index db937ecf7..3f83aee32 100644 --- a/agents/src/voice/agent.ts +++ b/agents/src/voice/agent.ts @@ -35,6 +35,8 @@ import { type AgentActivity, agentActivityStorage } from './agent_activity.js'; import type { AgentSession, TurnDetectionMode } from './agent_session.js'; import type { TimedString } from './io.js'; import type { SpeechHandle } from './speech_handle.js'; +import type { TurnHandlingOptions } from './turn_config/turn_handling.js'; +import { migrateTurnHandling } from './turn_config/utils.js'; export const functionCallStorage = new AsyncLocalStorage<{ functionCall?: FunctionCall }>(); export const speechHandleStorage = new AsyncLocalStorage(); @@ -110,23 +112,28 @@ export interface AgentOptions { instructions: string; chatCtx?: ChatContext; tools?: ToolContext; - turnDetection?: 
TurnDetectionMode; stt?: STT | STTModelString; vad?: VAD; llm?: LLM | RealtimeModel | LLMModels; tts?: TTS | TTSModelString; - allowInterruptions?: boolean; + turnHandling?: TurnHandlingOptions; minConsecutiveSpeechDelay?: number; useTtsAlignedTranscript?: boolean; + /** @deprecated use turnHandling.turnDetection instead */ + turnDetection?: TurnDetectionMode; + /** @deprecated use turnHandling.interruption.enabled instead */ + allowInterruptions?: boolean; } export class Agent { private _id: string; - private turnDetection?: TurnDetectionMode; private _stt?: STT; private _vad?: VAD; private _llm?: LLM | RealtimeModel; private _tts?: TTS; + private _turnHandling?: Partial; + + private _minConsecutiveSpeechDelay?: number; private _useTtsAlignedTranscript?: boolean; /** @internal */ @@ -151,12 +158,14 @@ export class Agent { vad, llm, tts, + allowInterruptions, + turnHandling, + minConsecutiveSpeechDelay, useTtsAlignedTranscript, }: AgentOptions) { if (id) { this._id = id; } else { - // Convert class name to snake_case const className = this.constructor.name; if (className === 'Agent') { this._id = 'default_agent'; @@ -176,7 +185,14 @@ export class Agent { }) : ChatContext.empty(); - this.turnDetection = turnDetection; + const resolvedTurnHandling = migrateTurnHandling({ + turnDetection, + allowInterruptions, + turnHandling, + }); + this._turnHandling = + Object.keys(resolvedTurnHandling).length > 0 ? 
resolvedTurnHandling : undefined; + this._vad = vad; if (typeof stt === 'string') { @@ -197,6 +213,7 @@ export class Agent { this._tts = tts; } + this._minConsecutiveSpeechDelay = minConsecutiveSpeechDelay; this._useTtsAlignedTranscript = useTtsAlignedTranscript; this._agentActivity = undefined; @@ -242,6 +259,14 @@ export class Agent { return this.getActivityOrThrow().agentSession as AgentSession; } + get turnHandling(): Partial | undefined { + return this._turnHandling; + } + + get minConsecutiveSpeechDelay(): number | undefined { + return this._minConsecutiveSpeechDelay; + } + async onEnter(): Promise {} async onExit(): Promise {} @@ -341,7 +366,8 @@ export class Agent { // Set startTimeOffset to provide linear timestamps across reconnections const audioInputStartedAt = - activity.agentSession._recorderIO?.recordingStartedAt ?? // Use recording start time if available + activity.inputStartedAt ?? // Use input started at proxied from AudioRecognition if available + activity.agentSession._recorderIO?.recordingStartedAt ?? // Fallback to recording start time if available activity.agentSession._startedAt ?? 
// Fallback to session start time Date.now(); // Fallback to current time diff --git a/agents/src/voice/agent_activity.ts b/agents/src/voice/agent_activity.ts index 7392b536d..8b0fa6b26 100644 --- a/agents/src/voice/agent_activity.ts +++ b/agents/src/voice/agent_activity.ts @@ -8,7 +8,10 @@ import { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api' import { Heap } from 'heap-js'; import { AsyncLocalStorage } from 'node:async_hooks'; import { ReadableStream, TransformStream } from 'node:stream/web'; -import { type ChatContext, ChatMessage } from '../llm/chat_context.js'; +import type { InterruptionDetectionError } from '../inference/interruption/errors.js'; +import { AdaptiveInterruptionDetector } from '../inference/interruption/interruption_detector.js'; +import type { OverlappingSpeechEvent } from '../inference/interruption/types.js'; +import { type ChatContext, ChatMessage, type MetricsReport } from '../llm/chat_context.js'; import { type ChatItem, type FunctionCall, @@ -30,6 +33,7 @@ import { isSameToolChoice, isSameToolContext } from '../llm/tool_context.js'; import { log } from '../log.js'; import type { EOUMetrics, + InterruptionMetrics, LLMMetrics, RealtimeModelMetrics, STTMetrics, @@ -41,7 +45,7 @@ import { STT, type STTError, type SpeechEvent } from '../stt/stt.js'; import { recordRealtimeMetrics, traceTypes, tracer } from '../telemetry/index.js'; import { splitWords } from '../tokenize/basic/word.js'; import { TTS, type TTSError } from '../tts/tts.js'; -import { Future, Task, cancelAndWait, waitFor } from '../utils.js'; +import { Future, Task, cancelAndWait, isDevMode, isHosted, waitFor } from '../utils.js'; import { VAD, type VADEvent } from '../vad.js'; import type { Agent, ModelSettings } from './agent.js'; import { @@ -57,7 +61,6 @@ import { type EndOfTurnInfo, type PreemptiveGenerationInfo, type RecognitionHooks, - type _TurnDetector, } from './audio_recognition.js'; import { AgentSessionEventTypes, @@ -101,6 +104,7 @@ interface 
PreemptiveGeneration { createdAt: number; } +// TODO add false interruption handling and barge in handling for https://github.com/livekit/agents/pull/3109/changes export class AgentActivity implements RecognitionHooks { agent: Agent; agentSession: AgentSession; @@ -111,7 +115,7 @@ export class AgentActivity implements RecognitionHooks { private audioRecognition?: AudioRecognition; private realtimeSession?: RealtimeSession; private realtimeSpans?: Map; // Maps response_id to OTEL span for metrics recording - private turnDetectionMode?: Exclude; + private turnDetectionMode?: TurnDetectionMode; private logger = log(); private _schedulingPaused = true; private _drainBlockedTasks: Task[] = []; @@ -126,6 +130,51 @@ export class AgentActivity implements RecognitionHooks { // default to null as None, which maps to the default provider tool choice value private toolChoice: ToolChoice | null = null; private _preemptiveGeneration?: PreemptiveGeneration; + private interruptionDetector?: AdaptiveInterruptionDetector; + private isInterruptionDetectionEnabled: boolean; + private isInterruptionByAudioActivityEnabled: boolean; + private isDefaultInterruptionByAudioActivityEnabled: boolean; + + private readonly onRealtimeGenerationCreated = (ev: GenerationCreatedEvent): void => + this.onGenerationCreated(ev); + + private readonly onRealtimeInputSpeechStarted = (ev: InputSpeechStartedEvent): void => + this.onInputSpeechStarted(ev); + + private readonly onRealtimeInputSpeechStopped = (ev: InputSpeechStoppedEvent): void => + this.onInputSpeechStopped(ev); + + private readonly onRealtimeInputAudioTranscriptionCompleted = ( + ev: InputTranscriptionCompleted, + ): void => this.onInputAudioTranscriptionCompleted(ev); + + private readonly onModelError = (ev: RealtimeModelError | STTError | TTSError | LLMError): void => + this.onError(ev); + + private readonly onInterruptionOverlappingSpeech = (ev: OverlappingSpeechEvent): void => { + 
this.agentSession.emit(AgentSessionEventTypes.OverlappingSpeech, ev); + }; + + private readonly onInterruptionMetricsCollected = (ev: InterruptionMetrics): void => { + this.agentSession._usageCollector.collect(ev); + this.agentSession.emit( + AgentSessionEventTypes.MetricsCollected, + createMetricsCollectedEvent({ metrics: ev }), + ); + }; + + private readonly onInterruptionError = (ev: InterruptionDetectionError): void => { + const errorEvent = createErrorEvent(ev, this.interruptionDetector); + this.agentSession.emit(AgentSessionEventTypes.Error, errorEvent); + + if (!ev.recoverable) { + this.agentSession._onError(ev); + this.fallbackToVadInterruption(); + return; + } + + this.agentSession._onError(ev); + }; /** @internal */ _mainTask?: Task; @@ -133,16 +182,6 @@ export class AgentActivity implements RecognitionHooks { _onExitTask?: Task; _userTurnCompletedTask?: Task; - private readonly onRealtimeGenerationCreated = (ev: GenerationCreatedEvent) => - this.onGenerationCreated(ev); - private readonly onRealtimeInputSpeechStarted = (ev: InputSpeechStartedEvent) => - this.onInputSpeechStarted(ev); - private readonly onRealtimeInputSpeechStopped = (ev: InputSpeechStoppedEvent) => - this.onInputSpeechStopped(ev); - private readonly onRealtimeInputAudioTranscriptionCompleted = (ev: InputTranscriptionCompleted) => - this.onInputAudioTranscriptionCompleted(ev); - private readonly onModelError = (ev: RealtimeModelError | STTError | TTSError | LLMError) => - this.onError(ev); constructor(agent: Agent, agentSession: AgentSession) { this.agent = agent; this.agentSession = agentSession; @@ -235,6 +274,16 @@ export class AgentActivity implements RecognitionHooks { 'for more responsive interruption handling.', ); } + + this.interruptionDetector = this.resolveInterruptionDetector(); + this.isInterruptionDetectionEnabled = !!this.interruptionDetector; + + // this allows taking over audio interruption temporarily until interruption is detected + // by default it is true unless
turnDetection is manual or realtime_llm + this.isInterruptionByAudioActivityEnabled = + this.turnDetectionMode !== 'manual' && this.turnDetectionMode !== 'realtime_llm'; + + this.isDefaultInterruptionByAudioActivityEnabled = this.isInterruptionByAudioActivityEnabled; } async start(): Promise { @@ -348,8 +397,13 @@ export class AgentActivity implements RecognitionHooks { vad: this.vad, turnDetector: typeof this.turnDetection === 'string' ? undefined : this.turnDetection, turnDetectionMode: this.turnDetectionMode, - minEndpointingDelay: this.agentSession.options.minEndpointingDelay, - maxEndpointingDelay: this.agentSession.options.maxEndpointingDelay, + interruptionDetection: this.interruptionDetector, + minEndpointingDelay: + this.agent.turnHandling?.endpointing?.minDelay ?? + this.agentSession.sessionOptions.turnHandling.endpointing.minDelay, + maxEndpointingDelay: + this.agent.turnHandling?.endpointing?.maxDelay ?? + this.agentSession.sessionOptions.turnHandling.endpointing.maxDelay, rootSpanContext: this.agentSession.rootSpanContext, sttModel: this.stt?.label, sttProvider: this.getSttProvider(), @@ -422,8 +476,10 @@ export class AgentActivity implements RecognitionHooks { } get allowInterruptions(): boolean { - // TODO(AJS-51): Allow options to be defined in Agent class - return this.agentSession.options.allowInterruptions; + return ( + this.agent.turnHandling?.interruption?.enabled ?? + this.agentSession.sessionOptions.turnHandling.interruption.enabled + ); } get useTtsAlignedTranscript(): boolean { @@ -432,14 +488,36 @@ export class AgentActivity implements RecognitionHooks { } get turnDetection(): TurnDetectionMode | undefined { - // TODO(brian): prioritize using agent.turn_detection - return this.agentSession.turnDetection; + return this.agent.turnHandling?.turnDetection ?? this.agentSession.turnDetection; + } + + get turnHandling() { + return this.agent.turnHandling ?? 
this.agentSession.sessionOptions.turnHandling; } + // get minEndpointingDelay(): number { + // return ( + // this.agent.turnHandling?.endpointing?.minDelay ?? + // this.agentSession.sessionOptions.turnHandling.endpointing.minDelay + // ); + // } + + // get maxEndpointingDelay(): number { + // return ( + // this.agent.turnHandling?.endpointing?.maxDelay ?? + // this.agentSession.sessionOptions.turnHandling.endpointing.maxDelay + // ); + // } + get toolCtx(): ToolContext { return this.agent.toolCtx; } + /** @internal */ + get inputStartedAt() { + return this.audioRecognition?.inputStartedAt; + } + async updateChatCtx(chatCtx: ChatContext): Promise { chatCtx = chatCtx.copy({ toolCtx: this.toolCtx }); @@ -471,7 +549,13 @@ export class AgentActivity implements RecognitionHooks { } } - updateOptions({ toolChoice }: { toolChoice?: ToolChoice | null }): void { + updateOptions({ + toolChoice, + turnDetection, + }: { + toolChoice?: ToolChoice | null; + turnDetection?: TurnDetectionMode; + }): void { if (toolChoice !== undefined) { this.toolChoice = toolChoice; } @@ -479,6 +563,22 @@ export class AgentActivity implements RecognitionHooks { if (this.realtimeSession) { this.realtimeSession.updateOptions({ toolChoice: this.toolChoice }); } + + if (turnDetection !== undefined) { + this.turnDetectionMode = turnDetection; + this.isDefaultInterruptionByAudioActivityEnabled = + this.turnDetectionMode !== 'manual' && this.turnDetectionMode !== 'realtime_llm'; + + // sync live flag immediately when not speaking so the change takes effect right away + if (this.agentSession.agentState !== 'speaking') { + this.isInterruptionByAudioActivityEnabled = + this.isDefaultInterruptionByAudioActivityEnabled; + } + } + + if (this.audioRecognition) { + this.audioRecognition.updateOptions({ turnDetection: this.turnDetectionMode }); + } } attachAudioInput(audioStream: ReadableStream): void { @@ -629,6 +729,8 @@ export class AgentActivity implements RecognitionHooks { } } + 
this.agentSession._usageCollector.collect(ev); + this.agentSession.emit( AgentSessionEventTypes.MetricsCollected, createMetricsCollectedEvent({ metrics: ev }), @@ -660,6 +762,13 @@ export class AgentActivity implements RecognitionHooks { if (!this.vad) { this.agentSession._updateUserState('speaking'); + if (this.isInterruptionDetectionEnabled && this.audioRecognition) { + this.audioRecognition.onStartOfOverlapSpeech( + 0, + Date.now(), + this.agentSession._userSpeakingSpan, + ); + } } // this.interrupt() is going to raise when allow_interruptions is False, @@ -678,6 +787,9 @@ export class AgentActivity implements RecognitionHooks { this.logger.info(ev, 'onInputSpeechStopped'); if (!this.vad) { + if (this.isInterruptionDetectionEnabled && this.audioRecognition) { + this.audioRecognition.onEndOfOverlapSpeech(Date.now(), this.agentSession._userSpeakingSpan); + } this.agentSession._updateUserState('listening'); } @@ -751,17 +863,40 @@ export class AgentActivity implements RecognitionHooks { onStartOfSpeech(ev: VADEvent): void { let speechStartTime = Date.now(); if (ev) { - speechStartTime = speechStartTime - ev.speechDuration; + // Subtract both speechDuration and inferenceDuration to correct for VAD model latency. + speechStartTime = speechStartTime - ev.speechDuration - ev.inferenceDuration; + } + this.agentSession._updateUserState('speaking', { + lastSpeakingTime: speechStartTime, + otelContext: otelContext.active(), + }); + if (this.isInterruptionDetectionEnabled && this.audioRecognition) { + // Pass speechStartTime as the absolute startedAt timestamp. 
+ this.audioRecognition.onStartOfOverlapSpeech( + ev.speechDuration, + speechStartTime, + this.agentSession._userSpeakingSpan, + ); } - this.agentSession._updateUserState('speaking', speechStartTime); } onEndOfSpeech(ev: VADEvent): void { let speechEndTime = Date.now(); if (ev) { - speechEndTime = speechEndTime - ev.silenceDuration; + // Subtract both silenceDuration and inferenceDuration to correct for VAD model latency. + speechEndTime = speechEndTime - ev.silenceDuration - ev.inferenceDuration; + } + if (this.isInterruptionDetectionEnabled && this.audioRecognition) { + // Pass speechEndTime as the absolute endedAt timestamp. + this.audioRecognition.onEndOfOverlapSpeech( + speechEndTime, + this.agentSession._userSpeakingSpan, + ); } - this.agentSession._updateUserState('listening', speechEndTime); + this.agentSession._updateUserState('listening', { + lastSpeakingTime: speechEndTime, + otelContext: otelContext.active(), + }); } onVADInferenceDone(ev: VADEvent): void { @@ -770,12 +905,18 @@ export class AgentActivity implements RecognitionHooks { return; } - if (ev.speechDuration >= this.agentSession.options.minInterruptionDuration) { + if ( + ev.speechDuration >= this.agentSession.sessionOptions.turnHandling.interruption?.minDuration + ) { this.interruptByAudioActivity(); } } private interruptByAudioActivity(): void { + if (!this.isInterruptionByAudioActivityEnabled) { + return; + } + if (this.agentSession._aecWarmupRemaining > 0) { // Disable interruption from audio activity while AEC warmup is active. 
return; @@ -790,7 +931,11 @@ export class AgentActivity implements RecognitionHooks { // - Always apply minInterruptionWords filtering when STT is available and minInterruptionWords > 0 // - Apply check to all STT results: empty string, undefined, or any length // - This ensures consistent behavior across all interruption scenarios - if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) { + if ( + this.stt && + this.agentSession.sessionOptions.turnHandling.interruption?.minWords > 0 && + this.audioRecognition + ) { const text = this.audioRecognition.currentTranscript; // TODO(shubhra): better word splitting for multi-language @@ -800,7 +945,7 @@ export class AgentActivity implements RecognitionHooks { // Only allow interruption if word count meets or exceeds minInterruptionWords // This applies to all cases: empty strings, partial speech, and full speech - if (wordCount < this.agentSession.options.minInterruptionWords) { + if (wordCount < this.agentSession.sessionOptions.turnHandling.interruption?.minWords) { return; } } @@ -821,6 +966,14 @@ export class AgentActivity implements RecognitionHooks { } } + onInterruption(ev: OverlappingSpeechEvent) { + this.restoreInterruptionByAudioActivity(); + this.interruptByAudioActivity(); + if (this.audioRecognition) { + this.audioRecognition.onEndOfAgentSpeech(ev.overlapStartedAt || ev.detectedAt); + } + } + onInterimTranscript(ev: SpeechEvent): void { if (this.llm instanceof RealtimeModel && this.llm.capabilities.userTranscription) { // skip stt transcription if userTranscription is enabled on the realtime model @@ -875,7 +1028,7 @@ export class AgentActivity implements RecognitionHooks { onPreemptiveGeneration(info: PreemptiveGenerationInfo): void { if ( - !this.agentSession.options.preemptiveGeneration || + !this.agentSession.sessionOptions.preemptiveGeneration || this.schedulingPaused || (this._currentSpeech !== undefined && !this._currentSpeech.interrupted) || !(this.llm instanceof 
LLM) @@ -896,6 +1049,7 @@ export class AgentActivity implements RecognitionHooks { const userMessage = ChatMessage.create({ role: 'user', content: info.newTranscript, + transcriptConfidence: info.transcriptConfidence, }); const chatCtx = this.agent.chatCtx.copy(); const speechHandle = this.generateReply({ @@ -991,16 +1145,17 @@ export class AgentActivity implements RecognitionHooks { this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && - this.agentSession.options.minInterruptionWords > 0 + this.agentSession.sessionOptions.turnHandling.interruption?.minWords > 0 ) { const wordCount = splitWords(info.newTranscript, true).length; - if (wordCount < this.agentSession.options.minInterruptionWords) { + if (wordCount < this.agentSession.sessionOptions.turnHandling.interruption?.minWords) { // avoid interruption if the new_transcript contains fewer words than minInterruptionWords this.cancelPreemptiveGeneration(); this.logger.info( { wordCount, - minInterruptionWords: this.agentSession.options.minInterruptionWords, + minInterruptionWords: + this.agentSession.sessionOptions.turnHandling.interruption.minWords, }, 'skipping user input, word count below minimum interruption threshold', ); @@ -1325,6 +1480,7 @@ export class AgentActivity implements RecognitionHooks { let userMessage: ChatMessage | undefined = ChatMessage.create({ role: 'user', content: info.newTranscript, + transcriptConfidence: info.transcriptConfidence, }); // create a temporary mutable chat context to pass to onUserTurnCompleted @@ -1351,6 +1507,24 @@ export class AgentActivity implements RecognitionHooks { return; } + const userMetricsReport: MetricsReport = {}; + if (info.startedSpeakingAt !== undefined) { + userMetricsReport.startedSpeakingAt = info.startedSpeakingAt / 1000; // ms -> seconds + } + if (info.stoppedSpeakingAt !== undefined) { + userMetricsReport.stoppedSpeakingAt = info.stoppedSpeakingAt / 1000; // ms -> seconds + } + if 
(info.transcriptionDelay !== undefined) { + userMetricsReport.transcriptionDelay = info.transcriptionDelay / 1000; // ms -> seconds + } + if (info.endOfUtteranceDelay !== undefined) { + userMetricsReport.endOfTurnDelay = info.endOfUtteranceDelay / 1000; // ms -> seconds + } + userMetricsReport.onUserTurnCompletedDelay = callbackDuration / 1000; // ms -> seconds + if (userMessage) { + userMessage.metrics = userMetricsReport; + } + let speechHandle: SpeechHandle | undefined; if (this._preemptiveGeneration !== undefined) { const preemptive = this._preemptiveGeneration; @@ -1363,6 +1537,14 @@ export class AgentActivity implements RecognitionHooks { isSameToolChoice(preemptive.toolChoice, this.toolChoice) ) { speechHandle = preemptive.speechHandle; + // The preemptive userMessage was created without metrics. + // Copy the metrics and transcriptConfidence from the new userMessage + // to the preemptive message BEFORE scheduling (so the pipeline inserts + // the message with metrics already set). + if (preemptive.userMessage && userMessage) { + preemptive.userMessage.metrics = userMetricsReport; + preemptive.userMessage.transcriptConfidence = userMessage.transcriptConfidence; + } this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL); this.logger.debug( { @@ -1456,11 +1638,19 @@ export class AgentActivity implements RecognitionHooks { tasks.push(textForwardTask); } + let replyStartedSpeakingAt: number | undefined; + let replyTtsGenData: _TTSGenerationData | null = null; + const onFirstFrame = (startedSpeakingAt?: number) => { + replyStartedSpeakingAt = startedSpeakingAt ?? 
Date.now(); this.agentSession._updateAgentState('speaking', { startTime: startedSpeakingAt, otelContext: speechHandle._agentTurnContext, }); + if (this.isInterruptionDetectionEnabled && this.audioRecognition) { + this.audioRecognition.onStartOfAgentSpeech(); + this.isInterruptionByAudioActivityEnabled = false; + } }; if (!audioOutput) { @@ -1478,8 +1668,11 @@ export class AgentActivity implements RecognitionHooks { audioSource, modelSettings, replyAbortController, + this.tts?.model, + this.tts?.provider, ); tasks.push(ttsTask); + replyTtsGenData = ttsGenData; const [forwardTask, _audioOut] = performAudioForwarding( ttsGenData.audioStream, @@ -1519,10 +1712,21 @@ export class AgentActivity implements RecognitionHooks { } if (addToChatCtx) { + const replyStoppedSpeakingAt = Date.now(); + const replyAssistantMetrics: MetricsReport = {}; + if (replyTtsGenData?.ttfb !== undefined) { + replyAssistantMetrics.ttsNodeTtfb = replyTtsGenData.ttfb; + } + if (replyStartedSpeakingAt !== undefined) { + replyAssistantMetrics.startedSpeakingAt = replyStartedSpeakingAt / 1000; // ms -> seconds + replyAssistantMetrics.stoppedSpeakingAt = replyStoppedSpeakingAt / 1000; // ms -> seconds + } + const message = ChatMessage.create({ role: 'assistant', content: textOut?.text || '', interrupted: speechHandle.interrupted, + metrics: replyAssistantMetrics, }); this.agent._chatCtx.insert(message); this.agentSession._conversationItemAdded(message); @@ -1530,6 +1734,10 @@ export class AgentActivity implements RecognitionHooks { if (this.agentSession.agentState === 'speaking') { this.agentSession._updateAgentState('listening'); + if (this.isInterruptionDetectionEnabled && this.audioRecognition) { + this.audioRecognition.onEndOfAgentSpeech(Date.now()); + } + this.restoreInterruptionByAudioActivity(); } } @@ -1543,6 +1751,7 @@ export class AgentActivity implements RecognitionHooks { newMessage, toolsMessages, span, + _previousUserMetrics, }: { speechHandle: SpeechHandle; chatCtx: ChatContext; @@ 
-1553,6 +1762,7 @@ export class AgentActivity implements RecognitionHooks { newMessage?: ChatMessage; toolsMessages?: ChatItem[]; span: Span; + _previousUserMetrics?: MetricsReport; }): Promise => { speechHandle._agentTurnContext = otelContext.active(); @@ -1605,6 +1815,8 @@ export class AgentActivity implements RecognitionHooks { toolCtx, modelSettings, replyAbortController, + this.llm?.model, + this.llm?.provider, ); tasks.push(llmTask); @@ -1621,6 +1833,8 @@ export class AgentActivity implements RecognitionHooks { ttsTextInput, modelSettings, replyAbortController, + this.tts?.model, + this.tts?.provider, ); tasks.push(ttsTask); } else { @@ -1630,10 +1844,12 @@ export class AgentActivity implements RecognitionHooks { await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]); + let userMetrics: MetricsReport | undefined = _previousUserMetrics; // Add new message to actual chat context if the speech is scheduled if (newMessage && speechHandle.scheduled) { this.agent._chatCtx.insert(newMessage); this.agentSession._conversationItemAdded(newMessage); + userMetrics = newMessage.metrics; } if (speechHandle.interrupted) { @@ -1679,11 +1895,17 @@ export class AgentActivity implements RecognitionHooks { textOut = _textOut; } + let agentStartedSpeakingAt: number | undefined; const onFirstFrame = (startedSpeakingAt?: number) => { + agentStartedSpeakingAt = startedSpeakingAt ?? 
Date.now(); this.agentSession._updateAgentState('speaking', { startTime: startedSpeakingAt, otelContext: speechHandle._agentTurnContext, }); + if (this.isInterruptionDetectionEnabled && this.audioRecognition) { + this.audioRecognition.onStartOfAgentSpeech(); + this.isInterruptionByAudioActivityEnabled = false; + } }; let audioOut: _AudioOut | null = null; @@ -1740,6 +1962,29 @@ export class AgentActivity implements RecognitionHooks { await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]); } + const agentStoppedSpeakingAt = Date.now(); + const assistantMetrics: MetricsReport = {}; + + if (llmGenData.ttft !== undefined) { + assistantMetrics.llmNodeTtft = llmGenData.ttft; // already in seconds + } + if (ttsGenData?.ttfb !== undefined) { + assistantMetrics.ttsNodeTtfb = ttsGenData.ttfb; // already in seconds + } + if (agentStartedSpeakingAt !== undefined) { + assistantMetrics.startedSpeakingAt = agentStartedSpeakingAt / 1000; // ms -> seconds + assistantMetrics.stoppedSpeakingAt = agentStoppedSpeakingAt / 1000; // ms -> seconds + + if (userMetrics?.stoppedSpeakingAt !== undefined) { + const e2eLatency = agentStartedSpeakingAt / 1000 - userMetrics.stoppedSpeakingAt; + assistantMetrics.e2eLatency = e2eLatency; + span.setAttribute(traceTypes.ATTR_E2E_LATENCY, e2eLatency); + } + } + + span.setAttribute(traceTypes.ATTR_SPEECH_INTERRUPTED, speechHandle.interrupted); + let hasSpeechMessage = false; + // add the tools messages that triggers this reply to the chat context if (toolsMessages) { for (const msg of toolsMessages) { @@ -1792,45 +2037,54 @@ export class AgentActivity implements RecognitionHooks { } if (forwardedText) { + hasSpeechMessage = true; const message = ChatMessage.create({ role: 'assistant', content: forwardedText, id: llmGenData.id, interrupted: true, createdAt: replyStartedAt, + metrics: assistantMetrics, }); chatCtx.insert(message); this.agent._chatCtx.insert(message); speechHandle._itemAdded([message]); 
this.agentSession._conversationItemAdded(message); + span.setAttribute(traceTypes.ATTR_RESPONSE_TEXT, forwardedText); } if (this.agentSession.agentState === 'speaking') { this.agentSession._updateAgentState('listening'); + if (this.isInterruptionDetectionEnabled && this.audioRecognition) { + this.audioRecognition.onEndOfAgentSpeech(Date.now()); + this.restoreInterruptionByAudioActivity(); + } } this.logger.info( { speech_id: speechHandle.id, message: forwardedText }, 'playout completed with interrupt', ); - // TODO(shubhra) add chat message to speech handle speechHandle._markGenerationDone(); await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT); return; } if (textOut && textOut.text) { + hasSpeechMessage = true; const message = ChatMessage.create({ role: 'assistant', id: llmGenData.id, interrupted: false, createdAt: replyStartedAt, content: textOut.text, + metrics: assistantMetrics, }); chatCtx.insert(message); this.agent._chatCtx.insert(message); speechHandle._itemAdded([message]); this.agentSession._conversationItemAdded(message); + span.setAttribute(traceTypes.ATTR_RESPONSE_TEXT, textOut.text); this.logger.info( { speech_id: speechHandle.id, message: textOut.text }, 'playout completed without interruption', @@ -1841,6 +2095,12 @@ export class AgentActivity implements RecognitionHooks { this.agentSession._updateAgentState('thinking'); } else if (this.agentSession.agentState === 'speaking') { this.agentSession._updateAgentState('listening'); + if (this.isInterruptionDetectionEnabled && this.audioRecognition) { + { + this.audioRecognition.onEndOfAgentSpeech(Date.now()); + this.restoreInterruptionByAudioActivity(); + } + } } // mark the playout done before waiting for the tool execution @@ -1850,7 +2110,7 @@ export class AgentActivity implements RecognitionHooks { if (toolOutput.output.length === 0) return; // important: no agent output should be used after this point - const { maxToolSteps } = this.agentSession.options; + const { 
maxToolSteps } = this.agentSession.sessionOptions; if (speechHandle.numSteps >= maxToolSteps) { this.logger.warn( { speech_id: speechHandle.id, max_tool_steps: maxToolSteps }, @@ -1900,6 +2160,7 @@ export class AgentActivity implements RecognitionHooks { instructions, undefined, toolMessages, + hasSpeechMessage ? undefined : userMetrics, ), ownedSpeechHandle: speechHandle, name: 'AgentActivity.pipelineReply', @@ -1933,6 +2194,7 @@ export class AgentActivity implements RecognitionHooks { instructions?: string, newMessage?: ChatMessage, toolsMessages?: ChatItem[], + _previousUserMetrics?: MetricsReport, ): Promise => tracer.startActiveSpan( async (span) => @@ -1946,6 +2208,7 @@ export class AgentActivity implements RecognitionHooks { newMessage, toolsMessages, span, + _previousUserMetrics, }), { name: 'agent_turn', @@ -2096,6 +2359,8 @@ export class AgentActivity implements RecognitionHooks { ttsTextInput, modelSettings, abortController, + this.tts?.model, + this.tts?.provider, ); tasks.push(ttsTask); realtimeAudioResult = ttsGenData.audioStream; @@ -2312,7 +2577,7 @@ export class AgentActivity implements RecognitionHooks { } // important: no agent ouput should be used after this point - const { maxToolSteps } = this.agentSession.options; + const { maxToolSteps } = this.agentSession.sessionOptions; if (speechHandle.numSteps >= maxToolSteps) { this.logger.warn( { speech_id: speechHandle.id, max_tool_steps: maxToolSteps }, @@ -2612,6 +2877,11 @@ export class AgentActivity implements RecognitionHooks { if (this._mainTask) { await this._mainTask.cancelAndWait(); } + if (this.interruptionDetector) { + this.interruptionDetector.off('overlapping_speech', this.onInterruptionOverlappingSpeech); + this.interruptionDetector.off('metrics_collected', this.onInterruptionMetricsCollected); + this.interruptionDetector.off('error', this.onInterruptionError); + } this.agent._agentActivity = undefined; } finally { @@ -2619,6 +2889,95 @@ export class AgentActivity implements 
RecognitionHooks { } } + private resolveInterruptionDetector(): AdaptiveInterruptionDetector | undefined { + const agentInterruptionDetection = this.agent.turnHandling?.interruption?.mode; + const sessionInterruptionDetection = this.agentSession.interruptionDetection; + if ( + !( + this.stt && + this.stt.capabilities.alignedTranscript && + this.stt.capabilities.streaming && + this.vad && + this.turnDetection !== 'manual' && + this.turnDetection !== 'realtime_llm' && + !(this.llm instanceof RealtimeModel) + ) + ) { + if ( + agentInterruptionDetection === 'adaptive' || + sessionInterruptionDetection === 'adaptive' + ) { + this.logger.warn( + "interruptionDetection is provided, but it's not compatible with the current configuration and will be disabled", + ); + } + return undefined; + } + + if (!this.allowInterruptions) { + return undefined; + } + + if (agentInterruptionDetection === 'vad') { + return undefined; + } + + if (sessionInterruptionDetection === 'vad') { + return undefined; + } + + if ( + agentInterruptionDetection === undefined && + sessionInterruptionDetection === undefined && + !isHosted() && + !isDevMode() + ) { + this.logger.info('adaptive interruption is disabled by default in production mode'); + return undefined; + } + + try { + const detector = new AdaptiveInterruptionDetector(); + + detector.on('overlapping_speech', this.onInterruptionOverlappingSpeech); + detector.on('metrics_collected', this.onInterruptionMetricsCollected); + detector.on('error', this.onInterruptionError); + + return detector; + } catch (error: unknown) { + this.logger.warn({ error }, 'could not instantiate AdaptiveInterruptionDetector'); + } + return undefined; + } + + private restoreInterruptionByAudioActivity(): void { + this.isInterruptionByAudioActivityEnabled = this.isDefaultInterruptionByAudioActivityEnabled; + } + + private fallbackToVadInterruption(): void { + if (!this.isInterruptionDetectionEnabled) return; + + this.isInterruptionDetectionEnabled = false; + 
this.restoreInterruptionByAudioActivity(); + + if (this.interruptionDetector) { + this.interruptionDetector.off('overlapping_speech', this.onInterruptionOverlappingSpeech); + this.interruptionDetector.off('metrics_collected', this.onInterruptionMetricsCollected); + this.interruptionDetector.off('error', this.onInterruptionError); + this.interruptionDetector = undefined; + } + + if (this.audioRecognition) { + this.audioRecognition.disableInterruptionDetection().catch((err) => { + this.logger.warn({ err }, 'error while disabling interruption detection'); + }); + } + + this.logger.warn( + 'adaptive interruption disabled due to unrecoverable error, falling back to VAD-based interruption', + ); + } + private async _closeSessionResources(): Promise { // Unregister event handlers to prevent duplicate metrics if (this.llm instanceof LLM) { diff --git a/agents/src/voice/agent_session.ts b/agents/src/voice/agent_session.ts index 74e1247bf..c88eda47e 100644 --- a/agents/src/voice/agent_session.ts +++ b/agents/src/voice/agent_session.ts @@ -17,12 +17,15 @@ import { type STTModelString, type TTSModelString, } from '../inference/index.js'; +import type { InterruptionDetectionError } from '../inference/interruption/errors.js'; +import type { OverlappingSpeechEvent } from '../inference/interruption/types.js'; import { type JobContext, getJobContext } from '../job.js'; import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js'; import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js'; import type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js'; import type { LLMError } from '../llm/llm.js'; import { log } from '../log.js'; +import { type ModelUsage, ModelUsageCollector, filterZeroValues } from '../metrics/model_usage.js'; import type { STT } from '../stt/index.js'; import type { STTError } from '../stt/stt.js'; import { traceTypes, tracer } from '../telemetry/index.js'; @@ -61,39 +64,63 @@ import { } from 
'./events.js'; import { AgentInput, AgentOutput } from './io.js'; import { RecorderIO } from './recorder_io/index.js'; -import { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js'; +import { RoomSessionTransport, SessionHost } from './remote_session.js'; +import { + DEFAULT_TEXT_INPUT_CALLBACK, + RoomIO, + type RoomInputOptions, + type RoomOutputOptions, +} from './room_io/index.js'; import type { UnknownUserData } from './run_context.js'; import type { SpeechHandle } from './speech_handle.js'; import { RunResult } from './testing/run_result.js'; +import type { InterruptionOptions } from './turn_config/interruption.js'; +import type { + InternalTurnHandlingOptions, + TurnHandlingOptions, +} from './turn_config/turn_handling.js'; +import { migrateLegacyOptions } from './turn_config/utils.js'; import { setParticipantSpanAttributes } from './utils.js'; -export interface VoiceOptions { - allowInterruptions: boolean; - discardAudioIfUninterruptible: boolean; - minInterruptionDuration: number; - minInterruptionWords: number; - minEndpointingDelay: number; - maxEndpointingDelay: number; - maxToolSteps: number; - preemptiveGeneration: boolean; - userAwayTimeout?: number | null; - aecWarmupDuration: number | null; +export interface AgentSessionUsage { + /** List of usage summaries, one per model/provider combination. 
*/ + modelUsage: Array>; +} + +export interface InternalSessionOptions extends AgentSessionOptions { + turnHandling: InternalTurnHandlingOptions; useTtsAlignedTranscript: boolean; + maxToolSteps: number; + userAwayTimeout: number | null; } -const defaultVoiceOptions: VoiceOptions = { - allowInterruptions: true, - discardAudioIfUninterruptible: true, - minInterruptionDuration: 500, - minInterruptionWords: 0, - minEndpointingDelay: 500, - maxEndpointingDelay: 6000, +export const defaultAgentSessionOptions = { maxToolSteps: 3, - preemptiveGeneration: false, + preemptiveGeneration: true, userAwayTimeout: 15.0, aecWarmupDuration: 3000, + turnHandling: {}, useTtsAlignedTranscript: true, -} as const; +} as const satisfies AgentSessionOptions; + +/** @deprecated {@link VoiceOptions} has been flattened onto to {@link AgentSessionOptions} */ +export type VoiceOptions = { + maxToolSteps: number; + preemptiveGeneration: boolean; + userAwayTimeout?: number | null; + /** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.mode instead. */ + allowInterruptions?: boolean; + /** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.discardAudioIfUninterruptible instead. */ + discardAudioIfUninterruptible?: boolean; + /** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.minDuration instead. */ + minInterruptionDuration?: number; + /** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.minWords instead. */ + minInterruptionWords?: number; + /** @deprecated Use {@link AgentSessionOptions.turnHandling}.endpointing.minDelay instead. */ + minEndpointingDelay?: number; + /** @deprecated Use {@link AgentSessionOptions.turnHandling}.endpointing.maxDelay instead. 
*/ + maxEndpointingDelay?: number; +}; export type TurnDetectionMode = 'stt' | 'vad' | 'realtime_llm' | 'manual' | _TurnDetector; @@ -107,17 +134,53 @@ export type AgentSessionCallbacks = { [AgentSessionEventTypes.SpeechCreated]: (ev: SpeechCreatedEvent) => void; [AgentSessionEventTypes.Error]: (ev: ErrorEvent) => void; [AgentSessionEventTypes.Close]: (ev: CloseEvent) => void; + [AgentSessionEventTypes.OverlappingSpeech]: (ev: OverlappingSpeechEvent) => void; }; export type AgentSessionOptions = { - turnDetection?: TurnDetectionMode; stt?: STT | STTModelString; vad?: VAD; llm?: LLM | RealtimeModel | LLMModels; tts?: TTS | TTSModelString; userData?: UserData; - voiceOptions?: Partial; connOptions?: SessionConnectOptions; + + /** @deprecated use turnHandling.turnDetection instead */ + turnDetection?: TurnDetectionMode; + /** @deprecated use top-level SessionOptions fields instead */ + voiceOptions?: Partial; + + maxToolSteps?: number; + /** + * Whether to speculatively begin LLM and TTS requests before an end-of-turn is detected. + * When `true`, the agent sends inference calls as soon as a user transcript is received rather + * than waiting for a definitive turn boundary. This can reduce response latency by overlapping + * model inference with user audio, but may incur extra compute if the user interrupts or + * revises mid-utterance. + * @defaultValue true + */ + preemptiveGeneration?: boolean; + + /** + * If set, set the user state as "away" after this amount of time after user and agent are + * silent. Set to `null` to disable. + * @defaultValue 15.0 + */ + userAwayTimeout?: number | null; + + /** + * Duration in milliseconds for AEC (Acoustic Echo Cancellation) warmup, during which + * interruptions from audio activity are suppressed. Set to `null` to disable. + * @defaultValue 3000 + */ + aecWarmupDuration?: number | null; + + /** + * Configuration for turn handling. 
+ */ + turnHandling?: Partial; + + useTtsAlignedTranscript?: boolean; }; type ActivityTransitionOptions = { @@ -136,22 +199,23 @@ export class AgentSession< tts?: TTS; turnDetection?: TurnDetectionMode; + /** @deprecated use {@link sessionOptions } instead */ readonly options: VoiceOptions; + readonly sessionOptions: InternalSessionOptions; + + private readonly activityLock = new Mutex(); + private agent?: Agent; private activity?: AgentActivity; private nextActivity?: AgentActivity; private updateActivityTask?: Task; private started = false; - private userState: UserState = 'listening'; - private readonly activityLock = new Mutex(); - - /** @internal */ - _roomIO?: RoomIO; - private logger = log(); + private sessionHost?: SessionHost; private _chatCtx: ChatContext; private _userData: UserData | undefined; + private _userState: UserState = 'listening'; private _agentState: AgentState = 'initializing'; private _input: AgentInput; @@ -170,9 +234,16 @@ export class AgentSession< private ttsErrorCounts = 0; private sessionSpan?: Span; - private userSpeakingSpan?: Span; private agentSpeakingSpan?: Span; + private _interruptionDetection?: InterruptionOptions['mode']; + + /** @internal */ + _usageCollector: ModelUsageCollector = new ModelUsageCollector(); + + /** @internal */ + _roomIO?: RoomIO; + /** @internal */ _aecWarmupRemaining = 0; @@ -194,20 +265,18 @@ export class AgentSession< /** @internal - Current run state for testing */ _globalRunState?: RunResult; - constructor(opts: AgentSessionOptions) { + /** @internal */ + _userSpeakingSpan?: Span; + + private logger = log(); + + constructor(options: AgentSessionOptions) { super(); - const { - vad, - stt, - llm, - tts, - turnDetection, - userData, - voiceOptions = defaultVoiceOptions, - connOptions, - } = opts; + const { agentSessionOptions: opts, legacyVoiceOptions } = + migrateLegacyOptions(options); + const { vad, stt, llm, tts, userData, connOptions, ...resolvedSessionOptions } = opts; // Merge user-provided 
connOptions with defaults this._connOptions = { sttConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.sttConnOptions }, @@ -238,7 +307,8 @@ export class AgentSession< this.tts = tts; } - this.turnDetection = turnDetection; + this.turnDetection = resolvedSessionOptions.turnHandling.turnDetection; + this._interruptionDetection = resolvedSessionOptions.turnHandling.interruption?.mode; this._userData = userData; // configurable IO @@ -247,8 +317,9 @@ export class AgentSession< // This is the "global" chat context, it holds the entire conversation history this._chatCtx = ChatContext.empty(); - this.options = { ...defaultVoiceOptions, ...voiceOptions }; - this._aecWarmupRemaining = this.options.aecWarmupDuration ?? 0; + this.sessionOptions = resolvedSessionOptions; + this.options = legacyVoiceOptions; + this._aecWarmupRemaining = this.sessionOptions.aecWarmupDuration ?? 0; this._onUserInputTranscribed = this._onUserInputTranscribed.bind(this); this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed); @@ -288,8 +359,20 @@ export class AgentSession< return this._connOptions; } + get interruptionDetection() { + return this._interruptionDetection; + } + + /** + * Returns usage summaries for this session, one per model/provider combination. + */ + get usage(): AgentSessionUsage { + // Skip zero fields for more concise usage display (matches python behavior). 
+ return { modelUsage: this._usageCollector.flatten().map(filterZeroValues) }; + } + get useTtsAlignedTranscript(): boolean { - return this.options.useTtsAlignedTranscript; + return this.sessionOptions.useTtsAlignedTranscript; } set userData(value: UserData) { @@ -342,7 +425,17 @@ export class AgentSession< inputOptions, outputOptions, }); + this._roomIO.start(); + + const transport = new RoomSessionTransport(room, this._roomIO); + this.sessionHost = new SessionHost(transport); + this.sessionHost.registerSession(this); + if (inputOptions?.textEnabled !== false) { + this.sessionHost.registerTextInput( + inputOptions?.textInputCallback ?? DEFAULT_TEXT_INPUT_CALLBACK, + ); + } } let ctx: JobContext | undefined = undefined; @@ -385,6 +478,10 @@ export class AgentSession< await Promise.allSettled(tasks); + if (this.sessionHost) { + await this.sessionHost.start(); + } + // Log used IO configuration this.logger.debug( `using audio io: ${this.input.audio ? '`' + this.input.audio.constructor.name + '`' : '(none)'} -> \`AgentSession\` -> ${this.output.audio ? 
'`' + this.output.audio.constructor.name + '`' : '(none)'}`, @@ -416,6 +513,8 @@ export class AgentSession< return; } + this._usageCollector = new ModelUsageCollector(); + let ctx: JobContext | undefined = undefined; try { ctx = getJobContext(); @@ -748,6 +847,10 @@ export class AgentSession< return this._agentState; } + get userState(): UserState { + return this._userState; + } + get currentAgent(): Agent { if (!this.agent) { throw new Error('AgentSession is not running'); @@ -788,7 +891,9 @@ export class AgentSession< } /** @internal */ - _onError(error: RealtimeModelError | STTError | TTSError | LLMError): void { + _onError( + error: RealtimeModelError | STTError | TTSError | LLMError | InterruptionDetectionError, + ): void { if (this.closingTask || error.recoverable) { return; } @@ -804,9 +909,12 @@ export class AgentSession< if (this.ttsErrorCounts <= this._connOptions.maxUnrecoverableErrors) { return; } + } else if (error.type === 'interruption_detection_error') { + this.logger.error(error.toString()); + return; } - this.logger.error(error, 'AgentSession is closing due to unrecoverable error'); + this.logger.error(error, 'AgentSession is closing due to an unrecoverable error'); this.closingTask = (async () => { await this.closeImpl(CloseReason.ERROR, error); @@ -833,7 +941,6 @@ export class AgentSession< } if (state === 'speaking') { - // Reset error counts when agent starts speaking this.llmErrorCounts = 0; this.ttsErrorCounts = 0; @@ -867,7 +974,7 @@ export class AgentSession< this._agentState = state; // Handle user away timer based on state changes - if (state === 'listening' && this.userState === 'listening') { + if (state === 'listening' && this._userState === 'listening') { this._setUserAwayTimer(); } else { this._cancelUserAwayTimer(); @@ -880,29 +987,32 @@ export class AgentSession< } /** @internal */ - _updateUserState(state: UserState, lastSpeakingTime?: number) { - if (this.userState === state) { + _updateUserState( + state: UserState, + options?: 
{ lastSpeakingTime?: number; otelContext?: Context }, + ) { + if (this._userState === state) { return; } - if (state === 'speaking' && this.userSpeakingSpan === undefined) { - this.userSpeakingSpan = tracer.startSpan({ + if (state === 'speaking' && this._userSpeakingSpan === undefined) { + this._userSpeakingSpan = tracer.startSpan({ name: 'user_speaking', - context: this.rootSpanContext, - startTime: lastSpeakingTime, + context: options?.otelContext ?? this.rootSpanContext, + startTime: options?.lastSpeakingTime, }); const linked = this._roomIO?.linkedParticipant; if (linked) { - setParticipantSpanAttributes(this.userSpeakingSpan, linked); + setParticipantSpanAttributes(this._userSpeakingSpan, linked); } - } else if (this.userSpeakingSpan !== undefined) { - this.userSpeakingSpan.end(lastSpeakingTime); - this.userSpeakingSpan = undefined; + } else if (this._userSpeakingSpan !== undefined) { + this._userSpeakingSpan.end(options?.lastSpeakingTime); + this._userSpeakingSpan = undefined; } - const oldState = this.userState; - this.userState = state; + const oldState = this._userState; + this._userState = state; // Handle user away timer based on state changes if (state === 'listening' && this._agentState === 'listening') { @@ -935,7 +1045,10 @@ export class AgentSession< private _setUserAwayTimer(): void { this._cancelUserAwayTimer(); - if (this.options.userAwayTimeout === null || this.options.userAwayTimeout === undefined) { + if ( + this.sessionOptions.userAwayTimeout === null || + this.sessionOptions.userAwayTimeout === undefined + ) { return; } @@ -946,7 +1059,7 @@ export class AgentSession< this.userAwayTimer = setTimeout(() => { this.logger.debug('User away timeout triggered'); this._updateUserState('away'); - }, this.options.userAwayTimeout * 1000); + }, this.sessionOptions.userAwayTimeout * 1000); } private _cancelUserAwayTimer(): void { @@ -970,7 +1083,7 @@ export class AgentSession< } private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void { - if 
(this.userState === 'away' && ev.isFinal) { + if (this._userState === 'away' && ev.isFinal) { this.logger.debug('User returned from away state due to speech input'); this._updateUserState('listening'); } @@ -978,7 +1091,13 @@ export class AgentSession< private async closeImpl( reason: ShutdownReason, - error: RealtimeModelError | LLMError | TTSError | STTError | null = null, + error: + | RealtimeModelError + | LLMError + | TTSError + | STTError + | InterruptionDetectionError + | null = null, drain: boolean = false, ): Promise { if (this.rootSpanContext) { @@ -992,7 +1111,13 @@ export class AgentSession< private async closeImplInner( reason: ShutdownReason, - error: RealtimeModelError | LLMError | TTSError | STTError | null = null, + error: + | RealtimeModelError + | LLMError + | TTSError + | STTError + | InterruptionDetectionError + | null = null, drain: boolean = false, ): Promise { if (!this.started) { @@ -1037,6 +1162,9 @@ export class AgentSession< this.output.audio = null; this.output.transcription = null; + await this.sessionHost?.close(); + this.sessionHost = undefined; + await this._roomIO?.close(); this._roomIO = undefined; @@ -1048,9 +1176,9 @@ export class AgentSession< this.sessionSpan = undefined; } - if (this.userSpeakingSpan) { - this.userSpeakingSpan.end(); - this.userSpeakingSpan = undefined; + if (this._userSpeakingSpan) { + this._userSpeakingSpan.end(); + this._userSpeakingSpan = undefined; } if (this.agentSpeakingSpan) { @@ -1062,7 +1190,7 @@ export class AgentSession< this.emit(AgentSessionEventTypes.Close, createCloseEvent(reason, error)); - this.userState = 'listening'; + this._userState = 'listening'; this._agentState = 'initializing'; this.rootSpanContext = undefined; this.llmErrorCounts = 0; diff --git a/agents/src/voice/audio_recognition.ts b/agents/src/voice/audio_recognition.ts index 7c76eac37..781af70de 100644 --- a/agents/src/voice/audio_recognition.ts +++ b/agents/src/voice/audio_recognition.ts @@ -12,15 +12,25 @@ import { } from 
'@opentelemetry/api'; import type { WritableStreamDefaultWriter } from 'node:stream/web'; import { ReadableStream } from 'node:stream/web'; +import { isAPIError } from '../_exceptions.js'; +import { apiConnectDefaults, intervalForRetry } from '../inference/interruption/defaults.js'; +import { InterruptionDetectionError } from '../inference/interruption/errors.js'; +import type { AdaptiveInterruptionDetector } from '../inference/interruption/interruption_detector.js'; +import { InterruptionStreamSentinel } from '../inference/interruption/interruption_stream.js'; +import { + type InterruptionSentinel, + type OverlappingSpeechEvent, +} from '../inference/interruption/types.js'; import type { LanguageCode } from '../language.js'; import { type ChatContext } from '../llm/chat_context.js'; import { log } from '../log.js'; import { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js'; import { IdentityTransform } from '../stream/identity_transform.js'; import { mergeReadableStreams } from '../stream/merge_readable_streams.js'; +import { type StreamChannel, createStreamChannel } from '../stream/stream_channel.js'; import { type SpeechEvent, SpeechEventType } from '../stt/stt.js'; import { traceTypes, tracer } from '../telemetry/index.js'; -import { Task, delay } from '../utils.js'; +import { Task, delay, waitForAbort } from '../utils.js'; import { type VAD, type VADEvent, VADEventType } from '../vad.js'; import type { TurnDetectionMode } from './agent_session.js'; import type { STTNode } from './io.js'; @@ -47,6 +57,7 @@ export interface PreemptiveGenerationInfo { } export interface RecognitionHooks { + onInterruption: (ev: OverlappingSpeechEvent) => void; onStartOfSpeech: (ev: VADEvent) => void; onVADInferenceDone: (ev: VADEvent) => void; onEndOfSpeech: (ev: VADEvent) => void; @@ -59,9 +70,13 @@ export interface RecognitionHooks { } export interface _TurnDetector { + /** The model name used by this turn detector. 
*/ + readonly model: string; + /** The provider name for this turn detector. */ + readonly provider: string; unlikelyThreshold: (language?: LanguageCode) => Promise; supportsLanguage: (language?: LanguageCode) => Promise; - predictEndOfTurn(chatCtx: ChatContext): Promise; + predictEndOfTurn(chatCtx: ChatContext, timeout?: number): Promise; } export interface AudioRecognitionOptions { @@ -74,7 +89,8 @@ export interface AudioRecognitionOptions { /** Turn detector for end-of-turn prediction. */ turnDetector?: _TurnDetector; /** Turn detection mode. */ - turnDetectionMode?: Exclude; + turnDetectionMode?: TurnDetectionMode; + interruptionDetection?: AdaptiveInterruptionDetector; /** Minimum endpointing delay in milliseconds. */ minEndpointingDelay: number; /** Maximum endpointing delay in milliseconds. */ @@ -99,12 +115,13 @@ export interface ParticipantLike { kind: ParticipantKind; } +// TODO add ability to update stt/vad/interruption-detection export class AudioRecognition { private hooks: RecognitionHooks; private stt?: STTNode; private vad?: VAD; private turnDetector?: _TurnDetector; - private turnDetectionMode?: Exclude; + private turnDetectionMode?: TurnDetectionMode; private minEndpointingDelay: number; private maxEndpointingDelay: number; private lastLanguage?: LanguageCode; @@ -138,6 +155,16 @@ export class AudioRecognition { private commitUserTurnTask?: Task; private vadTask?: Task; private sttTask?: Task; + private interruptionTask?: Task; + + // interruption detection + private interruptionDetection?: AdaptiveInterruptionDetector; + private _inputStartedAt?: number; + private ignoreUserTranscriptUntil?: number; + private transcriptBuffer: SpeechEvent[]; + private isInterruptionEnabled: boolean; + private isAgentSpeaking: boolean; + private interruptionStreamChannel?: StreamChannel; constructor(opts: AudioRecognitionOptions) { this.hooks = opts.recognitionHooks; @@ -154,9 +181,29 @@ export class AudioRecognition { this.getLinkedParticipant = 
opts.getLinkedParticipant; this.deferredInputStream = new DeferredReadableStream(); - const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee(); - this.vadInputStream = vadInputStream; - this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable); + this.interruptionDetection = opts.interruptionDetection; + this.transcriptBuffer = []; + this.isInterruptionEnabled = !!(opts.interruptionDetection && opts.vad); + this.isAgentSpeaking = false; + + if (opts.interruptionDetection) { + const [vadInputStream, teedInput] = this.deferredInputStream.stream.tee(); + const [inputStream, sttInputStream] = teedInput.tee(); + this.vadInputStream = vadInputStream; + this.sttInputStream = mergeReadableStreams( + sttInputStream, + this.silenceAudioTransform.readable, + ); + this.interruptionStreamChannel = createStreamChannel(); + this.interruptionStreamChannel.addStreamInput(inputStream); + } else { + const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee(); + this.vadInputStream = vadInputStream; + this.sttInputStream = mergeReadableStreams( + sttInputStream, + this.silenceAudioTransform.readable, + ); + } this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter(); } @@ -170,6 +217,16 @@ export class AudioRecognition { return this.audioTranscript; } + /** @internal */ + get inputStartedAt() { + return this._inputStartedAt; + } + + /** @internal */ + updateOptions(options: { turnDetection: TurnDetectionMode | undefined }): void { + this.turnDetectionMode = options.turnDetection; + } + async start() { this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal)); this.vadTask.result.catch((err) => { @@ -180,6 +237,220 @@ export class AudioRecognition { this.sttTask.result.catch((err) => { this.logger.error(`Error running STT task: ${err}`); }); + + this.interruptionTask = Task.from(({ signal }) => + this.createInterruptionTask(this.interruptionDetection, signal), + ); + 
this.interruptionTask.result.catch((err) => { + this.logger.error(`Error running interruption task: ${err}`); + }); + } + + async stop() { + await this.sttTask?.cancelAndWait(); + await this.vadTask?.cancelAndWait(); + await this.interruptionTask?.cancelAndWait(); + } + + async disableInterruptionDetection(): Promise { + this.isInterruptionEnabled = false; + this.interruptionDetection = undefined; + await this.interruptionTask?.cancelAndWait(); + this.interruptionTask = undefined; + await this.interruptionStreamChannel?.close(); + this.interruptionStreamChannel = undefined; + } + + async onStartOfAgentSpeech() { + this.isAgentSpeaking = true; + return this.trySendInterruptionSentinel(InterruptionStreamSentinel.agentSpeechStarted()); + } + + async onEndOfAgentSpeech(ignoreUserTranscriptUntil: number) { + if (!this.isInterruptionEnabled) { + this.isAgentSpeaking = false; + return; + } + + const inputOpen = await this.trySendInterruptionSentinel( + InterruptionStreamSentinel.agentSpeechEnded(), + ); + if (!inputOpen) { + this.isAgentSpeaking = false; + return; + } + + if (this.isAgentSpeaking) { + if (this.ignoreUserTranscriptUntil === undefined) { + this.onEndOfOverlapSpeech(Date.now()); + } + this.ignoreUserTranscriptUntil = this.ignoreUserTranscriptUntil + ? Math.min(ignoreUserTranscriptUntil, this.ignoreUserTranscriptUntil) + : ignoreUserTranscriptUntil; + + // flush held transcripts if possible + await this.flushHeldTranscripts(); + } + this.isAgentSpeaking = false; + } + + /** Start interruption inference when agent is speaking and overlap speech starts. */ + async onStartOfOverlapSpeech(speechDuration: number, startedAt: number, userSpeakingSpan?: Span) { + if (this.isAgentSpeaking) { + this.trySendInterruptionSentinel( + InterruptionStreamSentinel.overlapSpeechStarted( + speechDuration, + startedAt, + userSpeakingSpan, + ), + ); + } + } + + /** End interruption inference when overlap speech ends. 
*/ + async onEndOfOverlapSpeech(endedAt: number, userSpeakingSpan?: Span) { + if (!this.isInterruptionEnabled) { + return; + } + if (userSpeakingSpan && userSpeakingSpan.isRecording()) { + userSpeakingSpan.setAttribute(traceTypes.ATTR_IS_INTERRUPTION, 'false'); + } + + return this.trySendInterruptionSentinel(InterruptionStreamSentinel.overlapSpeechEnded(endedAt)); + } + + /** + * Flush held transcripts whose *end time* is after the ignoreUserTranscriptUntil timestamp. + * If the event has no timestamps, we assume it is the same as the next valid event. + */ + private async flushHeldTranscripts() { + if ( + !this.isInterruptionEnabled || + this.ignoreUserTranscriptUntil === undefined || + this.transcriptBuffer.length === 0 + ) { + return; + } + + if (!this._inputStartedAt) { + this.transcriptBuffer = []; + this.ignoreUserTranscriptUntil = undefined; + return; + } + + let emitFromIndex: number | null = null; + let shouldFlush = false; + + for (let i = 0; i < this.transcriptBuffer.length; i++) { + const ev = this.transcriptBuffer[i]; + if (!ev || !ev.alternatives || ev.alternatives.length === 0) { + emitFromIndex = Math.min(emitFromIndex ?? i, i); + continue; + } + const firstAlternative = ev.alternatives[0]; + if ( + firstAlternative.startTime === firstAlternative.endTime && + firstAlternative.startTime === 0 + ) { + this.transcriptBuffer = []; + this.ignoreUserTranscriptUntil = undefined; + return; + } + + if (this.#alternativeEndsBeforeIgnoreWindow(firstAlternative)) { + emitFromIndex = null; + } else { + emitFromIndex = Math.min(emitFromIndex ?? i, i); + shouldFlush = true; + break; + } + } + + const eventsToEmit = + emitFromIndex !== null && shouldFlush ? 
this.transcriptBuffer.slice(emitFromIndex) : []; + + this.transcriptBuffer = []; + this.ignoreUserTranscriptUntil = undefined; + + for (const event of eventsToEmit) { + this.logger.trace( + { + event: event.type, + }, + 're-emitting held user transcript', + ); + this.onSTTEvent(event); + } + } + + #alternativeEndsBeforeIgnoreWindow( + alternative: NonNullable[number], + ): boolean { + if ( + this.ignoreUserTranscriptUntil === undefined || + !this._inputStartedAt || + alternative.startTime <= 0 + ) { + return false; + } + + // `SpeechData.startTime` is in seconds relative to audio start, while `inputStartedAt` and + // `ignoreUserTranscriptUntil` are epoch milliseconds. + return alternative.startTime * 1000 + this._inputStartedAt < this.ignoreUserTranscriptUntil; + } + + private shouldHoldSttEvent(ev: SpeechEvent): boolean { + if (!this.isInterruptionEnabled) { + return false; + } + if (this.isAgentSpeaking) { + return true; + } + + // reset when the user starts speaking after the agent speech + if (ev.type === SpeechEventType.START_OF_SPEECH) { + this.ignoreUserTranscriptUntil = undefined; + this.transcriptBuffer = []; + return false; + } + + if (this.ignoreUserTranscriptUntil === undefined) { + return false; + } + // sentinel events are always held until we have something concrete to release them + if (!ev.alternatives || ev.alternatives.length === 0) { + return true; + } + + const alternative = ev.alternatives[0]; + + if ( + alternative.startTime !== alternative.endTime && + this.#alternativeEndsBeforeIgnoreWindow(alternative) + ) { + return true; + } + return false; + } + + private async trySendInterruptionSentinel( + frame: AudioFrame | InterruptionSentinel, + ): Promise { + if ( + this.isInterruptionEnabled && + this.interruptionStreamChannel && + !this.interruptionStreamChannel.closed + ) { + try { + await this.interruptionStreamChannel.write(frame); + return true; + } catch (e: unknown) { + this.logger.warn( + `could not forward interruption sentinel: ${e 
instanceof Error ? e.message : String(e)}`, + ); + } + } + return false; } private ensureUserTurnSpan(startTime?: number): Span { @@ -235,6 +506,25 @@ export class AudioRecognition { return; } + // handle interruption detection + // - hold the event until the ignore_user_transcript_until expires + // - release only relevant events + // - allow RECOGNITION_USAGE to pass through immediately + + if (ev.type !== SpeechEventType.RECOGNITION_USAGE && this.isInterruptionEnabled) { + if (this.shouldHoldSttEvent(ev)) { + this.logger.trace( + { event: ev.type, ignoreUserTranscriptUntil: this.ignoreUserTranscriptUntil }, + 'holding STT event until ignore_user_transcript_until expires', + ); + this.transcriptBuffer.push(ev); + return; + } else { + await this.flushHeldTranscripts(); + // no return here to allow the new event to be processed normally + } + } + switch (ev.type) { case SpeechEventType.FINAL_TRANSCRIPT: const transcript = ev.alternatives?.[0]?.text; @@ -418,6 +708,12 @@ export class AudioRecognition { } } + private onOverlapSpeechEvent(ev: OverlappingSpeechEvent) { + if (ev.isInterruption) { + this.hooks.onInterruption(ev); + } + } + private runEOUDetection(chatCtx: ChatContext) { this.logger.debug( { @@ -676,7 +972,9 @@ export class AudioRecognition { this.lastSpeakingTime = Date.now(); if (this.speechStartTime === undefined) { - this.speechStartTime = Date.now(); + // Backdate speechStartTime to the actual start of accumulated speech. + // ev.rawAccumulatedSpeech is in ms (VADEvent durations are all ms in TS). 
+ this.speechStartTime = Date.now() - ev.rawAccumulatedSpeech; } } break; @@ -708,6 +1006,136 @@ export class AudioRecognition { } } + private async createInterruptionTask( + interruptionDetection: AdaptiveInterruptionDetector | undefined, + signal: AbortSignal, + ) { + if (!interruptionDetection || !this.interruptionStreamChannel) return; + + let numRetries = 0; + const maxRetries = apiConnectDefaults.maxRetries; + + while (!signal.aborted) { + const stream = interruptionDetection.createStream(); + const eventReader = stream.stream().getReader(); + + const cleanup = async () => { + try { + signal.removeEventListener('abort', cleanup); + eventReader.releaseLock(); + await stream.close(); + } catch (e) { + this.logger.debug('createInterruptionTask: error during cleanup:', e); + } + }; + + signal.addEventListener('abort', cleanup, { once: true }); + + let forwardTask: Promise | undefined; + + try { + // Unlike Python where _agent_speech_started lives on `self` and survives retries, + // JS creates a fresh InterruptionStreamBase per retry with agentSpeechStarted = false. + // Re-inject the sentinel so the new stream knows the agent is mid-speech. 
+ if (numRetries > 0 && this.isAgentSpeaking) { + await stream.pushFrame(InterruptionStreamSentinel.agentSpeechStarted()); + } + + forwardTask = (async () => { + const inputReader = this.interruptionStreamChannel!.stream().getReader(); + const abortPromise = waitForAbort(signal); + + try { + while (!signal.aborted) { + const res = await Promise.race([inputReader.read(), abortPromise]); + if (!res) break; + + const { value, done } = res; + if (done) break; + + if (value instanceof AudioFrame) { + const frameDurationMs = (value.samplesPerChannel / value.sampleRate) * 1000; + this._inputStartedAt ??= Date.now() - frameDurationMs; + } else { + this._inputStartedAt ??= Date.now(); + } + + await stream.pushFrame(value); + } + } finally { + inputReader.releaseLock(); + } + })(); + + const abortPromise = waitForAbort(signal); + + while (!signal.aborted) { + const res = await Promise.race([eventReader.read(), abortPromise]); + if (!res) break; + const { done, value: ev } = res; + if (done) break; + this.onOverlapSpeechEvent(ev); + } + break; + } catch (e) { + if (signal.aborted) break; + + if (isAPIError(e)) { + if (maxRetries === 0 || !e.retryable) { + interruptionDetection.emitError( + new InterruptionDetectionError( + e.message, + Date.now(), + interruptionDetection.label, + false, + ), + ); + break; + } else if (numRetries >= maxRetries) { + interruptionDetection.emitError( + new InterruptionDetectionError( + `failed to detect interruption after ${numRetries} attempts`, + Date.now(), + interruptionDetection.label, + false, + ), + ); + break; + } else { + const retryInterval = intervalForRetry(numRetries); + interruptionDetection.emitError( + new InterruptionDetectionError( + e.message, + Date.now(), + interruptionDetection.label, + true, + ), + ); + this.logger.warn( + { model: interruptionDetection.label, attempt: numRetries }, + `failed to detect interruption, retrying in ${retryInterval}ms`, + ); + numRetries++; + await delay(retryInterval, { signal }); + } + } else 
{ + const msg = e instanceof Error ? e.message : String(e); + interruptionDetection.emitError( + new InterruptionDetectionError(msg, Date.now(), interruptionDetection.label, false), + ); + this.logger.error(e, 'Error in interruption task'); + break; + } + } finally { + await cleanup(); + await forwardTask?.catch((e) => { + this.logger.debug({ err: e }, 'interruption task exited with error'); + }); + } + } + this.logger.debug('Interruption task closed'); + } + setInputAudioStream(audioStream: ReadableStream) { this.deferredInputStream.setSource(audioStream); } @@ -784,6 +1212,8 @@ export class AudioRecognition { await this.sttTask?.cancelAndWait(); await this.vadTask?.cancelAndWait(); await this.bounceEOUTask?.cancelAndWait(); + await this.interruptionTask?.cancelAndWait(); + await this.interruptionStreamChannel?.close(); } private _endUserTurnSpan({ @@ -810,6 +1240,14 @@ export class AudioRecognition { } private get vadBaseTurnDetection() { - return ['vad', undefined].includes(this.turnDetectionMode); + if (typeof this.turnDetectionMode === 'object') { + return false; + } + + if (this.turnDetectionMode === undefined || this.turnDetectionMode === 'vad') { + return true; + } + + return false; } } diff --git a/agents/src/voice/audio_recognition_span.test.ts b/agents/src/voice/audio_recognition_span.test.ts index 9e9eb521c..cfe92a821 100644 --- a/agents/src/voice/audio_recognition_span.test.ts +++ b/agents/src/voice/audio_recognition_span.test.ts @@ -2,14 +2,27 @@ // // SPDX-License-Identifier: Apache-2.0 import { ParticipantKind } from '@livekit/rtc-node'; -import { InMemorySpanExporter, SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base'; +import { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api'; +import { + InMemorySpanExporter, + type ReadableSpan, + SimpleSpanProcessor, +} from '@opentelemetry/sdk-trace-base'; import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node'; +import { ReadableStream } from 'node:stream/web'; import 
{ describe, expect, it, vi } from 'vitest'; +import { ChatContext } from '../llm/chat_context.js'; import { initializeLogger } from '../log.js'; import { type SpeechEvent, SpeechEventType } from '../stt/stt.js'; -import { setTracerProvider } from '../telemetry/index.js'; +import { setTracerProvider, tracer } from '../telemetry/index.js'; import { VAD, type VADEvent, VADEventType, type VADStream } from '../vad.js'; -import { AudioRecognition, type _TurnDetector } from './audio_recognition.js'; +import { AgentSession } from './agent_session.js'; +import { + AudioRecognition, + type RecognitionHooks, + type _TurnDetector, +} from './audio_recognition.js'; +import type { STTNode } from './io.js'; function setupInMemoryTracing() { const exporter = new InMemorySpanExporter(); @@ -20,10 +33,25 @@ function setupInMemoryTracing() { return { exporter }; } -function spanByName(spans: any[], name: string) { +function spanByName(spans: ReadableSpan[], name: string) { return spans.find((s) => s.name === name); } +function createFakeSession(rootSpanContext = ROOT_CONTEXT): AgentSession { + return { + _agentState: 'listening', + _roomIO: { + linkedParticipant: { sid: 'p3', identity: 'charlie', kind: ParticipantKind.AGENT }, + }, + _setUserAwayTimer: vi.fn(), + _cancelUserAwayTimer: vi.fn(), + _userSpeakingSpan: undefined, + _userState: 'listening', + emit: vi.fn(), + rootSpanContext, + } as unknown as AgentSession; +} + class FakeVADStream extends (Object as unknown as { new (): VADStream }) { // We intentionally avoid extending the real VADStream (it is not exported as a value in JS output // in some bundling contexts). Instead we emulate the async iterator shape used by AudioRecognition. 
@@ -61,6 +89,8 @@ class FakeVAD extends VAD { } const alwaysTrueTurnDetector: _TurnDetector = { + model: 'test-turn-detector', + provider: 'test-provider', supportsLanguage: async () => true, unlikelyThreshold: async () => undefined, predictEndOfTurn: async () => 1.0, @@ -72,23 +102,15 @@ describe('AudioRecognition user_turn span parity', () => { it('creates user_turn and parents eou_detection under it (stt mode)', async () => { const { exporter } = setupInMemoryTracing(); - const hooks = { + const hooks: RecognitionHooks = { + onInterruption: vi.fn(), onStartOfSpeech: vi.fn(), onVADInferenceDone: vi.fn(), onEndOfSpeech: vi.fn(), onInterimTranscript: vi.fn(), onFinalTranscript: vi.fn(), onPreemptiveGeneration: vi.fn(), - retrieveChatCtx: () => - ({ - copy() { - return this; - }, - addMessage() {}, - toJSON() { - return { items: [] }; - }, - }) as any, + retrieveChatCtx: () => ChatContext.empty(), onEndOfTurn: vi.fn(async () => true), }; @@ -109,8 +131,8 @@ describe('AudioRecognition user_turn span parity', () => { { type: SpeechEventType.END_OF_SPEECH }, ]; - const sttNode = async () => - new ReadableStream({ + const sttNode: STTNode = async () => + new ReadableStream({ start(controller) { for (const ev of sttEvents) controller.enqueue(ev); controller.close(); @@ -118,8 +140,8 @@ describe('AudioRecognition user_turn span parity', () => { }); const ar = new AudioRecognition({ - recognitionHooks: hooks as any, - stt: sttNode as any, + recognitionHooks: hooks, + stt: sttNode, vad: undefined, turnDetector: alwaysTrueTurnDetector, turnDetectionMode: 'stt', @@ -140,6 +162,9 @@ describe('AudioRecognition user_turn span parity', () => { const eou = spanByName(spans, 'eou_detection'); expect(userTurn, 'user_turn span missing').toBeTruthy(); expect(eou, 'eou_detection span missing').toBeTruthy(); + if (!userTurn || !eou) { + throw new Error('expected user_turn and eou_detection spans'); + } expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId); @@ -158,23 +183,15 @@ 
describe('AudioRecognition user_turn span parity', () => { it('creates user_turn from VAD startTime (vad mode) and keeps same parenting', async () => { const { exporter } = setupInMemoryTracing(); - const hooks = { + const hooks: RecognitionHooks = { + onInterruption: vi.fn(), onStartOfSpeech: vi.fn(), onVADInferenceDone: vi.fn(), onEndOfSpeech: vi.fn(), onInterimTranscript: vi.fn(), onFinalTranscript: vi.fn(), onPreemptiveGeneration: vi.fn(), - retrieveChatCtx: () => - ({ - copy() { - return this; - }, - addMessage() {}, - toJSON() { - return { items: [] }; - }, - }) as any, + retrieveChatCtx: () => ChatContext.empty(), onEndOfTurn: vi.fn(async () => true), }; @@ -223,8 +240,8 @@ describe('AudioRecognition user_turn span parity', () => { }, ]; - const sttNode = async () => - new ReadableStream({ + const sttNode: STTNode = async () => + new ReadableStream({ start(controller) { for (const ev of sttEvents) controller.enqueue(ev); controller.close(); @@ -232,9 +249,9 @@ describe('AudioRecognition user_turn span parity', () => { }); const ar = new AudioRecognition({ - recognitionHooks: hooks as any, - stt: sttNode as any, - vad: new FakeVAD(vadEvents) as any, + recognitionHooks: hooks, + stt: sttNode, + vad: new FakeVAD(vadEvents), turnDetector: alwaysTrueTurnDetector, turnDetectionMode: 'vad', minEndpointingDelay: 0, @@ -253,9 +270,72 @@ describe('AudioRecognition user_turn span parity', () => { const eou = spanByName(spans, 'eou_detection'); expect(userTurn).toBeTruthy(); expect(eou).toBeTruthy(); + if (!userTurn || !eou) { + throw new Error('expected user_turn and eou_detection spans'); + } expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId); expect(hooks.onStartOfSpeech).toHaveBeenCalled(); expect(hooks.onEndOfSpeech).toHaveBeenCalled(); }); + + it('parents user_speaking under user_turn when an explicit speech context is provided', () => { + const { exporter } = setupInMemoryTracing(); + const sessionSpan = tracer.startSpan({ name: 'agent_session', 
context: ROOT_CONTEXT }); + const sessionContext = trace.setSpan(ROOT_CONTEXT, sessionSpan); + const fakeSession = createFakeSession(sessionContext); + const userTurn = tracer.startSpan({ name: 'user_turn', context: sessionContext }); + const userTurnContext = trace.setSpan(sessionContext, userTurn); + const speakingStartedAt = Date.now() - 100; + const speakingEndedAt = Date.now(); + + otelContext.with(userTurnContext, () => { + AgentSession.prototype._updateUserState.call(fakeSession, 'speaking', { + lastSpeakingTime: speakingStartedAt, + otelContext: otelContext.active(), + }); + AgentSession.prototype._updateUserState.call(fakeSession, 'listening', { + lastSpeakingTime: speakingEndedAt, + otelContext: otelContext.active(), + }); + }); + + userTurn.end(); + sessionSpan.end(); + + const spans = exporter.getFinishedSpans(); + const userSpeaking = spanByName(spans, 'user_speaking'); + const exportedUserTurn = spanByName(spans, 'user_turn'); + expect(userSpeaking).toBeTruthy(); + expect(exportedUserTurn).toBeTruthy(); + if (!userSpeaking || !exportedUserTurn) { + throw new Error('expected user_speaking and user_turn spans'); + } + expect(userSpeaking.parentSpanId).toBe(exportedUserTurn.spanContext().spanId); + expect(userSpeaking.attributes['lk.participant_id']).toBe('p3'); + }); + + it('keeps user_speaking attached to the session root without an explicit speech context', () => { + const { exporter } = setupInMemoryTracing(); + const sessionSpan = tracer.startSpan({ name: 'agent_session', context: ROOT_CONTEXT }); + const sessionContext = trace.setSpan(ROOT_CONTEXT, sessionSpan); + const fakeSession = createFakeSession(sessionContext); + + AgentSession.prototype._updateUserState.call(fakeSession, 'speaking', { + lastSpeakingTime: Date.now() - 100, + }); + AgentSession.prototype._updateUserState.call(fakeSession, 'listening', { + lastSpeakingTime: Date.now(), + }); + + sessionSpan.end(); + + const spans = exporter.getFinishedSpans(); + const userSpeaking = 
spanByName(spans, 'user_speaking'); + expect(userSpeaking).toBeTruthy(); + if (!userSpeaking) { + throw new Error('expected user_speaking span'); + } + expect(userSpeaking.parentSpanId).toBe(sessionSpan.spanContext().spanId); + }); }); diff --git a/agents/src/voice/events.ts b/agents/src/voice/events.ts index aae866a6e..aedb93c23 100644 --- a/agents/src/voice/events.ts +++ b/agents/src/voice/events.ts @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc. // // SPDX-License-Identifier: Apache-2.0 +import type { InterruptionDetectionError } from '../inference/interruption/errors.js'; +import type { OverlappingSpeechEvent } from '../inference/interruption/types.js'; import type { LanguageCode } from '../language.js'; import type { ChatMessage, @@ -26,6 +28,7 @@ export enum AgentSessionEventTypes { FunctionToolsExecuted = 'function_tools_executed', MetricsCollected = 'metrics_collected', SpeechCreated = 'speech_created', + OverlappingSpeech = 'overlapping_speech', Error = 'error', Close = 'close', } @@ -216,13 +219,13 @@ export const createSpeechCreatedEvent = ({ export type ErrorEvent = { type: 'error'; - error: RealtimeModelError | STTError | TTSError | LLMError | unknown; + error: RealtimeModelError | STTError | TTSError | LLMError | InterruptionDetectionError | unknown; source: LLM | STT | TTS | RealtimeModel | unknown; createdAt: number; }; export const createErrorEvent = ( - error: RealtimeModelError | STTError | TTSError | LLMError | unknown, + error: RealtimeModelError | STTError | TTSError | LLMError | InterruptionDetectionError | unknown, source: LLM | STT | TTS | RealtimeModel | unknown, createdAt: number = Date.now(), ): ErrorEvent => ({ @@ -234,14 +237,20 @@ export const createErrorEvent = ( export type CloseEvent = { type: 'close'; - error: RealtimeModelError | STTError | TTSError | LLMError | null; + error: RealtimeModelError | STTError | TTSError | LLMError | InterruptionDetectionError | null; reason: ShutdownReason; createdAt: number; }; export 
const createCloseEvent = ( reason: ShutdownReason, - error: RealtimeModelError | STTError | TTSError | LLMError | null = null, + error: + | RealtimeModelError + | STTError + | TTSError + | LLMError + | InterruptionDetectionError + | null = null, createdAt: number = Date.now(), ): CloseEvent => ({ type: 'close', @@ -258,5 +267,6 @@ export type AgentEvent = | ConversationItemAddedEvent | FunctionToolsExecutedEvent | SpeechCreatedEvent + | OverlappingSpeechEvent | ErrorEvent | CloseEvent; diff --git a/agents/src/voice/generation.ts b/agents/src/voice/generation.ts index 1f141ab37..d2eba8fc0 100644 --- a/agents/src/voice/generation.ts +++ b/agents/src/voice/generation.ts @@ -51,6 +51,7 @@ export class _LLMGenerationData { generatedText: string = ''; generatedToolCalls: FunctionCall[]; id: string; + ttft?: number; constructor( public readonly textStream: ReadableStream, @@ -416,6 +417,8 @@ export function performLLMInference( toolCtx: ToolContext, modelSettings: ModelSettings, controller: AbortController, + model?: string, + provider?: string, ): [Task, _LLMGenerationData] { const textStream = new IdentityTransform(); const toolCallStream = new IdentityTransform(); @@ -431,8 +434,17 @@ export function performLLMInference( ); span.setAttribute(traceTypes.ATTR_FUNCTION_TOOLS, JSON.stringify(Object.keys(toolCtx))); + if (model) { + span.setAttribute(traceTypes.ATTR_GEN_AI_REQUEST_MODEL, model); + } + if (provider) { + span.setAttribute(traceTypes.ATTR_GEN_AI_PROVIDER_NAME, provider); + } + let llmStreamReader: ReadableStreamDefaultReader | null = null; let llmStream: ReadableStream | null = null; + const startTime = performance.now() / 1000; // Convert to seconds + let firstTokenReceived = false; try { llmStream = await node(chatCtx, toolCtx, modelSettings); @@ -455,6 +467,11 @@ export function performLLMInference( const { done, value: chunk } = result; if (done) break; + if (!firstTokenReceived) { + firstTokenReceived = true; + data.ttft = performance.now() / 1000 - 
startTime; + } + if (typeof chunk === 'string') { data.generatedText += chunk; await textWriter.write(chunk); @@ -493,6 +510,9 @@ export function performLLMInference( } span.setAttribute(traceTypes.ATTR_RESPONSE_TEXT, data.generatedText); + if (data.ttft !== undefined) { + span.setAttribute(traceTypes.ATTR_RESPONSE_TTFT, data.ttft); + } } catch (error) { if (error instanceof DOMException && error.name === 'AbortError') { // Abort signal was triggered, handle gracefully @@ -527,6 +547,8 @@ export function performTTSInference( text: ReadableStream, modelSettings: ModelSettings, controller: AbortController, + model?: string, + provider?: string, ): [Task, _TTSGenerationData] { const audioStream = new IdentityTransform(); const outputWriter = audioStream.writable.getWriter(); @@ -558,10 +580,27 @@ export function performTTSInference( } })(); - const _performTTSInferenceImpl = async (signal: AbortSignal) => { + let ttfb: number | undefined; + + const genData: _TTSGenerationData = { + audioStream: audioOutputStream, + timedTextsFut, + ttfb: undefined, + }; + + const _performTTSInferenceImpl = async (signal: AbortSignal, span: Span) => { + if (model) { + span.setAttribute(traceTypes.ATTR_GEN_AI_REQUEST_MODEL, model); + } + if (provider) { + span.setAttribute(traceTypes.ATTR_GEN_AI_PROVIDER_NAME, provider); + } + let ttsStreamReader: ReadableStreamDefaultReader | null = null; let ttsStream: ReadableStream | null = null; let pushedDuration = 0; + const startTime = performance.now() / 1000; // Convert to seconds + let firstByteReceived = false; try { ttsStream = await node(textOnlyStream.readable, modelSettings); @@ -595,6 +634,13 @@ export function performTTSInference( break; } + if (!firstByteReceived) { + firstByteReceived = true; + ttfb = performance.now() / 1000 - startTime; + genData.ttfb = ttfb; + span.setAttribute(traceTypes.ATTR_RESPONSE_TTFB, ttfb); + } + // Write the audio frame to the output stream await outputWriter.write(frame); @@ -631,6 +677,10 @@ export 
function performTTSInference( } throw error; } finally { + if (!timedTextsFut.done) { + // Ensure downstream consumers don't hang on errors. + timedTextsFut.resolve(null); + } ttsStreamReader?.releaseLock(); await ttsStream?.cancel(); await outputWriter.close(); @@ -642,16 +692,11 @@ export function performTTSInference( const currentContext = otelContext.active(); const inferenceTask = async (signal: AbortSignal) => - tracer.startActiveSpan(async () => _performTTSInferenceImpl(signal), { + tracer.startActiveSpan(async (span) => _performTTSInferenceImpl(signal, span), { name: 'tts_node', context: currentContext, }); - const genData: _TTSGenerationData = { - audioStream: audioOutputStream, - timedTextsFut, - }; - return [ Task.from((controller) => inferenceTask(controller.signal), controller, 'performTTSInference'), genData, @@ -719,7 +764,6 @@ export function performTextForwarding( export interface _AudioOut { audio: Array; - /** Future that will be set with the timestamp of the first frame's capture */ firstFrameFut: Future; } @@ -807,7 +851,6 @@ export function performAudioForwarding( ]; } -// function_tool span is already implemented in tracableToolExecution below (line ~796) export function performToolExecutions({ session, speechHandle, diff --git a/agents/src/voice/index.ts b/agents/src/voice/index.ts index 947013336..df3200cea 100644 --- a/agents/src/voice/index.ts +++ b/agents/src/voice/index.ts @@ -5,6 +5,16 @@ export { Agent, AgentTask, StopResponse, type AgentOptions, type ModelSettings } export { AgentSession, type AgentSessionOptions, type VoiceOptions } from './agent_session.js'; export * from './avatar/index.js'; export * from './background_audio.js'; +export { + type TextInputCallback, + type TextInputEvent, + RemoteSession, + type RemoteSessionCallbacks, + type RemoteSessionEventTypes, + SessionHost, + SessionTransport, + RoomSessionTransport, +} from './remote_session.js'; export * from './events.js'; export { type TimedString } from './io.js'; 
export * from './report.js'; diff --git a/agents/src/voice/remote_session.ts b/agents/src/voice/remote_session.ts new file mode 100644 index 000000000..a55e39a8c --- /dev/null +++ b/agents/src/voice/remote_session.ts @@ -0,0 +1,1083 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +import { Timestamp } from '@bufbuild/protobuf'; +import { AgentSession as pb } from '@livekit/protocol'; +import type { ByteStreamReader, Room, TextStreamInfo } from '@livekit/rtc-node'; +import type { TypedEventEmitter } from '@livekit/typed-emitter'; +import EventEmitter from 'events'; +import { TOPIC_SESSION_MESSAGES } from '../constants.js'; +import type { OverlappingSpeechEvent } from '../inference/interruption/types.js'; +import type { + ChatItem, + FunctionCall as FCItem, + FunctionCallOutput as FCOItem, +} from '../llm/chat_context.js'; +import type { ToolContext } from '../llm/tool_context.js'; +import { log } from '../log.js'; +import type { + InterruptionModelUsage, + LLMModelUsage, + STTModelUsage, + TTSModelUsage, +} from '../metrics/model_usage.js'; +import { Future, Task, shortuuid } from '../utils.js'; +import type { AgentSession, AgentSessionUsage } from './agent_session.js'; +import { + AgentSessionEventTypes, + type AgentState, + type AgentStateChangedEvent, + type ConversationItemAddedEvent, + type ErrorEvent, + type FunctionToolsExecutedEvent, + type MetricsCollectedEvent, + type UserInputTranscribedEvent, + type UserState, + type UserStateChangedEvent, +} from './events.js'; +import type { RoomIO } from './room_io/room_io.js'; + +// =========================================================================== +// Shared types (TextInput, Client event types, wire format aliases) +// =========================================================================== + +export interface TextInputEvent { + text: string; + info?: TextStreamInfo; + participantIdentity?: string; +} + +export type TextInputCallback = (session: 
AgentSession, ev: TextInputEvent) => void | Promise; + +/** @experimental */ +export type RemoteSessionEventTypes = + | 'agent_state_changed' + | 'user_state_changed' + | 'conversation_item_added' + | 'user_input_transcribed' + | 'function_tools_executed' + | 'overlapping_speech' + | 'session_usage' + | 'error'; + +/** @experimental */ +export type RemoteSessionCallbacks = { + agent_state_changed: (ev: pb.AgentSessionEvent_AgentStateChanged) => void; + user_state_changed: (ev: pb.AgentSessionEvent_UserStateChanged) => void; + conversation_item_added: (ev: pb.AgentSessionEvent_ConversationItemAdded) => void; + user_input_transcribed: (ev: pb.AgentSessionEvent_UserInputTranscribed) => void; + function_tools_executed: (ev: pb.AgentSessionEvent_FunctionToolsExecuted) => void; + overlapping_speech: (ev: pb.AgentSessionEvent_OverlappingSpeech) => void; + session_usage: (ev: pb.AgentSessionEvent_SessionUsageUpdated) => void; + error: (ev: pb.AgentSessionEvent_Error) => void; +}; + +// =========================================================================== +// SessionTransport +// =========================================================================== + +export abstract class SessionTransport { + async start(): Promise {} + abstract sendMessage(msg: pb.AgentSessionMessage): Promise; + abstract close(): Promise; + abstract [Symbol.asyncIterator](): AsyncIterator; +} + +export class RoomSessionTransport extends SessionTransport { + private readonly room: Room; + private handlerRegistered = false; + private closed = false; + private pendingMessages: pb.AgentSessionMessage[] = []; + private waitingResolve: ((value: IteratorResult) => void) | null = null; + private roomIO: RoomIO; + + constructor(room: Room, roomIO: RoomIO) { + super(); + this.room = room; + this.roomIO = roomIO; + } + + private getRemoteIdentity() { + return this.roomIO.linkedParticipant?.identity; + } + + override async start(): Promise { + if (this.handlerRegistered) return; + 
this.room.registerByteStreamHandler(TOPIC_SESSION_MESSAGES, this.onByteStream); + this.handlerRegistered = true; + } + + private onByteStream = (reader: ByteStreamReader, participantInfo: { identity: string }) => { + if (this.getRemoteIdentity() && participantInfo.identity !== this.getRemoteIdentity()) { + return; + } + this.readStream(reader).catch((e) => { + log().warn({ error: e }, 'failed to read binary stream message'); + }); + }; + + private async readStream(reader: ByteStreamReader): Promise { + try { + const chunks = await reader.readAll(); + let totalLength = 0; + for (const chunk of chunks) { + totalLength += chunk.length; + } + const data = new Uint8Array(totalLength); + let offset = 0; + for (const chunk of chunks) { + data.set(chunk, offset); + offset += chunk.length; + } + const msg = pb.AgentSessionMessage.fromBinary(data); + this.enqueue(msg); + } catch (e) { + if (!this.closed) { + log().warn({ error: e }, 'failed to parse binary stream message'); + } + } + } + + override async sendMessage(msg: pb.AgentSessionMessage): Promise { + if (this.closed || !this.room.isConnected) return; + + try { + const data = msg.toBinary(); + const opts: Record = { + topic: TOPIC_SESSION_MESSAGES, + name: shortuuid('AS_'), + }; + const remoteIdentity = this.getRemoteIdentity(); + if (remoteIdentity) { + opts.destinationIdentities = [remoteIdentity]; + } + const writer = await this.room.localParticipant!.streamBytes(opts); + await writer.write(new Uint8Array(data)); + await writer.close(); + } catch (e) { + log().warn({ error: e }, 'failed to send binary stream message'); + } + } + + override async close(): Promise { + if (this.closed) return; + this.closed = true; + + if (this.handlerRegistered) { + try { + this.room.unregisterByteStreamHandler(TOPIC_SESSION_MESSAGES); + } catch (e) { + log().debug({ error: e }, 'byte stream handler already unregistered'); + } + this.handlerRegistered = false; + } + + if (this.waitingResolve) { + this.waitingResolve({ + value: 
undefined as unknown as pb.AgentSessionMessage, + done: true, + }); + this.waitingResolve = null; + } + } + + private enqueue(msg: pb.AgentSessionMessage): void { + if (this.closed) return; + + if (this.waitingResolve) { + const resolve = this.waitingResolve; + this.waitingResolve = null; + resolve({ value: msg, done: false }); + } else { + this.pendingMessages.push(msg); + } + } + + override [Symbol.asyncIterator](): AsyncIterator { + return { + next: (): Promise> => { + if (this.closed && this.pendingMessages.length === 0) { + return Promise.resolve({ + value: undefined as unknown as pb.AgentSessionMessage, + done: true, + }); + } + + const pending = this.pendingMessages.shift(); + if (pending) { + return Promise.resolve({ value: pending, done: false }); + } + + return new Promise>((resolve) => { + this.waitingResolve = resolve; + }); + }, + return: (): Promise> => { + this.close(); + return Promise.resolve({ + value: undefined as unknown as pb.AgentSessionMessage, + done: true, + }); + }, + }; + } +} + +// =========================================================================== +// Enum maps +// =========================================================================== +const AGENT_STATE_MAP: Record = { + initializing: pb.AgentState.AS_INITIALIZING, + idle: pb.AgentState.AS_IDLE, + listening: pb.AgentState.AS_LISTENING, + thinking: pb.AgentState.AS_THINKING, + speaking: pb.AgentState.AS_SPEAKING, +}; + +const USER_STATE_MAP: Record = { + speaking: pb.UserState.US_SPEAKING, + listening: pb.UserState.US_LISTENING, + away: pb.UserState.US_AWAY, +}; + +// =========================================================================== +// Chat item / timestamp conversion helpers +// =========================================================================== +function msToTimestamp(ms: number): Timestamp { + return Timestamp.fromDate(new Date(ms)); +} + +function nowTimestamp(): Timestamp { + return Timestamp.fromDate(new Date()); +} + +function chatItemToProto(item: 
ChatItem): pb.ChatContext_ChatItem { + switch (item.type) { + case 'message': { + const msg = item; + const roleMap: Record = { + developer: pb.ChatRole.DEVELOPER, + system: pb.ChatRole.SYSTEM, + user: pb.ChatRole.USER, + assistant: pb.ChatRole.ASSISTANT, + }; + const content: pb.ChatMessage_ChatContent[] = []; + for (const c of msg.content) { + if (typeof c === 'string') { + content.push(new pb.ChatMessage_ChatContent({ payload: { case: 'text', value: c } })); + } + } + + const metricsReport = new pb.MetricsReport(); + if (msg.metrics.transcriptionDelay !== undefined) + metricsReport.transcriptionDelay = msg.metrics.transcriptionDelay; + if (msg.metrics.endOfTurnDelay !== undefined) + metricsReport.endOfTurnDelay = msg.metrics.endOfTurnDelay; + if (msg.metrics.onUserTurnCompletedDelay !== undefined) + metricsReport.onUserTurnCompletedDelay = msg.metrics.onUserTurnCompletedDelay; + if (msg.metrics.llmNodeTtft !== undefined) + metricsReport.llmNodeTtft = msg.metrics.llmNodeTtft; + if (msg.metrics.ttsNodeTtfb !== undefined) + metricsReport.ttsNodeTtfb = msg.metrics.ttsNodeTtfb; + if (msg.metrics.e2eLatency !== undefined) metricsReport.e2eLatency = msg.metrics.e2eLatency; + + const pbMsg = new pb.ChatMessage({ + id: msg.id, + role: roleMap[msg.role] ?? 
pb.ChatRole.ASSISTANT, + content, + interrupted: msg.interrupted, + metrics: metricsReport, + createdAt: msToTimestamp(msg.createdAt), + }); + if (msg.transcriptConfidence !== undefined) { + pbMsg.transcriptConfidence = msg.transcriptConfidence; + } + return new pb.ChatContext_ChatItem({ item: { case: 'message', value: pbMsg } }); + } + case 'function_call': { + const fc = item; + return new pb.ChatContext_ChatItem({ + item: { + case: 'functionCall', + value: new pb.FunctionCall({ + id: fc.id, + callId: fc.callId, + name: fc.name, + arguments: fc.args, + createdAt: msToTimestamp(fc.createdAt), + }), + }, + }); + } + case 'function_call_output': { + const fco = item; + return new pb.ChatContext_ChatItem({ + item: { + case: 'functionCallOutput', + value: new pb.FunctionCallOutput({ + id: fco.id, + callId: fco.callId, + name: fco.name, + output: fco.output, + isError: fco.isError, + createdAt: msToTimestamp(fco.createdAt), + }), + }, + }); + } + case 'agent_handoff': { + const ah = item; + return new pb.ChatContext_ChatItem({ + item: { + case: 'agentHandoff', + value: new pb.AgentHandoff({ + id: ah.id, + oldAgentId: ah.oldAgentId, + newAgentId: ah.newAgentId, + createdAt: msToTimestamp(ah.createdAt), + }), + }, + }); + } + } +} + +// =========================================================================== +// Usage conversion helpers +// =========================================================================== +function sessionUsageToProto(usage: AgentSessionUsage): pb.AgentSessionUsage { + const modelUsages: pb.ModelUsage[] = []; + for (const mu of usage.modelUsage) { + switch (mu.type) { + case 'llm_usage': { + const lu = mu as Partial; + modelUsages.push( + new pb.ModelUsage({ + usage: { + case: 'llm', + value: new pb.LLMModelUsage({ + provider: lu.provider ?? '', + model: lu.model ?? '', + inputTokens: lu.inputTokens ?? 0, + inputCachedTokens: lu.inputCachedTokens ?? 0, + inputAudioTokens: lu.inputAudioTokens ?? 
0, + inputCachedAudioTokens: lu.inputCachedAudioTokens ?? 0, + inputTextTokens: lu.inputTextTokens ?? 0, + inputCachedTextTokens: lu.inputCachedTextTokens ?? 0, + inputImageTokens: lu.inputImageTokens ?? 0, + inputCachedImageTokens: lu.inputCachedImageTokens ?? 0, + outputTokens: lu.outputTokens ?? 0, + outputAudioTokens: lu.outputAudioTokens ?? 0, + outputTextTokens: lu.outputTextTokens ?? 0, + sessionDuration: (lu.sessionDurationMs ?? 0) / 1000, + }), + }, + }), + ); + break; + } + case 'tts_usage': { + const tu = mu as Partial; + modelUsages.push( + new pb.ModelUsage({ + usage: { + case: 'tts', + value: new pb.TTSModelUsage({ + provider: tu.provider ?? '', + model: tu.model ?? '', + inputTokens: tu.inputTokens ?? 0, + outputTokens: tu.outputTokens ?? 0, + charactersCount: tu.charactersCount ?? 0, + audioDuration: (tu.audioDurationMs ?? 0) / 1000, + }), + }, + }), + ); + break; + } + case 'stt_usage': { + const su = mu as Partial; + modelUsages.push( + new pb.ModelUsage({ + usage: { + case: 'stt', + value: new pb.STTModelUsage({ + provider: su.provider ?? '', + model: su.model ?? '', + inputTokens: su.inputTokens ?? 0, + outputTokens: su.outputTokens ?? 0, + audioDuration: (su.audioDurationMs ?? 0) / 1000, + }), + }, + }), + ); + break; + } + case 'interruption_usage': { + const iu = mu as Partial; + modelUsages.push( + new pb.ModelUsage({ + usage: { + case: 'interruption', + value: new pb.InterruptionModelUsage({ + provider: iu.provider ?? '', + model: iu.model ?? '', + totalRequests: iu.totalRequests ?? 
0, + }), + }, + }), + ); + break; + } + } + } + return new pb.AgentSessionUsage({ modelUsage: modelUsages }); +} + +function toolNames(toolCtx: ToolContext | undefined): string[] { + if (!toolCtx) return []; + return Object.keys(toolCtx); +} + +function protoSerializeOptions(opts: { + turnHandling?: { endpointing?: unknown; interruption?: unknown }; + maxToolSteps?: number; + userAwayTimeout?: number | null; + preemptiveGeneration?: boolean; + useTtsAlignedTranscript?: boolean; +}): Record { + return { + endpointing: JSON.stringify(opts.turnHandling?.endpointing ?? {}), + interruption: JSON.stringify(opts.turnHandling?.interruption ?? {}), + max_tool_steps: String(opts.maxToolSteps ?? 0), + user_away_timeout: String(opts.userAwayTimeout ?? ''), + preemptive_generation: String(opts.preemptiveGeneration ?? false), + use_tts_aligned_transcript: String(opts.useTtsAlignedTranscript ?? false), + }; +} + +// =========================================================================== +// SessionHost (protobuf-based server-side handler) +// =========================================================================== +export class SessionHost { + private readonly transport: SessionTransport; + private session: AgentSession | undefined; + private started = false; + private eventsRegistered = false; + private recvTask: Task | undefined; + private readonly tasks = new Set>(); + private textInputCb: TextInputCallback | undefined; + + constructor(transport: SessionTransport) { + this.transport = transport; + } + + registerSession(session: AgentSession): void { + this.session = session; + if (!this.eventsRegistered) { + this.eventsRegistered = true; + session.on(AgentSessionEventTypes.AgentStateChanged, this.onAgentStateChanged); + session.on(AgentSessionEventTypes.UserStateChanged, this.onUserStateChanged); + session.on(AgentSessionEventTypes.ConversationItemAdded, this.onConversationItemAdded); + session.on(AgentSessionEventTypes.UserInputTranscribed, 
this.onUserInputTranscribed); + session.on(AgentSessionEventTypes.FunctionToolsExecuted, this.onFunctionToolsExecuted); + session.on(AgentSessionEventTypes.MetricsCollected, this.onMetricsCollected); + session.on(AgentSessionEventTypes.OverlappingSpeech, this.onOverlappingSpeech); + session.on(AgentSessionEventTypes.Error, this.onHostError); + } + } + + registerTextInput(textInputCb: TextInputCallback): void { + this.textInputCb = textInputCb; + } + + async start(): Promise { + if (this.started) return; + this.started = true; + await this.transport.start(); + this.recvTask = Task.from(async () => this.recvLoop()); + } + + async close(): Promise { + if (!this.started) return; + this.started = false; + + if (this.session && this.eventsRegistered) { + this.eventsRegistered = false; + this.session.off(AgentSessionEventTypes.AgentStateChanged, this.onAgentStateChanged); + this.session.off(AgentSessionEventTypes.UserStateChanged, this.onUserStateChanged); + this.session.off(AgentSessionEventTypes.ConversationItemAdded, this.onConversationItemAdded); + this.session.off(AgentSessionEventTypes.UserInputTranscribed, this.onUserInputTranscribed); + this.session.off(AgentSessionEventTypes.FunctionToolsExecuted, this.onFunctionToolsExecuted); + this.session.off(AgentSessionEventTypes.MetricsCollected, this.onMetricsCollected); + this.session.off(AgentSessionEventTypes.OverlappingSpeech, this.onOverlappingSpeech); + this.session.off(AgentSessionEventTypes.Error, this.onHostError); + } + + if (this.recvTask) { + this.recvTask.cancel(); + } + + await Promise.allSettled([...this.tasks].map((task) => task.cancelAndWait())); + this.tasks.clear(); + + await this.transport.close(); + } + + private async recvLoop(): Promise { + try { + for await (const msg of this.transport) { + if (msg.message.case === 'request') { + if (this.session) { + this.trackTask( + Task.from(async () => this.handleRequestSafe(msg.message.value as pb.SessionRequest)), + ); + } + } + } + } catch (e) { + if 
(this.started) { + log().warn({ error: e }, 'error processing session message'); + } + } + } + + private sendEvent(event: pb.AgentSessionEvent): void { + const msg = new pb.AgentSessionMessage({ + message: { case: 'event', value: event }, + }); + this.trackTask(Task.from(async () => this.transport.sendMessage(msg))); + } + + private emitEvent( + event: Event, + createdAt?: number, + ): void { + this.sendEvent( + new pb.AgentSessionEvent({ + createdAt: createdAt ? msToTimestamp(createdAt) : nowTimestamp(), + event: event, + }), + ); + } + + private onAgentStateChanged = (event: AgentStateChangedEvent): void => { + this.emitEvent( + { + case: 'agentStateChanged', + value: new pb.AgentSessionEvent_AgentStateChanged({ + oldState: AGENT_STATE_MAP[event.oldState], + newState: AGENT_STATE_MAP[event.newState], + }), + }, + event.createdAt, + ); + }; + + private onUserStateChanged = (event: UserStateChangedEvent): void => { + this.emitEvent( + { + case: 'userStateChanged', + value: new pb.AgentSessionEvent_UserStateChanged({ + oldState: USER_STATE_MAP[event.oldState], + newState: USER_STATE_MAP[event.newState], + }), + }, + event.createdAt, + ); + }; + + private onUserInputTranscribed = (event: UserInputTranscribedEvent): void => { + this.emitEvent( + { + case: 'userInputTranscribed', + value: new pb.AgentSessionEvent_UserInputTranscribed({ + transcript: event.transcript, + isFinal: event.isFinal, + }), + }, + event.createdAt, + ); + }; + + private onConversationItemAdded = (event: ConversationItemAddedEvent): void => { + this.emitEvent( + { + case: 'conversationItemAdded', + value: new pb.AgentSessionEvent_ConversationItemAdded({ + item: chatItemToProto(event.item), + }), + }, + event.createdAt, + ); + }; + + private onFunctionToolsExecuted = (event: FunctionToolsExecutedEvent): void => { + const pbCalls = event.functionCalls.map( + (fc: FCItem) => new pb.FunctionCall({ name: fc.name, arguments: fc.args, callId: fc.callId }), + ); + const pbOutputs = 
event.functionCallOutputs + .filter((fco): fco is FCOItem => fco != null) + .map( + (fco: FCOItem) => + new pb.FunctionCallOutput({ + callId: fco.callId, + output: fco.output, + isError: fco.isError, + }), + ); + this.emitEvent( + { + case: 'functionToolsExecuted', + value: new pb.AgentSessionEvent_FunctionToolsExecuted({ + functionCalls: pbCalls, + functionCallOutputs: pbOutputs, + }), + }, + event.createdAt, + ); + }; + + private onOverlappingSpeech = (event: OverlappingSpeechEvent): void => { + const value = new pb.AgentSessionEvent_OverlappingSpeech({ + isInterruption: event.isInterruption, + detectionDelay: event.detectionDelayInS, + detectedAt: msToTimestamp(event.detectedAt), + }); + if (event.overlapStartedAt != null) { + value.overlapStartedAt = msToTimestamp(event.overlapStartedAt); + } + this.emitEvent({ case: 'overlappingSpeech', value }); + }; + + private onMetricsCollected = (event: MetricsCollectedEvent): void => { + if (!this.session) return; + this.emitEvent( + { + case: 'sessionUsageUpdated', + value: new pb.AgentSessionEvent_SessionUsageUpdated({ + usage: sessionUsageToProto(this.session.usage), + }), + }, + event.createdAt, + ); + }; + + private onHostError = (event: ErrorEvent): void => { + this.emitEvent( + { + case: 'error', + value: new pb.AgentSessionEvent_Error({ + message: event.error ? 
String(event.error) : 'Unknown error', + }), + }, + event.createdAt, + ); + }; + + private async handleRequestSafe(req: pb.SessionRequest): Promise { + try { + await this.handleRequest(req); + } catch (e) { + log().warn({ error: e, requestId: req.requestId }, 'error handling session request'); + try { + const resp = new pb.AgentSessionMessage({ + message: { + case: 'response', + value: new pb.SessionResponse({ + requestId: req.requestId, + error: 'internal error', + }), + }, + }); + await this.transport.sendMessage(resp); + } catch (e) { + log().debug({ error: e }, 'failed to send error response'); + } + } + } + + private async handleRequest(req: pb.SessionRequest): Promise { + if (!this.session) return; + + switch (req.request.case) { + case 'ping': + return this.sendResponse(req.requestId, { + case: 'pong', + value: new pb.SessionResponse_Pong(), + }); + case 'getChatHistory': + return this.handleGetChatHistory(req.requestId); + case 'getAgentInfo': + return this.handleGetAgentInfo(req.requestId); + case 'runInput': + return this.handleRunInput(req.requestId, req.request.value); + case 'getSessionState': + return this.handleGetSessionState(req.requestId); + case 'getRtcStats': + return this.sendResponse(req.requestId, { + case: 'getRtcStats', + value: new pb.SessionResponse_GetRTCStatsResponse({ + publisherStats: [], + subscriberStats: [], + }), + }); + case 'getSessionUsage': + return this.handleGetSessionUsage(req.requestId); + } + } + + private async handleGetChatHistory(requestId: string): Promise { + const items = this.session!.history.items.map(chatItemToProto); + return this.sendResponse(requestId, { + case: 'getChatHistory', + value: new pb.SessionResponse_GetChatHistoryResponse({ items }), + }); + } + + private async handleGetAgentInfo(requestId: string): Promise { + const agent = this.session!.currentAgent; + return this.sendResponse(requestId, { + case: 'getAgentInfo', + value: new pb.SessionResponse_GetAgentInfoResponse({ + id: agent.id, + 
instructions: agent.instructions, + tools: toolNames(agent.toolCtx), + chatCtx: agent.chatCtx.items.map(chatItemToProto), + }), + }); + } + + private async handleRunInput( + requestId: string, + input: pb.SessionRequest_RunInput, + ): Promise { + const text = input.text; + let items: pb.ChatContext_ChatItem[] = []; + let error: string | undefined; + + if (text) { + if (this.textInputCb) { + const cbResult = this.textInputCb(this.session!, { text }); + if (cbResult instanceof Promise) { + await cbResult; + } + } else { + try { + await this.session!.interrupt({ force: true }).await; + } catch { + // ignore + } + + const result = this.session!.run({ userInput: text }); + try { + await result.wait(); + } catch (e) { + error = e instanceof Error ? e.message : String(e); + } + items = result.events.map((ev) => chatItemToProto(ev.item)); + } + } + + return this.sendResponse( + requestId, + { + case: 'runInput', + value: new pb.SessionResponse_RunInputResponse({ items }), + }, + error, + ); + } + + private async handleGetSessionState(requestId: string): Promise { + const agent = this.session!.currentAgent; + const startedAt = this.session!._startedAt ?? 
Date.now(); + return this.sendResponse(requestId, { + case: 'getSessionState', + value: new pb.SessionResponse_GetSessionStateResponse({ + agentState: AGENT_STATE_MAP[this.session!.agentState], + userState: USER_STATE_MAP[this.session!.userState], + agentId: agent.id, + options: protoSerializeOptions({ + turnHandling: this.session!.sessionOptions.turnHandling, + maxToolSteps: this.session!.sessionOptions.maxToolSteps, + userAwayTimeout: this.session!.sessionOptions.userAwayTimeout, + preemptiveGeneration: this.session!.sessionOptions.preemptiveGeneration, + useTtsAlignedTranscript: this.session!.sessionOptions.useTtsAlignedTranscript, + }), + createdAt: msToTimestamp(startedAt), + }), + }); + } + + private async handleGetSessionUsage(requestId: string): Promise { + return this.sendResponse(requestId, { + case: 'getSessionUsage', + value: new pb.SessionResponse_GetSessionUsageResponse({ + usage: sessionUsageToProto(this.session!.usage), + createdAt: nowTimestamp(), + }), + }); + } + + private async sendResponse( + requestId: string, + response: pb.SessionResponse['response'], + error?: string, + ): Promise { + await this.transport.sendMessage( + new pb.AgentSessionMessage({ + message: { + case: 'response', + value: new pb.SessionResponse({ requestId, response, error }), + }, + }), + ); + } + + private trackTask(task: Task): void { + this.tasks.add(task); + task.addDoneCallback(() => { + this.tasks.delete(task); + }); + } +} + +// =========================================================================== +// RemoteSession (protobuf-based client-side interface) +// =========================================================================== + +/** @experimental */ +export class RemoteSession extends (EventEmitter as new () => TypedEventEmitter) { + private readonly transport: SessionTransport; + private started = false; + + private readonly tasks = new Set>(); + private readonly pendingRequests = new Map>(); + private recvTask: Task | undefined; + private readonly 
_logger = log(); + + constructor(transport: SessionTransport) { + super(); + this.transport = transport; + } + + static fromRoom(room: Room, roomIO: RoomIO): RemoteSession { + const transport = new RoomSessionTransport(room, roomIO); + return new RemoteSession(transport); + } + + async start(): Promise { + if (this.started) return; + this.started = true; + await this.transport.start(); + this.recvTask = Task.from(async () => this.recvLoop()); + } + + async close(): Promise { + if (!this.started) return; + this.started = false; + + if (this.recvTask) { + this.recvTask.cancel(); + } + + for (const pending of this.pendingRequests.values()) { + pending.reject(new Error('RemoteSession closed')); + } + this.pendingRequests.clear(); + + for (const task of this.tasks) { + task.cancel(); + } + this.tasks.clear(); + + await this.transport.close(); + } + + private async recvLoop(): Promise { + try { + for await (const msg of this.transport) { + switch (msg.message.case) { + case 'event': + this.dispatchEvent(msg.message.value); + break; + case 'response': + this.dispatchResponse(msg.message.value); + break; + } + } + } catch (e) { + if (this.started) { + this._logger.warn({ error: e }, 'error in RemoteSession recv loop'); + } + } + } + + private dispatchEvent(event: pb.AgentSessionEvent): void { + const ev = event.event; + switch (ev.case) { + case 'agentStateChanged': + this.emit('agent_state_changed', ev.value); + break; + case 'userStateChanged': + this.emit('user_state_changed', ev.value); + break; + case 'userInputTranscribed': + this.emit('user_input_transcribed', ev.value); + break; + case 'conversationItemAdded': + this.emit('conversation_item_added', ev.value); + break; + case 'functionToolsExecuted': + this.emit('function_tools_executed', ev.value); + break; + case 'overlappingSpeech': + this.emit('overlapping_speech', ev.value); + break; + case 'sessionUsageUpdated': + this.emit('session_usage', ev.value); + break; + case 'error': + this.emit('error', ev.value); + 
break; + } + } + + private dispatchResponse(response: pb.SessionResponse): void { + const future = this.pendingRequests.get(response.requestId); + this.pendingRequests.delete(response.requestId); + if (future && !future.done) { + future.resolve(response); + } + } + + private async sendRequest( + buildReq: (requestId: string) => pb.SessionRequest, + timeout = 60000, + ): Promise { + const requestId = shortuuid('req_'); + const req = buildReq(requestId); + req.requestId = requestId; + + const future = new Future(); + this.pendingRequests.set(requestId, future); + + const msg = new pb.AgentSessionMessage({ + message: { case: 'request', value: req }, + }); + await this.transport.sendMessage(msg); + + const timer = setTimeout(() => { + if (!future.done) { + this.pendingRequests.delete(requestId); + future.reject(new Error('RemoteSession request timed out')); + } + }, timeout); + + try { + const response = await future.await; + if (response.error) { + throw new Error(response.error); + } + return response; + } finally { + clearTimeout(timer); + } + } + + async fetchSessionState(): Promise { + const resp = await this.sendRequest( + (id) => + new pb.SessionRequest({ + requestId: id, + request: { case: 'getSessionState', value: new pb.SessionRequest_GetSessionState() }, + }), + ); + if (resp.response.case !== 'getSessionState') { + throw new Error('unexpected response type'); + } + return resp.response.value; + } + + async fetchChatHistory(): Promise { + const resp = await this.sendRequest( + (id) => + new pb.SessionRequest({ + requestId: id, + request: { case: 'getChatHistory', value: new pb.SessionRequest_GetChatHistory() }, + }), + ); + if (resp.response.case !== 'getChatHistory') { + throw new Error('unexpected response type'); + } + return resp.response.value; + } + + async fetchAgentInfo(): Promise { + const resp = await this.sendRequest( + (id) => + new pb.SessionRequest({ + requestId: id, + request: { case: 'getAgentInfo', value: new pb.SessionRequest_GetAgentInfo() 
}, + }), + ); + if (resp.response.case !== 'getAgentInfo') { + throw new Error('unexpected response type'); + } + return resp.response.value; + } + + async sendMessage( + text: string, + responseTimeout = 60000, + ): Promise { + const resp = await this.sendRequest( + (id) => + new pb.SessionRequest({ + requestId: id, + request: { case: 'runInput', value: new pb.SessionRequest_RunInput({ text }) }, + }), + responseTimeout, + ); + if (resp.response.case !== 'runInput') { + throw new Error('unexpected response type'); + } + return resp.response.value; + } + + async fetchRtcStats(): Promise { + const resp = await this.sendRequest( + (id) => + new pb.SessionRequest({ + requestId: id, + request: { case: 'getRtcStats', value: new pb.SessionRequest_GetRTCStats() }, + }), + ); + if (resp.response.case !== 'getRtcStats') { + throw new Error('unexpected response type'); + } + return resp.response.value; + } + + async fetchSessionUsage(): Promise { + const resp = await this.sendRequest( + (id) => + new pb.SessionRequest({ + requestId: id, + request: { case: 'getSessionUsage', value: new pb.SessionRequest_GetSessionUsage() }, + }), + ); + if (resp.response.case !== 'getSessionUsage') { + throw new Error('unexpected response type'); + } + return resp.response.value; + } + + private trackTask(task: Task): void { + this.tasks.add(task); + task.addDoneCallback(() => { + this.tasks.delete(task); + }); + } +} diff --git a/agents/src/voice/report.test.ts b/agents/src/voice/report.test.ts new file mode 100644 index 000000000..a774dfd40 --- /dev/null +++ b/agents/src/voice/report.test.ts @@ -0,0 +1,136 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +import { describe, expect, it } from 'vitest'; +import { ChatContext } from '../llm/chat_context.js'; +import type { AgentSessionOptions, VoiceOptions } from './agent_session.js'; +import { createSessionReport, sessionReportToJSON } from './report.js'; + +type ReportOptions = AgentSessionOptions & Partial; + +function baseOptions(): ReportOptions { + return { + maxToolSteps: 3, + preemptiveGeneration: false, + userAwayTimeout: 15, + useTtsAlignedTranscript: true, + turnHandling: {}, + }; +} + +function serializeOptions(options: ReportOptions) { + const report = createSessionReport({ + jobId: 'job', + roomId: 'room-id', + room: 'room', + options, + events: [], + chatHistory: ChatContext.empty(), + enableRecording: false, + timestamp: 0, + startedAt: 0, + }); + + const payload = sessionReportToJSON(report); + return payload.options as Record; +} + +describe('sessionReportToJSON', () => { + it('serializes interruption and endpointing values from turnHandling', () => { + const options = baseOptions(); + options.turnHandling = { + interruption: { + mode: 'adaptive', + discardAudioIfUninterruptible: false, + minDuration: 1200, + minWords: 2, + }, + endpointing: { + minDelay: 900, + maxDelay: 4500, + }, + }; + + const serialized = serializeOptions(options); + expect(serialized).toMatchObject({ + allow_interruptions: true, + discard_audio_if_uninterruptible: false, + min_interruption_duration: 1200, + min_interruption_words: 2, + min_endpointing_delay: 900, + max_endpointing_delay: 4500, + max_tool_steps: 3, + }); + }); + + it('prefers turnHandling values over deprecated flat fields', () => { + const options = baseOptions(); + options.allowInterruptions = false; + options.discardAudioIfUninterruptible = true; + options.minInterruptionDuration = 400; + options.minInterruptionWords = 1; + options.minEndpointingDelay = 500; + options.maxEndpointingDelay = 2500; + options.turnHandling = { + interruption: { + mode: 'vad', + 
discardAudioIfUninterruptible: false, + minDuration: 1400, + minWords: 4, + }, + endpointing: { + minDelay: 700, + maxDelay: 3900, + }, + }; + + const serialized = serializeOptions(options); + expect(serialized).toMatchObject({ + allow_interruptions: true, + discard_audio_if_uninterruptible: false, + min_interruption_duration: 1400, + min_interruption_words: 4, + min_endpointing_delay: 700, + max_endpointing_delay: 3900, + max_tool_steps: 3, + }); + }); + + it('serializes allow_interruptions from interruption.enabled when present', () => { + const options = baseOptions(); + options.allowInterruptions = true; + options.turnHandling = { + interruption: { + enabled: false, + mode: 'adaptive', + }, + }; + + const serialized = serializeOptions(options); + expect(serialized).toMatchObject({ + allow_interruptions: false, + max_tool_steps: 3, + }); + }); + + it('falls back to deprecated flat fields when turnHandling values are absent', () => { + const options = baseOptions(); + options.allowInterruptions = false; + options.discardAudioIfUninterruptible = false; + options.minInterruptionDuration = 600; + options.minInterruptionWords = 3; + options.minEndpointingDelay = 1000; + options.maxEndpointingDelay = 5000; + + const serialized = serializeOptions(options); + expect(serialized).toMatchObject({ + allow_interruptions: false, + discard_audio_if_uninterruptible: false, + min_interruption_duration: 600, + min_interruption_words: 3, + min_endpointing_delay: 1000, + max_endpointing_delay: 5000, + max_tool_steps: 3, + }); + }); +}); diff --git a/agents/src/voice/report.ts b/agents/src/voice/report.ts index 49701a696..bf3fb1474 100644 --- a/agents/src/voice/report.ts +++ b/agents/src/voice/report.ts @@ -2,14 +2,17 @@ // // SPDX-License-Identifier: Apache-2.0 import type { ChatContext } from '../llm/chat_context.js'; -import type { VoiceOptions } from './agent_session.js'; +import { type ModelUsage, filterZeroValues } from '../metrics/model_usage.js'; +import type { 
AgentSessionOptions, VoiceOptions } from './agent_session.js'; import type { AgentEvent } from './events.js'; +type ReportOptions = AgentSessionOptions & Partial; + export interface SessionReport { jobId: string; roomId: string; room: string; - options: VoiceOptions; + options: ReportOptions; events: AgentEvent[]; chatHistory: ChatContext; enableRecording: boolean; @@ -23,13 +26,15 @@ export interface SessionReport { audioRecordingStartedAt?: number; /** Duration of the session in milliseconds */ duration?: number; + /** Usage summaries for the session, one per model/provider combination */ + modelUsage?: ModelUsage[]; } export interface SessionReportOptions { jobId: string; roomId: string; room: string; - options: VoiceOptions; + options: ReportOptions; events: AgentEvent[]; chatHistory: ChatContext; enableRecording?: boolean; @@ -41,6 +46,8 @@ export interface SessionReportOptions { audioRecordingPath?: string; /** Timestamp when the audio recording started (milliseconds) */ audioRecordingStartedAt?: number; + /** Usage summaries for the session, one per model/provider combination */ + modelUsage?: ModelUsage[]; } export function createSessionReport(opts: SessionReportOptions): SessionReport { @@ -61,6 +68,7 @@ export function createSessionReport(opts: SessionReportOptions): SessionReport { audioRecordingStartedAt, duration: audioRecordingStartedAt !== undefined ? 
timestamp - audioRecordingStartedAt : undefined, + modelUsage: opts.modelUsage, }; } @@ -70,6 +78,37 @@ export function createSessionReport(opts: SessionReportOptions): SessionReport { // - Uploads to LiveKit Cloud observability endpoint with JWT auth export function sessionReportToJSON(report: SessionReport): Record { const events: Record[] = []; + const options = report.options; + const interruptionConfig = options.turnHandling?.interruption; + const endpointingConfig = options.turnHandling?.endpointing; + + // Keep backwards compatibility with deprecated fields + const allowInterruptions = + interruptionConfig?.enabled !== undefined + ? interruptionConfig.enabled + : interruptionConfig?.mode !== undefined + ? true + : options.allowInterruptions ?? options.voiceOptions?.allowInterruptions; + const discardAudioIfUninterruptible = + interruptionConfig?.discardAudioIfUninterruptible ?? + options.discardAudioIfUninterruptible ?? + options.voiceOptions?.discardAudioIfUninterruptible; + const minInterruptionDuration = + interruptionConfig?.minDuration ?? + options.minInterruptionDuration ?? + options.voiceOptions?.minInterruptionDuration; + const minInterruptionWords = + interruptionConfig?.minWords ?? + options.minInterruptionWords ?? + options.voiceOptions?.minInterruptionWords; + const minEndpointingDelay = + endpointingConfig?.minDelay ?? + options.minEndpointingDelay ?? + options.voiceOptions?.minEndpointingDelay; + const maxEndpointingDelay = + endpointingConfig?.maxDelay ?? + options.maxEndpointingDelay ?? 
+ options.voiceOptions?.maxEndpointingDelay; for (const event of report.events) { if (event.type === 'metrics_collected') { @@ -85,16 +124,17 @@ export function sessionReportToJSON(report: SessionReport): Record void | Promise; - -const DEFAULT_TEXT_INPUT_CALLBACK: TextInputCallback = (sess: AgentSession, ev: TextInputEvent) => { +export const DEFAULT_TEXT_INPUT_CALLBACK: TextInputCallback = (sess, ev) => { sess.interrupt(); sess.generateReply({ userInput: ev.text }); }; @@ -136,6 +128,7 @@ export class RoomIO { private agentTranscriptOutput?: ParalellTextOutput; private transcriptionSynchronizer?: TranscriptionSynchronizer; private participantIdentity: string | null = null; + private textStreamHandlerRegistered = false; private participantAvailableFuture: Future = new Future(); private roomConnectedFuture: Future = new Future(); @@ -146,8 +139,6 @@ export class RoomIO { private forwardUserTranscriptTask?: Task; private initTask?: Task; - private textStreamHandlerRegistered = false; - private logger = log(); constructor({ @@ -283,7 +274,7 @@ export class RoomIO { }; private onUserTextInput = (reader: TextStreamReader, participantInfo: { identity: string }) => { - if (participantInfo.identity !== this.participantIdentity) { + if (this.participantIdentity && participantInfo.identity !== this.participantIdentity) { return; } @@ -299,7 +290,7 @@ export class RoomIO { const textInputResult = this.inputOptions.textInputCallback!(this.agentSession, { text, info: reader.info, - participant, + participantIdentity: participantInfo.identity, }); // check if callback is a Promise @@ -387,6 +378,10 @@ export class RoomIO { return this.participantAvailableFuture.done; } + get rtcRoom(): Room { + return this.room; + } + get linkedParticipant(): RemoteParticipant | undefined { if (!this.isParticipantAvailable) { return undefined; @@ -439,6 +434,8 @@ export class RoomIO { } start() { + // -- create inputs -- + if (this.inputOptions.textEnabled) { try { 
this.room.registerTextStreamHandler(TOPIC_CHAT, this.onUserTextInput); @@ -450,7 +447,6 @@ export class RoomIO { } } - // -- create inputs -- if (this.inputOptions.audioEnabled) { this.audioInput = new ParticipantAudioInputStream({ room: this.room, diff --git a/agents/src/voice/turn_config/endpointing.ts b/agents/src/voice/turn_config/endpointing.ts new file mode 100644 index 000000000..f2603e00f --- /dev/null +++ b/agents/src/voice/turn_config/endpointing.ts @@ -0,0 +1,33 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +/** + * Configuration for endpointing, which determines when the user's turn is complete. + */ +export interface EndpointingOptions { + /** + * Endpointing mode. `"fixed"` uses a fixed delay, `"dynamic"` adjusts delay based on + * end-of-utterance prediction. + * @defaultValue "fixed" + */ + mode: 'fixed' | 'dynamic'; + /** + * Minimum time in milliseconds since the last detected speech before the agent declares the user's + * turn complete. In VAD mode this effectively behaves like `max(VAD silence, minDelay)`; + * in STT mode it is applied after the STT end-of-speech signal, so it can be additive with + * the STT provider's endpointing delay. + * @defaultValue 500 + */ + minDelay: number; + /** + * Maximum time in milliseconds the agent will wait before terminating the turn. + * @defaultValue 3000 + */ + maxDelay: number; +} + +export const defaultEndpointingOptions = { + mode: 'fixed', + minDelay: 500, + maxDelay: 3000, +} as const satisfies EndpointingOptions; diff --git a/agents/src/voice/turn_config/interruption.ts b/agents/src/voice/turn_config/interruption.ts new file mode 100644 index 000000000..63cf92c9d --- /dev/null +++ b/agents/src/voice/turn_config/interruption.ts @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +/** + * Configuration for interruption handling. 
+ */ +export interface InterruptionOptions { + /** + * Whether interruptions are enabled. + * @defaultValue true + */ + enabled: boolean; + /** + * Interruption handling strategy. `"adaptive"` for ML-based detection, `"vad"` for simple + * voice-activity detection. `undefined` means auto-detect. + * @defaultValue undefined + */ + mode: 'adaptive' | 'vad' | undefined; + /** + * When `true`, buffered audio is dropped while the agent is speaking and cannot be interrupted. + * @defaultValue true + */ + discardAudioIfUninterruptible: boolean; + /** + * Minimum speech length in milliseconds to register as an interruption. + * @defaultValue 500 + */ + minDuration: number; + /** + * Minimum number of words to consider an interruption, only used if STT is enabled. + * @defaultValue 0 + */ + minWords: number; + /** + * If set, emit an `agentFalseInterruption` event after this amount of time if the user is + * silent and no user transcript is detected after the interruption. Set to `undefined` to + * disable. The value is in milliseconds. + * @defaultValue 2000 + */ + falseInterruptionTimeout: number; + /** + * Whether to resume the false interruption after the `falseInterruptionTimeout`. + * @defaultValue true + */ + resumeFalseInterruption: boolean; +} + +export const defaultInterruptionOptions = { + enabled: true, + mode: undefined, + discardAudioIfUninterruptible: true, + minDuration: 500, + minWords: 0, + falseInterruptionTimeout: 2000, + resumeFalseInterruption: true, +} as const satisfies InterruptionOptions; diff --git a/agents/src/voice/turn_config/turn_handling.ts b/agents/src/voice/turn_config/turn_handling.ts new file mode 100644 index 000000000..1458fb663 --- /dev/null +++ b/agents/src/voice/turn_config/turn_handling.ts @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +import type { TurnDetectionMode } from '../agent_session.js'; +import { type EndpointingOptions, defaultEndpointingOptions } from './endpointing.js'; +import { type InterruptionOptions, defaultInterruptionOptions } from './interruption.js'; + +/** + * Configuration for the turn handling system. Used to configure the turn taking behavior of the + * session. + */ +export interface TurnHandlingOptions { + /** + * Strategy for deciding when the user has finished speaking. + * + * - `"stt"` – rely on speech-to-text end-of-utterance cues + * - `"vad"` – rely on Voice Activity Detection start/stop cues + * - `"realtime_llm"` – use server-side detection from a realtime LLM + * - `"manual"` – caller controls turn boundaries explicitly + * + * If not set, the session chooses the best available mode in priority order + * `realtime_llm → vad → stt → manual`; it automatically falls back if the necessary model + * is missing. + */ + turnDetection: TurnDetectionMode | undefined; + /** + * Configuration for endpointing. + */ + endpointing: Partial; + /** + * Configuration for interruption handling. + */ + interruption: Partial; +} + +export interface InternalTurnHandlingOptions extends TurnHandlingOptions { + endpointing: EndpointingOptions; + interruption: InterruptionOptions; +} + +export const defaultTurnHandlingOptions: InternalTurnHandlingOptions = { + turnDetection: undefined, + interruption: defaultInterruptionOptions, + endpointing: defaultEndpointingOptions, +}; diff --git a/agents/src/voice/turn_config/utils.test.ts b/agents/src/voice/turn_config/utils.test.ts new file mode 100644 index 000000000..96bfd6972 --- /dev/null +++ b/agents/src/voice/turn_config/utils.test.ts @@ -0,0 +1,148 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +import { beforeAll, describe, expect, it } from 'vitest'; +import { initializeLogger } from '../../log.js'; +import { defaultAgentSessionOptions } from '../agent_session.js'; +import { defaultEndpointingOptions } from './endpointing.js'; +import { defaultInterruptionOptions } from './interruption.js'; +import { defaultTurnHandlingOptions } from './turn_handling.js'; +import { migrateLegacyOptions, migrateTurnHandling } from './utils.js'; + +beforeAll(() => { + initializeLogger({ pretty: true, level: 'info' }); +}); + +describe('migrateLegacyOptions', () => { + it('should return all defaults when no options are provided', () => { + const { agentSessionOptions: result } = migrateLegacyOptions({}); + + expect(result.turnHandling).toEqual({ + turnDetection: defaultTurnHandlingOptions.turnDetection, + endpointing: defaultEndpointingOptions, + interruption: defaultInterruptionOptions, + }); + expect(result.maxToolSteps).toBe(defaultAgentSessionOptions.maxToolSteps); + expect(result.preemptiveGeneration).toBe(defaultAgentSessionOptions.preemptiveGeneration); + expect(result.userAwayTimeout).toBe(defaultAgentSessionOptions.userAwayTimeout); + }); + + it('should migrate legacy flat fields into nested turnHandling config', () => { + const { agentSessionOptions: result } = migrateLegacyOptions({ + voiceOptions: { + minInterruptionDuration: 1000, + minInterruptionWords: 3, + discardAudioIfUninterruptible: false, + minEndpointingDelay: 800, + maxEndpointingDelay: 5000, + }, + }); + + expect(result.turnHandling.interruption!.minDuration).toBe(1000); + expect(result.turnHandling.interruption!.minWords).toBe(3); + expect(result.turnHandling.interruption!.discardAudioIfUninterruptible).toBe(false); + expect(result.turnHandling.endpointing!.minDelay).toBe(800); + expect(result.turnHandling.endpointing!.maxDelay).toBe(5000); + }); + + it('should set interruption.enabled to false when allowInterruptions is false', () => { + const { 
agentSessionOptions: result } = migrateLegacyOptions({ + voiceOptions: { allowInterruptions: false }, + }); + + expect(result.turnHandling.interruption!.enabled).toBe(false); + }); + + it('should give top-level fields precedence over voiceOptions', () => { + const { agentSessionOptions: result } = migrateLegacyOptions({ + voiceOptions: { + minInterruptionDuration: 1000, + maxEndpointingDelay: 5000, + maxToolSteps: 10, + }, + turnHandling: { + interruption: { + minDuration: 2000, + }, + endpointing: { + maxDelay: 8000, + }, + }, + maxToolSteps: 5, + }); + + expect(result.turnHandling.interruption!.minDuration).toBe(2000); + expect(result.turnHandling.endpointing!.maxDelay).toBe(8000); + expect(result.maxToolSteps).toBe(5); + }); + + it('should preserve top-level turnDetection in the result', () => { + const { agentSessionOptions: result } = migrateLegacyOptions({ + turnDetection: 'vad', + }); + + expect(result.turnHandling.turnDetection).toBe('vad'); + }); +}); + +describe('migrateTurnHandling', () => { + it('should return empty partial when no deprecated Agent fields are given', () => { + const result = migrateTurnHandling({}); + expect(result).toEqual({}); + }); + + it('should set interruption.enabled to false when allowInterruptions is false', () => { + const result = migrateTurnHandling({ allowInterruptions: false }); + expect(result.interruption).toEqual({ enabled: false }); + expect(result.endpointing).toBeUndefined(); + expect(result.turnDetection).toBeUndefined(); + }); + + it('should not set interruption when allowInterruptions is true or undefined', () => { + expect(migrateTurnHandling({ allowInterruptions: true })).toEqual({}); + expect(migrateTurnHandling({ allowInterruptions: undefined })).toEqual({}); + }); + + it('should map minEndpointingDelay to endpointing.minDelay', () => { + const result = migrateTurnHandling({ minEndpointingDelay: 800 }); + expect(result.endpointing).toEqual({ minDelay: 800 }); + }); + + it('should map maxEndpointingDelay to 
endpointing.maxDelay', () => { + const result = migrateTurnHandling({ maxEndpointingDelay: 5000 }); + expect(result.endpointing).toEqual({ maxDelay: 5000 }); + }); + + it('should pass through turnDetection', () => { + const result = migrateTurnHandling({ turnDetection: 'vad' }); + expect(result.turnDetection).toBe('vad'); + }); + + it('should combine all deprecated Agent fields', () => { + const result = migrateTurnHandling({ + turnDetection: 'stt', + allowInterruptions: false, + minEndpointingDelay: 400, + maxEndpointingDelay: 3000, + }); + expect(result.turnDetection).toBe('stt'); + expect(result.interruption).toEqual({ enabled: false }); + expect(result.endpointing).toEqual({ minDelay: 400, maxDelay: 3000 }); + }); + + it('should ignore deprecated Agent fields when explicit turnHandling is provided', () => { + const turnHandling = { + endpointing: { minDelay: 999, maxDelay: 4000 }, + interruption: { enabled: true }, + turnDetection: 'vad' as const, + }; + const result = migrateTurnHandling({ + turnHandling, + turnDetection: 'stt', + allowInterruptions: false, + minEndpointingDelay: 100, + maxEndpointingDelay: 200, + }); + expect(result).toEqual(turnHandling); + }); +}); diff --git a/agents/src/voice/turn_config/utils.ts b/agents/src/voice/turn_config/utils.ts new file mode 100644 index 000000000..59022e58a --- /dev/null +++ b/agents/src/voice/turn_config/utils.ts @@ -0,0 +1,167 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +import { log } from '../../log.js'; +import { + type AgentSessionOptions, + type InternalSessionOptions, + type TurnDetectionMode, + type VoiceOptions, +} from '../agent_session.js'; +import { defaultEndpointingOptions } from './endpointing.js'; +import { defaultInterruptionOptions } from './interruption.js'; +import { type TurnHandlingOptions, defaultTurnHandlingOptions } from './turn_handling.js'; + +const defaultSessionOptions = { + maxToolSteps: 3, + preemptiveGeneration: true, + userAwayTimeout: 15.0, + aecWarmupDuration: 3000, + turnHandling: {}, + useTtsAlignedTranscript: true, +} as const satisfies AgentSessionOptions; + +const defaultLegacyVoiceOptions: VoiceOptions = { + minEndpointingDelay: defaultTurnHandlingOptions.endpointing.minDelay, + maxEndpointingDelay: defaultTurnHandlingOptions.endpointing.maxDelay, + maxToolSteps: defaultSessionOptions.maxToolSteps, + preemptiveGeneration: defaultSessionOptions.preemptiveGeneration, +}; + +export function migrateLegacyOptions(legacyOptions: AgentSessionOptions): { + agentSessionOptions: InternalSessionOptions; + legacyVoiceOptions: VoiceOptions; +} { + const logger = log(); + const { + voiceOptions, + turnDetection, + stt, + vad, + llm, + tts, + userData, + connOptions, + ...sessionOptions + } = legacyOptions; + + if (voiceOptions !== undefined) { + logger.warn( + 'voiceOptions is deprecated, use top-level SessionOptions fields on AgentSessionOptions instead', + ); + } + + const turnHandling: TurnHandlingOptions = { + interruption: { + discardAudioIfUninterruptible: voiceOptions?.discardAudioIfUninterruptible, + minDuration: voiceOptions?.minInterruptionDuration, + minWords: voiceOptions?.minInterruptionWords, + ...sessionOptions.turnHandling?.interruption, + }, + endpointing: { + minDelay: voiceOptions?.minEndpointingDelay, + maxDelay: voiceOptions?.maxEndpointingDelay, + ...sessionOptions.turnHandling?.endpointing, + }, + + turnDetection: 
sessionOptions?.turnHandling?.turnDetection ?? turnDetection, + } as const; + + if ( + voiceOptions?.allowInterruptions === false && + turnHandling.interruption.enabled === undefined + ) { + turnHandling.interruption.enabled = false; + } + + const migratedVoiceOptions: AgentSessionOptions = {}; + + if (voiceOptions?.maxToolSteps !== undefined) { + migratedVoiceOptions.maxToolSteps = voiceOptions.maxToolSteps; + } + if (voiceOptions?.preemptiveGeneration !== undefined) { + migratedVoiceOptions.preemptiveGeneration = voiceOptions.preemptiveGeneration; + } + if (voiceOptions?.userAwayTimeout !== undefined) { + migratedVoiceOptions.userAwayTimeout = voiceOptions.userAwayTimeout; + } + + const legacyVoiceOptions = { ...defaultLegacyVoiceOptions, ...voiceOptions }; + + const agentSessionOptions = { + stt, + vad, + llm, + tts, + userData, + connOptions, + ...defaultSessionOptions, + ...migratedVoiceOptions, + ...sessionOptions, + turnHandling: mergeWithDefaults(turnHandling), + // repopulate the deprecated voice options with migrated options for backwards compatibility + voiceOptions: legacyVoiceOptions, + }; + + return { agentSessionOptions, legacyVoiceOptions }; +} + +/** Remove keys whose value is `undefined` so they don't shadow defaults when spread. */ +export function stripUndefined(obj: T): Partial { + return Object.fromEntries(Object.entries(obj).filter(([, v]) => v !== undefined)) as Partial; +} + +export function mergeWithDefaults(config: TurnHandlingOptions) { + return { + turnDetection: config.turnDetection ?? defaultTurnHandlingOptions.turnDetection, + endpointing: { ...defaultEndpointingOptions, ...stripUndefined(config.endpointing) }, + interruption: { ...defaultInterruptionOptions, ...stripUndefined(config.interruption) }, + } as const; +} + +/** + * Build a partial {@link TurnHandlingOptions} from deprecated Agent constructor fields. + * Mirrors the Python Agent compatibility path, but keeps the JS API surface explicit. 
+ */ +export function migrateTurnHandling(opts: { + turnDetection?: TurnDetectionMode; + allowInterruptions?: boolean; + minEndpointingDelay?: number; + maxEndpointingDelay?: number; + turnHandling?: TurnHandlingOptions; +}): Partial { + if (opts.turnHandling !== undefined) { + return opts.turnHandling; + } + + const migrated: Partial = {}; + + const endpointing: Partial = {}; + if (opts.minEndpointingDelay !== undefined) { + endpointing.minDelay = opts.minEndpointingDelay; + } + if (opts.maxEndpointingDelay !== undefined) { + endpointing.maxDelay = opts.maxEndpointingDelay; + } + if (Object.keys(endpointing).length > 0) { + migrated.endpointing = endpointing; + } + + const interruption: Partial = {}; + if (opts.allowInterruptions === false) { + interruption.enabled = false; + } + if (Object.keys(interruption).length > 0) { + migrated.interruption = interruption; + } + + if (opts.turnDetection !== undefined) { + migrated.turnDetection = opts.turnDetection; + } + + return { + ...(migrated.endpointing ? { endpointing: migrated.endpointing } : {}), + ...(migrated.interruption ? { interruption: migrated.interruption } : {}), + ...(migrated.turnDetection !== undefined ? { turnDetection: migrated.turnDetection } : {}), + }; +} diff --git a/examples/src/basic_agent.ts b/examples/src/basic_agent.ts index ac2512b2c..b85d633ac 100644 --- a/examples/src/basic_agent.ts +++ b/examples/src/basic_agent.ts @@ -9,6 +9,7 @@ import { defineAgent, inference, llm, + log, metrics, voice, } from '@livekit/agents'; @@ -39,7 +40,12 @@ export default defineAgent({ }, }); + const logger = log(); + const session = new voice.AgentSession({ + // VAD and turn detection are used to determine when the user is speaking and when the agent should respond + // See more at https://docs.livekit.io/agents/build/turns + vad: ctx.proc.userData.vad! 
as silero.VAD, // Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand // See all available models at https://docs.livekit.io/agents/models/stt/ stt: new inference.STT({ @@ -61,18 +67,17 @@ export default defineAgent({ 'rime/arcana', ], }), - // VAD and turn detection are used to determine when the user is speaking and when the agent should respond - // See more at https://docs.livekit.io/agents/build/turns - vad: ctx.proc.userData.vad! as silero.VAD, - turnDetection: new livekit.turnDetector.MultilingualModel(), - // to use realtime model, replace the stt, llm, tts and vad with the following - // llm: new openai.realtime.RealtimeModel(), - voiceOptions: { - // allow the LLM to generate a response while waiting for the end of turn - preemptiveGeneration: true, - useTtsAlignedTranscript: true, - aecWarmupDuration: 3000, + preemptiveGeneration: true, + turnHandling: { + turnDetection: new livekit.turnDetector.MultilingualModel(), + interruption: { + resumeFalseInterruption: true, + falseInterruptionTimeout: 1, + mode: 'adaptive', + }, }, + useTtsAlignedTranscript: true, + aecWarmupDuration: 3000, connOptions: { // Example of overriding the default connection options for the LLM/TTS/STT llmConnOptions: { @@ -83,11 +88,23 @@ export default defineAgent({ }, }); - const usageCollector = new metrics.UsageCollector(); - + // Log metrics as they are emitted session.on(voice.AgentSessionEventTypes.MetricsCollected, (ev) => { metrics.logMetrics(ev.metrics); - usageCollector.collect(ev.metrics); + }); + + // Log usage summary when job shuts down + ctx.addShutdownCallback(async () => { + logger.info( + { + usage: session.usage, + }, + 'Session usage summary', + ); + }); + + session.on(voice.AgentSessionEventTypes.OverlappingSpeech, (ev) => { + logger.warn({ type: ev.type, isInterruption: ev.isInterruption }, 'user overlapping speech'); }); await session.start({ diff --git a/examples/src/bey_avatar.ts 
b/examples/src/bey_avatar.ts index f8eb1f3d1..5cad7655c 100644 --- a/examples/src/bey_avatar.ts +++ b/examples/src/bey_avatar.ts @@ -1,7 +1,15 @@ // SPDX-FileCopyrightText: 2025 LiveKit, Inc. // // SPDX-License-Identifier: Apache-2.0 -import { type JobContext, WorkerOptions, cli, defineAgent, metrics, voice } from '@livekit/agents'; +import { + type JobContext, + WorkerOptions, + cli, + defineAgent, + log, + metrics, + voice, +} from '@livekit/agents'; import * as bey from '@livekit/agents-plugin-bey'; import * as openai from '@livekit/agents-plugin-openai'; import { fileURLToPath } from 'node:url'; @@ -12,6 +20,7 @@ export default defineAgent({ instructions: 'You are a helpful assistant. Speak clearly and concisely.', }); + const logger = log(); const session = new voice.AgentSession({ llm: new openai.realtime.RealtimeModel({ voice: 'alloy', @@ -32,11 +41,19 @@ export default defineAgent({ }); await avatar.start(session, ctx.room); - const usageCollector = new metrics.UsageCollector(); - + // Log metrics as they are emitted (session.usage is automatically collected) session.on(voice.AgentSessionEventTypes.MetricsCollected, (ev) => { metrics.logMetrics(ev.metrics); - usageCollector.collect(ev.metrics); + }); + + // Log usage summary when job shuts down + ctx.addShutdownCallback(async () => { + logger.info( + { + usage: session.usage, + }, + 'Session usage summary', + ); }); session.generateReply({ diff --git a/examples/src/cartesia_tts.ts b/examples/src/cartesia_tts.ts index a11aae33a..4d40b7334 100644 --- a/examples/src/cartesia_tts.ts +++ b/examples/src/cartesia_tts.ts @@ -7,6 +7,7 @@ import { WorkerOptions, cli, defineAgent, + log, metrics, voice, } from '@livekit/agents'; @@ -28,6 +29,7 @@ export default defineAgent({ "You are a helpful assistant, you can hear the user's message and respond to it.", }); + const logger = log(); const vad = ctx.proc.userData.vad! 
as silero.VAD; const session = new voice.AgentSession({ @@ -40,11 +42,19 @@ export default defineAgent({ turnDetection: new livekit.turnDetector.MultilingualModel(), }); - const usageCollector = new metrics.UsageCollector(); - + // Log metrics as they are emitted (session.usage is automatically collected) session.on(voice.AgentSessionEventTypes.MetricsCollected, (ev) => { metrics.logMetrics(ev.metrics); - usageCollector.collect(ev.metrics); + }); + + // Log usage summary when job shuts down + ctx.addShutdownCallback(async () => { + logger.info( + { + usage: session.usage, + }, + 'Session usage summary', + ); }); await session.start({ diff --git a/examples/src/comprehensive_test.ts b/examples/src/comprehensive_test.ts index b6d08d6cd..bac9910cc 100644 --- a/examples/src/comprehensive_test.ts +++ b/examples/src/comprehensive_test.ts @@ -8,6 +8,7 @@ import { cli, defineAgent, llm, + log, metrics, voice, } from '@livekit/agents'; @@ -238,6 +239,7 @@ export default defineAgent({ proc.userData.vad = await silero.VAD.load(); }, entry: async (ctx: JobContext) => { + const logger = log(); const vad = ctx.proc.userData.vad! 
as silero.VAD; const session = new voice.AgentSession({ vad, @@ -249,11 +251,19 @@ export default defineAgent({ testedRealtimeLlmChoices: new Set(), }, }); - const usageCollector = new metrics.UsageCollector(); - + // Log metrics as they are emitted (session.usage is automatically collected) session.on(voice.AgentSessionEventTypes.MetricsCollected, (ev) => { metrics.logMetrics(ev.metrics); - usageCollector.collect(ev.metrics); + }); + + // Log usage summary when job shuts down + ctx.addShutdownCallback(async () => { + logger.info( + { + usage: session.usage, + }, + 'Session usage summary', + ); }); await session.start({ diff --git a/examples/src/hedra/hedra_avatar.ts b/examples/src/hedra/hedra_avatar.ts index 38c103d66..9bead6094 100644 --- a/examples/src/hedra/hedra_avatar.ts +++ b/examples/src/hedra/hedra_avatar.ts @@ -9,6 +9,7 @@ import { defineAgent, inference, initializeLogger, + log, metrics, voice, } from '@livekit/agents'; @@ -33,6 +34,7 @@ export default defineAgent({ instructions: 'You are a helpful assistant. 
Speak clearly and concisely.', }); + const logger = log(); const session = new voice.AgentSession({ stt: new inference.STT({ model: 'deepgram/nova-3', @@ -68,11 +70,19 @@ export default defineAgent({ }); await avatar.start(session, ctx.room); - const usageCollector = new metrics.UsageCollector(); - + // Log metrics as they are emitted (session.usage is automatically collected) session.on(voice.AgentSessionEventTypes.MetricsCollected, (ev) => { metrics.logMetrics(ev.metrics); - usageCollector.collect(ev.metrics); + }); + + // Log usage summary when job shuts down + ctx.addShutdownCallback(async () => { + logger.info( + { + usage: session.usage, + }, + 'Session usage summary', + ); }); session.generateReply({ diff --git a/examples/src/inworld_tts.ts b/examples/src/inworld_tts.ts index fec5c552d..45bb6961c 100644 --- a/examples/src/inworld_tts.ts +++ b/examples/src/inworld_tts.ts @@ -7,6 +7,7 @@ import { WorkerOptions, cli, defineAgent, + log, metrics, voice, } from '@livekit/agents'; @@ -26,6 +27,7 @@ export default defineAgent({ "You are a helpful assistant, you can hear the user's message and respond to it in 1-2 short sentences.", }); + const logger = log(); // Create TTS instance const tts = new inworld.TTS({ timestampType: 'WORD', @@ -96,11 +98,19 @@ export default defineAgent({ } }); - const usageCollector = new metrics.UsageCollector(); - + // Log metrics as they are emitted (session.usage is automatically collected) session.on(voice.AgentSessionEventTypes.MetricsCollected, (ev) => { metrics.logMetrics(ev.metrics); - usageCollector.collect(ev.metrics); + }); + + // Log usage summary when job shuts down + ctx.addShutdownCallback(async () => { + logger.info( + { + usage: session.usage, + }, + 'Session usage summary', + ); }); await session.start({ diff --git a/package.json b/package.json index 7db46fed8..800baf185 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,7 @@ "doc": "typedoc && mkdir -p docs/assets/github && cp .github/*.png docs/assets/github/ 
&& find docs -name '*.html' -type f -exec sed -i.bak 's|=\"/.github/|=\"assets/github/|g' {} + && find docs -name '*.bak' -delete" }, "devDependencies": { - "@changesets/cli": "^2.29.6", + "@changesets/cli": "^2.30.0", "@livekit/changesets-changelog-github": "^0.0.4", "@rushstack/heft": "^0.66.0", "@trivago/prettier-plugin-sort-imports": "^4.3.0", diff --git a/patches/@changesets__assemble-release-plan.patch b/patches/@changesets__assemble-release-plan.patch index 5d0ce7e8e..6a62d6b58 100644 --- a/patches/@changesets__assemble-release-plan.patch +++ b/patches/@changesets__assemble-release-plan.patch @@ -1,32 +1,42 @@ diff --git a/dist/changesets-assemble-release-plan.cjs.js b/dist/changesets-assemble-release-plan.cjs.js -index e07ba6e793021b6cfdec898afca517e293386ddb..a60de932d8521d241e5c9f272b4e4de93e6f09db 100644 +index e07ba6e793021b6cfdec898afca517e293386ddb..c286051e952d78d4dc6cda5832ba4226d63295c5 100644 --- a/dist/changesets-assemble-release-plan.cjs.js +++ b/dist/changesets-assemble-release-plan.cjs.js -@@ -317,6 +317,12 @@ function shouldBumpMajor({ +@@ -317,6 +317,9 @@ function shouldBumpMajor({ preInfo, onlyUpdatePeerDependentsWhenOutOfRange }) { -+ -+ // Temporary fix for: https://github.com/changesets/changesets/pull/1132 + if (depType === "peerDependencies") { + return false; + } -+ // we check if it is a peerDependency because if it is, our dependent bump type might need to be major. return depType === "peerDependencies" && nextRelease.type !== "none" && nextRelease.type !== "patch" && ( // 1. If onlyUpdatePeerDependentsWhenOutOfRange set to true, bump major if the version is leaving the range. 
diff --git a/dist/changesets-assemble-release-plan.esm.js b/dist/changesets-assemble-release-plan.esm.js -index ea2be567403c4ef94a65f3218ccb683cf5cb4bc1..fba07a45efee5d46d6ecfc3587f7a967a95df6f2 100644 +index ea2be567403c4ef94a65f3218ccb683cf5cb4bc1..0f5ac121649f1ac0a2b2e9704079474e360761e9 100644 --- a/dist/changesets-assemble-release-plan.esm.js +++ b/dist/changesets-assemble-release-plan.esm.js -@@ -306,6 +306,10 @@ function shouldBumpMajor({ +@@ -306,6 +306,9 @@ function shouldBumpMajor({ preInfo, onlyUpdatePeerDependentsWhenOutOfRange }) { -+ // Temporary fix for: https://github.com/changesets/changesets/pull/1132 + if (depType === "peerDependencies") { + return false; + } // we check if it is a peerDependency because if it is, our dependent bump type might need to be major. return depType === "peerDependencies" && nextRelease.type !== "none" && nextRelease.type !== "patch" && ( // 1. If onlyUpdatePeerDependentsWhenOutOfRange set to true, bump major if the version is leaving the range. +diff --git a/src/determine-dependents.ts b/src/determine-dependents.ts +index 47e32b6f09310d3ed03ab46b2d342d032f9014df..4edccca93152b1c8df59391b35e5f72de7ab8c05 100644 +--- a/src/determine-dependents.ts ++++ b/src/determine-dependents.ts +@@ -230,6 +230,9 @@ function shouldBumpMajor({ + preInfo: PreInfo | undefined; + onlyUpdatePeerDependentsWhenOutOfRange: boolean; + }) { ++ if (depType === "peerDependencies") { ++ return false; ++ } + // we check if it is a peerDependency because if it is, our dependent bump type might need to be major. 
+ return ( + depType === "peerDependencies" && diff --git a/plugins/cartesia/src/tts.ts b/plugins/cartesia/src/tts.ts index f30da3083..867f730c9 100644 --- a/plugins/cartesia/src/tts.ts +++ b/plugins/cartesia/src/tts.ts @@ -126,6 +126,14 @@ export class TTS extends tts.TTS { #opts: TTSOptions; label = 'cartesia.TTS'; + get model(): string { + return this.#opts.model; + } + + get provider(): string { + return 'Cartesia'; + } + constructor(opts: Partial = {}) { const resolvedOpts = { ...defaultTTSOptions, diff --git a/plugins/deepgram/src/stt.ts b/plugins/deepgram/src/stt.ts index 7ffc5142c..0f10ee331 100644 --- a/plugins/deepgram/src/stt.ts +++ b/plugins/deepgram/src/stt.ts @@ -72,6 +72,14 @@ export class STT extends stt.STT { label = 'deepgram.STT'; private abortController = new AbortController(); + get model(): string { + return this.#opts.model; + } + + get provider(): string { + return 'Deepgram'; + } + constructor(opts: Partial = defaultSTTOptions) { super({ streaming: true, diff --git a/plugins/deepgram/src/tts.ts b/plugins/deepgram/src/tts.ts index 5e9aceb30..6c6c2ff98 100644 --- a/plugins/deepgram/src/tts.ts +++ b/plugins/deepgram/src/tts.ts @@ -46,6 +46,14 @@ export class TTS extends tts.TTS { private opts: TTSOptions; label = 'deepgram.TTS'; + get model(): string { + return this.opts.model; + } + + get provider(): string { + return 'Deepgram'; + } + constructor(opts: Partial = {}) { super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, { streaming: opts.capabilities?.streaming ?? defaultTTSOptions.capabilities.streaming, diff --git a/plugins/google/src/llm.ts b/plugins/google/src/llm.ts index 7b9d4b4ac..cea76ac52 100644 --- a/plugins/google/src/llm.ts +++ b/plugins/google/src/llm.ts @@ -51,6 +51,13 @@ export class LLM extends llm.LLM { return this.#opts.model; } + get provider(): string { + if (this.#opts.vertexai) { + return 'Vertex AI'; + } + return 'Gemini'; + } + /** * Create a new instance of Google GenAI LLM. 
* diff --git a/plugins/livekit/src/turn_detector/base.ts b/plugins/livekit/src/turn_detector/base.ts index cf425e54c..da2cf8197 100644 --- a/plugins/livekit/src/turn_detector/base.ts +++ b/plugins/livekit/src/turn_detector/base.ts @@ -170,6 +170,14 @@ export abstract class EOUModel { #logger = log(); + get model(): string { + return MODEL_REVISIONS[this.modelType]; + } + + get provider(): string { + return 'livekit'; + } + constructor(opts: EOUModelOptions) { const { modelType = 'en', diff --git a/plugins/openai/src/llm.ts b/plugins/openai/src/llm.ts index 22299344a..f4d055506 100644 --- a/plugins/openai/src/llm.ts +++ b/plugins/openai/src/llm.ts @@ -86,6 +86,15 @@ export class LLM extends llm.LLM { return this.#opts.model; } + get provider(): string { + try { + const url = new URL(this.#client.baseURL); + return url.host; + } catch { + return 'api.openai.com'; + } + } + /** * Create a new instance of OpenAI LLM with Azure. * diff --git a/plugins/openai/src/realtime/realtime_model.ts b/plugins/openai/src/realtime/realtime_model.ts index 3623d5d5d..e08e95dbb 100644 --- a/plugins/openai/src/realtime/realtime_model.ts +++ b/plugins/openai/src/realtime/realtime_model.ts @@ -144,6 +144,15 @@ export class RealtimeModel extends llm.RealtimeModel { return this._options.model; } + get provider(): string { + try { + const url = new URL(this._options.baseURL); + return url.host; + } catch { + return 'api.openai.com'; + } + } + constructor( options: { model?: string; diff --git a/plugins/openai/src/stt.ts b/plugins/openai/src/stt.ts index 076e462b7..14c7a3779 100644 --- a/plugins/openai/src/stt.ts +++ b/plugins/openai/src/stt.ts @@ -28,6 +28,19 @@ export class STT extends stt.STT { #client: OpenAI; label = 'openai.STT'; + get model(): string { + return this.#opts.model; + } + + get provider(): string { + try { + const url = new URL(this.#client.baseURL); + return url.host; + } catch { + return 'api.openai.com'; + } + } + /** * Create a new instance of OpenAI STT. 
* diff --git a/plugins/openai/src/tts.ts b/plugins/openai/src/tts.ts index 2bb77c3d5..3bce9d501 100644 --- a/plugins/openai/src/tts.ts +++ b/plugins/openai/src/tts.ts @@ -32,6 +32,19 @@ export class TTS extends tts.TTS { label = 'openai.TTS'; private abortController = new AbortController(); + get model(): string { + return this.#opts.model; + } + + get provider(): string { + try { + const url = new URL(this.#client.baseURL); + return url.host; + } catch { + return 'api.openai.com'; + } + } + /** * Create a new instance of OpenAI TTS. * diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 674e7b9b0..0350aa94d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -12,7 +12,7 @@ catalogs: patchedDependencies: '@changesets/assemble-release-plan': - hash: wy7r54pxmuo7fdh5qymk2adrlq + hash: gqgxtqs6uc7rjtxf53tdm2jjpi path: patches/@changesets__assemble-release-plan.patch importers: @@ -20,8 +20,8 @@ importers: .: devDependencies: '@changesets/cli': - specifier: ^2.29.6 - version: 2.29.6(@types/node@22.15.30) + specifier: ^2.30.0 + version: 2.30.0(@types/node@22.15.30) '@livekit/changesets-changelog-github': specifier: ^0.0.4 version: 0.0.4 @@ -100,6 +100,9 @@ importers: agents: dependencies: + '@bufbuild/protobuf': + specifier: ^1.10.0 + version: 1.10.1 '@ffmpeg-installer/ffmpeg': specifier: ^1.1.0 version: 1.1.0 @@ -107,8 +110,8 @@ importers: specifier: ^1.1.1 version: 1.1.1 '@livekit/protocol': - specifier: ^1.43.0 - version: 1.43.0 + specifier: ^1.45.1 + version: 1.45.1 '@livekit/typed-emitter': specifier: ^3.0.0 version: 3.0.0 @@ -169,6 +172,9 @@ importers: livekit-server-sdk: specifier: ^2.14.1 version: 2.14.1 + ofetch: + specifier: ^1.5.1 + version: 1.5.1 openai: specifier: ^6.8.1 version: 6.8.1(ws@8.18.3)(zod@3.25.76) @@ -939,29 +945,29 @@ importers: version: 3.25.76 plugins/trugen: - dependencies: - livekit-server-sdk: - specifier: ^2.13.3 - version: 2.13.3 - devDependencies: - '@livekit/agents': - specifier: workspace:* - version: link:../../agents - 
'@livekit/rtc-node': - specifier: 'catalog:' - version: 0.13.24 - '@microsoft/api-extractor': - specifier: ^7.35.0 - version: 7.43.7(@types/node@22.19.1) - pino: - specifier: ^8.19.0 - version: 8.21.0 - tsup: - specifier: ^8.3.5 - version: 8.4.0(@microsoft/api-extractor@7.43.7(@types/node@22.19.1))(postcss@8.5.6)(tsx@4.21.0)(typescript@5.4.5) - typescript: - specifier: ^5.0.0 - version: 5.4.5 + dependencies: + livekit-server-sdk: + specifier: ^2.13.3 + version: 2.13.3 + devDependencies: + '@livekit/agents': + specifier: workspace:* + version: link:../../agents + '@livekit/rtc-node': + specifier: 'catalog:' + version: 0.13.24 + '@microsoft/api-extractor': + specifier: ^7.35.0 + version: 7.43.7(@types/node@22.19.1) + pino: + specifier: ^8.19.0 + version: 8.21.0 + tsup: + specifier: ^8.3.5 + version: 8.4.0(@microsoft/api-extractor@7.43.7(@types/node@22.19.1))(postcss@8.5.6)(tsx@4.21.0)(typescript@5.4.5) + typescript: + specifier: ^5.0.0 + version: 5.4.5 plugins/xai: dependencies: @@ -1079,8 +1085,8 @@ packages: '@bufbuild/protobuf@1.10.1': resolution: {integrity: sha512-wJ8ReQbHxsAfXhrf9ixl0aYbZorRuOWpBNzm8pL8ftmSxQx/wnJD5Eg861NwJU/czy2VXFIebCeZnZrI9rktIQ==} - '@changesets/apply-release-plan@7.0.12': - resolution: {integrity: sha512-EaET7As5CeuhTzvXTQCRZeBUcisoYPDDcXvgTE/2jmmypKp0RC7LxKj/yzqeh/1qFTZI7oDGFcL1PHRuQuketQ==} + '@changesets/apply-release-plan@7.1.0': + resolution: {integrity: sha512-yq8ML3YS7koKQ/9bk1PqO0HMzApIFNwjlwCnwFEXMzNe8NpzeeYYKCmnhWJGkN8g7E51MnWaSbqRcTcdIxUgnQ==} '@changesets/assemble-release-plan@6.0.9': resolution: {integrity: sha512-tPgeeqCHIwNo8sypKlS3gOPmsS3wP0zHt67JDuL20P4QcXiw/O4Hl7oXiuLnP9yg+rXLQ2sScdV1Kkzde61iSQ==} @@ -1088,12 +1094,12 @@ packages: '@changesets/changelog-git@0.2.1': resolution: {integrity: sha512-x/xEleCFLH28c3bQeQIyeZf8lFXyDFVn1SgcBiR2Tw/r4IAWlk1fzxCEZ6NxQAjF2Nwtczoen3OA2qR+UawQ8Q==} - '@changesets/cli@2.29.6': - resolution: {integrity: 
sha512-6qCcVsIG1KQLhpQ5zE8N0PckIx4+9QlHK3z6/lwKnw7Tir71Bjw8BeOZaxA/4Jt00pcgCnCSWZnyuZf5Il05QQ==} + '@changesets/cli@2.30.0': + resolution: {integrity: sha512-5D3Nk2JPqMI1wK25pEymeWRSlSMdo5QOGlyfrKg0AOufrUcjEE3RQgaCpHoBiM31CSNrtSgdJ0U6zL1rLDDfBA==} hasBin: true - '@changesets/config@3.1.1': - resolution: {integrity: sha512-bd+3Ap2TKXxljCggI0mKPfzCQKeV/TU4yO2h2C6vAihIo8tzseAn2e7klSuiyYYXvgu53zMN1OeYMIQkaQoWnA==} + '@changesets/config@3.1.3': + resolution: {integrity: sha512-vnXjcey8YgBn2L1OPWd3ORs0bGC4LoYcK/ubpgvzNVr53JXV5GiTVj7fWdMRsoKUH7hhhMAQnsJUqLr21EncNw==} '@changesets/errors@0.2.0': resolution: {integrity: sha512-6BLOQUscTpZeGljvyQXlWOItQyU71kCdGz7Pi8H8zdw6BI0g3m43iL4xKUVPWtG+qrrL9DTjpdn8eYuCQSRpow==} @@ -1104,8 +1110,8 @@ packages: '@changesets/get-github-info@0.5.2': resolution: {integrity: sha512-JppheLu7S114aEs157fOZDjFqUDpm7eHdq5E8SSR0gUBTEK0cNSHsrSR5a66xs0z3RWuo46QvA3vawp8BxDHvg==} - '@changesets/get-release-plan@4.0.13': - resolution: {integrity: sha512-DWG1pus72FcNeXkM12tx+xtExyH/c9I1z+2aXlObH3i9YA7+WZEVaiHzHl03thpvAgWTRaH64MpfHxozfF7Dvg==} + '@changesets/get-release-plan@4.0.15': + resolution: {integrity: sha512-Q04ZaRPuEVZtA+auOYgFaVQQSA98dXiVe/yFaZfY7hoSmQICHGvP0TF4u3EDNHWmmCS4ekA/XSpKlSM2PyTS2g==} '@changesets/get-version-range-type@0.4.0': resolution: {integrity: sha512-hwawtob9DryoGTpixy1D3ZXbGgJu1Rhr+ySH2PvTLHvkZuQ7sRT4oQwMh0hbqZH1weAooedEjRsbrWcGLCeyVQ==} @@ -1116,14 +1122,14 @@ packages: '@changesets/logger@0.1.1': resolution: {integrity: sha512-OQtR36ZlnuTxKqoW4Sv6x5YIhOmClRd5pWsjZsddYxpWs517R0HkyiefQPIytCVh4ZcC5x9XaG8KTdd5iRQUfg==} - '@changesets/parse@0.4.1': - resolution: {integrity: sha512-iwksMs5Bf/wUItfcg+OXrEpravm5rEd9Bf4oyIPL4kVTmJQ7PNDSd6MDYkpSJR1pn7tz/k8Zf2DhTCqX08Ou+Q==} + '@changesets/parse@0.4.3': + resolution: {integrity: sha512-ZDmNc53+dXdWEv7fqIUSgRQOLYoUom5Z40gmLgmATmYR9NbL6FJJHwakcCpzaeCy+1D0m0n7mT4jj2B/MQPl7A==} '@changesets/pre@2.0.2': resolution: {integrity: 
sha512-HaL/gEyFVvkf9KFg6484wR9s0qjAXlZ8qWPDkTyKF6+zqjBe/I2mygg3MbpZ++hdi0ToqNUF8cjj7fBy0dg8Ug==} - '@changesets/read@0.6.5': - resolution: {integrity: sha512-UPzNGhsSjHD3Veb0xO/MwvasGe8eMyNrR/sT9gR8Q3DhOQZirgKhhXv/8hVsI0QpPjR004Z9iFxoJU6in3uGMg==} + '@changesets/read@0.6.7': + resolution: {integrity: sha512-D1G4AUYGrBEk8vj8MGwf75k9GpN6XL3wg8i42P2jZZwFLXnlr2Pn7r9yuQNbaMCarP7ZQWNJbV6XLeysAIMhTA==} '@changesets/should-skip-package@0.1.2': resolution: {integrity: sha512-qAK/WrqWLNCP22UDdBTMPH5f41elVDlsNyat180A33dWxuUDyNpg6fPi/FyTZwRriVjg0L8gnjJn2F9XAoF0qw==} @@ -1969,8 +1975,8 @@ packages: cpu: [x64] os: [win32] - '@inquirer/external-editor@1.0.1': - resolution: {integrity: sha512-Oau4yL24d2B5IL4ma4UpbQigkVhzPDXLoqy1ggK4gnHg/stmkffJE4oOXHXF3uz0UEpywG68KcyXsyYpA1Re/Q==} + '@inquirer/external-editor@1.0.3': + resolution: {integrity: sha512-RWbSrDiYmO4LbejWY7ttpxczuwQyZLBUyygsA9Nsv95hpzUWwnNTVQmAq3xuh7vNwCp07UTmE5i11XAEExx4RA==} engines: {node: '>=18'} peerDependencies: '@types/node': '>=18' @@ -2051,8 +2057,8 @@ packages: cpu: [x64] os: [win32] - '@livekit/protocol@1.43.0': - resolution: {integrity: sha512-WCJ97fa4CBqPDh8pzdszOm/2xmelJ3Dx2vjKBlyb9BzmPQx1LjzVciP6uYFFMCMdrq2l1mjFQBXEz8Z20UCkyw==} + '@livekit/protocol@1.45.1': + resolution: {integrity: sha512-sr6p0TwKofHO5KW6kUzjq4hH2de4Al5scQo824xFnyI1XYo0qQn6fTG+bdr+Uj4EedjYAOqjezwUju5OErVIRA==} '@livekit/rtc-node-darwin-arm64@0.13.24': resolution: {integrity: sha512-gm5xOpGu6Rj/mNU2jEijcGhQGN2GdxV2dNYQm3NCKN7ow0BmMFZvXSCAWOWf+9oTutPXHnrc7EN1mt2v+lfqhA==} @@ -2966,8 +2972,8 @@ packages: resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} engines: {node: '>=10'} - chardet@2.1.0: - resolution: {integrity: sha512-bNFETTG/pM5ryzQ9Ad0lJOTa6HWD/YsScAR3EnCPZRPlQh77JocYktSHOUHelyhm8IARL+o4c4F1bP5KVOjiRA==} + chardet@2.1.1: + resolution: {integrity: sha512-PsezH1rqdV9VvyNhxxOW32/d75r01NY7TQCmOqomRo15ZSOKbpTFVsfjghxo6JloQUCGnH4k1LGu0R4yCLlWQQ==} check-error@1.0.3: 
resolution: {integrity: sha512-iKEoDYaRmd1mxM90a2OEfWhjsjPpYPuQ+lMYsoxB126+t8fw7ySEO48nmDg5COTjxDI65/Y2OWpeEHk3ZOe8zg==} @@ -2984,10 +2990,6 @@ packages: resolution: {integrity: sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==} engines: {node: '>=18'} - ci-info@3.9.0: - resolution: {integrity: sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==} - engines: {node: '>=8'} - cjs-module-lexer@1.4.3: resolution: {integrity: sha512-9z8TZaGM1pfswYeXrUpzPrkx8UnWYdhJclsiYMm6x/w5+nN+8Tf/LnAgfLGQCm59qAOxU8WwHEq2vNwF6i4j+Q==} @@ -3119,6 +3121,9 @@ packages: resolution: {integrity: sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==} engines: {node: '>=6'} + destr@2.0.5: + resolution: {integrity: sha512-ugFTXCtDZunbzasqBxrK93Ik/DRYsO6S/fedkWEMKqt04xZ4csmnmwGDBAb07QWNaGMAmnTIemsYZCksjATwsA==} + detect-indent@6.1.0: resolution: {integrity: sha512-reYkTUJAZb9gUuZ2RvVCNhVHdg62RHnJ7WJl8ftMi4diZ6NWlciOzQN88pUhSELEwflJht4oQDv0F0BMlwaYtA==} engines: {node: '>=8'} @@ -3764,8 +3769,8 @@ packages: resolution: {integrity: sha512-AXcZb6vzzrFAUE61HnN4mpLqd/cSIwNQjtNWR0euPm6y0iqx3G4gOXaIDdtdDwZmhwe82LA6+zinmW4UBWVePQ==} engines: {node: '>=16.17.0'} - iconv-lite@0.6.3: - resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==} + iconv-lite@0.7.2: + resolution: {integrity: sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==} engines: {node: '>=0.10.0'} ieee754@1.2.1: @@ -3983,6 +3988,10 @@ packages: resolution: {integrity: sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==} hasBin: true + js-yaml@4.1.1: + resolution: {integrity: sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==} + hasBin: true + jsesc@2.5.2: resolution: {integrity: 
sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==} engines: {node: '>=4'} @@ -4250,6 +4259,9 @@ packages: engines: {node: '>=10.5.0'} deprecated: Use your platform's native DOMException instead + node-fetch-native@1.6.7: + resolution: {integrity: sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q==} + node-fetch@2.7.0: resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} engines: {node: 4.x || >=6.0.0} @@ -4305,6 +4317,9 @@ packages: obug@2.1.1: resolution: {integrity: sha512-uTqF9MuPraAQ+IsnPf366RG4cP9RtUi7MLO1N3KEc+wb0a6yKpeL0lmk2IB1jY5KHPAlTc6T/JRdC/YqxHNwkQ==} + ofetch@1.5.1: + resolution: {integrity: sha512-2W4oUZlVaqAPAil6FUg/difl6YhqhUR7x2eZY4bQCko22UXg3hptq9KLQdqFClV+Wu85UX7hNtdGTngi/1BxcA==} + on-exit-leak-free@2.1.2: resolution: {integrity: sha512-0eJJY6hXLGf1udHwfNftBqH+g73EU4B504nZeKpz1sYRKafAghwxEJunB2O7rDZkL4PGfsMVnTXZ2EjibbqcsA==} engines: {node: '>=14.0.0'} @@ -4721,11 +4736,6 @@ packages: engines: {node: '>=10'} hasBin: true - semver@7.6.3: - resolution: {integrity: sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==} - engines: {node: '>=10'} - hasBin: true - semver@7.7.2: resolution: {integrity: sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==} engines: {node: '>=10'} @@ -5154,6 +5164,9 @@ packages: ufo@1.5.3: resolution: {integrity: sha512-Y7HYmWaFwPUmkoQCUIAYpKqkOf+SbVj/2fJJZ4RJMCfZp0rTGwRbzQD+HghfnhKOjL9E01okqz+ncJskGYfBNw==} + ufo@1.6.3: + resolution: {integrity: sha512-yDJTmhydvl5lJzBmy/hyOAA0d+aqCBuwl818haVdYCRrWV84o7YyeVm4QlVHStqNrrJSTb6jKuFAVqAFsr+K3Q==} + unbox-primitive@1.0.2: resolution: {integrity: sha512-61pPlCD9h51VoreyJ0BReideM3MDKMKnh6+V9L08331ipq6Q8OFXZYiqP6n/tbHx4s5I9uRhcye6BrbkizkBDw==} @@ -5576,9 +5589,9 @@ snapshots: '@bufbuild/protobuf@1.10.1': {} - 
'@changesets/apply-release-plan@7.0.12': + '@changesets/apply-release-plan@7.1.0': dependencies: - '@changesets/config': 3.1.1 + '@changesets/config': 3.1.3 '@changesets/get-version-range-type': 0.4.0 '@changesets/git': 3.0.4 '@changesets/should-skip-package': 0.1.2 @@ -5592,7 +5605,7 @@ snapshots: resolve-from: 5.0.0 semver: 7.7.3 - '@changesets/assemble-release-plan@6.0.9(patch_hash=wy7r54pxmuo7fdh5qymk2adrlq)': + '@changesets/assemble-release-plan@6.0.9(patch_hash=gqgxtqs6uc7rjtxf53tdm2jjpi)': dependencies: '@changesets/errors': 0.2.0 '@changesets/get-dependents-graph': 2.1.3 @@ -5605,44 +5618,43 @@ snapshots: dependencies: '@changesets/types': 6.1.0 - '@changesets/cli@2.29.6(@types/node@22.15.30)': + '@changesets/cli@2.30.0(@types/node@22.15.30)': dependencies: - '@changesets/apply-release-plan': 7.0.12 - '@changesets/assemble-release-plan': 6.0.9(patch_hash=wy7r54pxmuo7fdh5qymk2adrlq) + '@changesets/apply-release-plan': 7.1.0 + '@changesets/assemble-release-plan': 6.0.9(patch_hash=gqgxtqs6uc7rjtxf53tdm2jjpi) '@changesets/changelog-git': 0.2.1 - '@changesets/config': 3.1.1 + '@changesets/config': 3.1.3 '@changesets/errors': 0.2.0 '@changesets/get-dependents-graph': 2.1.3 - '@changesets/get-release-plan': 4.0.13 + '@changesets/get-release-plan': 4.0.15 '@changesets/git': 3.0.4 '@changesets/logger': 0.1.1 '@changesets/pre': 2.0.2 - '@changesets/read': 0.6.5 + '@changesets/read': 0.6.7 '@changesets/should-skip-package': 0.1.2 '@changesets/types': 6.1.0 '@changesets/write': 0.4.0 - '@inquirer/external-editor': 1.0.1(@types/node@22.15.30) + '@inquirer/external-editor': 1.0.3(@types/node@22.15.30) '@manypkg/get-packages': 1.1.3 ansi-colors: 4.1.3 - ci-info: 3.9.0 enquirer: 2.4.1 fs-extra: 7.0.1 mri: 1.2.0 - p-limit: 2.3.0 package-manager-detector: 0.2.11 picocolors: 1.1.1 resolve-from: 5.0.0 - semver: 7.6.3 + semver: 7.7.3 spawndamnit: 3.0.1 term-size: 2.2.1 transitivePeerDependencies: - '@types/node' - '@changesets/config@3.1.1': + '@changesets/config@3.1.3': 
dependencies: '@changesets/errors': 0.2.0 '@changesets/get-dependents-graph': 2.1.3 '@changesets/logger': 0.1.1 + '@changesets/should-skip-package': 0.1.2 '@changesets/types': 6.1.0 '@manypkg/get-packages': 1.1.3 fs-extra: 7.0.1 @@ -5666,12 +5678,12 @@ snapshots: transitivePeerDependencies: - encoding - '@changesets/get-release-plan@4.0.13': + '@changesets/get-release-plan@4.0.15': dependencies: - '@changesets/assemble-release-plan': 6.0.9(patch_hash=wy7r54pxmuo7fdh5qymk2adrlq) - '@changesets/config': 3.1.1 + '@changesets/assemble-release-plan': 6.0.9(patch_hash=gqgxtqs6uc7rjtxf53tdm2jjpi) + '@changesets/config': 3.1.3 '@changesets/pre': 2.0.2 - '@changesets/read': 0.6.5 + '@changesets/read': 0.6.7 '@changesets/types': 6.1.0 '@manypkg/get-packages': 1.1.3 @@ -5689,10 +5701,10 @@ snapshots: dependencies: picocolors: 1.1.1 - '@changesets/parse@0.4.1': + '@changesets/parse@0.4.3': dependencies: '@changesets/types': 6.1.0 - js-yaml: 3.14.1 + js-yaml: 4.1.1 '@changesets/pre@2.0.2': dependencies: @@ -5701,11 +5713,11 @@ snapshots: '@manypkg/get-packages': 1.1.3 fs-extra: 7.0.1 - '@changesets/read@0.6.5': + '@changesets/read@0.6.7': dependencies: '@changesets/git': 3.0.4 '@changesets/logger': 0.1.1 - '@changesets/parse': 0.4.1 + '@changesets/parse': 0.4.3 '@changesets/types': 6.1.0 fs-extra: 7.0.1 p-filter: 2.1.0 @@ -6222,10 +6234,10 @@ snapshots: '@img/sharp-win32-x64@0.34.5': optional: true - '@inquirer/external-editor@1.0.1(@types/node@22.15.30)': + '@inquirer/external-editor@1.0.3(@types/node@22.15.30)': dependencies: - chardet: 2.1.0 - iconv-lite: 0.6.3 + chardet: 2.1.1 + iconv-lite: 0.7.2 optionalDependencies: '@types/node': 22.15.30 @@ -6308,7 +6320,7 @@ snapshots: '@livekit/noise-cancellation-win32-x64@0.1.9': optional: true - '@livekit/protocol@1.43.0': + '@livekit/protocol@1.45.1': dependencies: '@bufbuild/protobuf': 1.10.1 @@ -7382,7 +7394,7 @@ snapshots: ansi-styles: 4.3.0 supports-color: 7.2.0 - chardet@2.1.0: {} + chardet@2.1.1: {} check-error@1.0.3: 
dependencies: @@ -7396,8 +7408,6 @@ snapshots: chownr@3.0.0: {} - ci-info@3.9.0: {} - cjs-module-lexer@1.4.3: {} color-convert@1.9.3: @@ -7505,6 +7515,8 @@ snapshots: dequal@2.0.3: {} + destr@2.0.5: {} + detect-indent@6.1.0: {} detect-libc@2.1.2: {} @@ -8398,7 +8410,7 @@ snapshots: human-signals@5.0.0: {} - iconv-lite@0.6.3: + iconv-lite@0.7.2: dependencies: safer-buffer: 2.1.2 @@ -8603,6 +8615,10 @@ snapshots: dependencies: argparse: 2.0.1 + js-yaml@4.1.1: + dependencies: + argparse: 2.0.1 + jsesc@2.5.2: {} json-bigint@1.0.0: @@ -8671,14 +8687,14 @@ snapshots: livekit-server-sdk@2.13.3: dependencies: '@bufbuild/protobuf': 1.10.1 - '@livekit/protocol': 1.43.0 + '@livekit/protocol': 1.45.1 camelcase-keys: 9.1.3 jose: 5.2.4 livekit-server-sdk@2.14.1: dependencies: '@bufbuild/protobuf': 1.10.1 - '@livekit/protocol': 1.43.0 + '@livekit/protocol': 1.45.1 camelcase-keys: 9.1.3 jose: 5.2.4 @@ -8841,6 +8857,8 @@ snapshots: node-domexception@1.0.0: {} + node-fetch-native@1.6.7: {} + node-fetch@2.7.0: dependencies: whatwg-url: 5.0.0 @@ -8901,6 +8919,12 @@ snapshots: obug@2.1.1: {} + ofetch@1.5.1: + dependencies: + destr: 2.0.5 + node-fetch-native: 1.6.7 + ufo: 1.6.3 + on-exit-leak-free@2.1.2: {} once@1.4.0: @@ -9382,8 +9406,6 @@ snapshots: dependencies: lru-cache: 6.0.0 - semver@7.6.3: {} - semver@7.7.2: {} semver@7.7.3: {} @@ -9892,6 +9914,8 @@ snapshots: ufo@1.5.3: {} + ufo@1.6.3: {} + unbox-primitive@1.0.2: dependencies: call-bind: 1.0.7 diff --git a/turbo.json b/turbo.json index e044f4fea..4f7d3a3a0 100644 --- a/turbo.json +++ b/turbo.json @@ -30,6 +30,7 @@ "LIVEKIT_API_SECRET", "LIVEKIT_INFERENCE_API_KEY", "LIVEKIT_INFERENCE_API_SECRET", + "LIVEKIT_DEV_MODE", "LIVEKIT_INFERENCE_URL", "LIVEKIT_URL", "LLAMA_API_KEY",