Skip to content

Commit 245bc66

Browse files
authored
Session Usage Collection (#1014)
1 parent 76bd4e8 commit 245bc66

37 files changed

Lines changed: 1308 additions & 81 deletions

.changeset/lucky-grapes-care.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
"@livekit/agents": patch
3+
"@livekit/agents-plugin-cartesia": patch
4+
"@livekit/agents-plugin-deepgram": patch
5+
"@livekit/agents-plugin-google": patch
6+
"@livekit/agents-plugin-openai": patch
7+
"livekit-agents-examples": patch
8+
---
9+
10+
Add granular session models usage stats

agents/src/inference/interruption/defaults.ts

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -14,37 +14,6 @@ export const SAMPLE_RATE = 16000;
1414
export const FRAMES_PER_SECOND = 40;
1515
export const FRAME_DURATION_IN_S = 0.025; // 25ms per frame
1616

17-
/** Default production inference URL */
18-
export const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
19-
20-
/** Staging inference URL */
21-
export const STAGING_BASE_URL = 'https://agent-gateway-staging.livekit.cloud/v1';
22-
23-
/**
24-
* Get the default inference URL based on the environment.
25-
*
26-
* Priority:
27-
* 1. LIVEKIT_INFERENCE_URL if set
28-
* 2. If LIVEKIT_URL contains '.staging.livekit.cloud', use staging gateway
29-
* 3. Otherwise, use production gateway
30-
*/
31-
export function getDefaultInferenceUrl(): string {
32-
// Priority 1: LIVEKIT_INFERENCE_URL
33-
const inferenceUrl = process.env.LIVEKIT_INFERENCE_URL;
34-
if (inferenceUrl) {
35-
return inferenceUrl;
36-
}
37-
38-
// Priority 2: Check LIVEKIT_URL for staging (exact match to Python)
39-
const livekitUrl = process.env.LIVEKIT_URL || '';
40-
if (livekitUrl.includes('.staging.livekit.cloud')) {
41-
return STAGING_BASE_URL;
42-
}
43-
44-
// Priority 3: Default to production
45-
return DEFAULT_BASE_URL;
46-
}
47-
4817
export const apiConnectDefaults: ApiConnectOptions = {
4918
maxRetries: 3,
5019
retryInterval: 2_000,

agents/src/inference/interruption/interruption_detector.ts

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,8 @@
44
import type { TypedEventEmitter } from '@livekit/typed-emitter';
55
import EventEmitter from 'events';
66
import { log } from '../../log.js';
7-
import {
8-
DEFAULT_BASE_URL,
9-
FRAMES_PER_SECOND,
10-
SAMPLE_RATE,
11-
STAGING_BASE_URL,
12-
getDefaultInferenceUrl,
13-
interruptionOptionDefaults,
14-
} from './defaults.js';
7+
import { DEFAULT_INFERENCE_URL, STAGING_INFERENCE_URL, getDefaultInferenceUrl } from '../utils.js';
8+
import { FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults } from './defaults.js';
159
import type { InterruptionDetectionError } from './errors.js';
1610
import { InterruptionStreamBase } from './interruption_stream.js';
1711
import type { InterruptionEvent, InterruptionOptions } from './types.js';
@@ -56,7 +50,8 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ
5650
let useProxy: boolean;
5751

5852
// Use LiveKit credentials if using the inference service (production or staging)
59-
const isInferenceUrl = lkBaseUrl === DEFAULT_BASE_URL || lkBaseUrl === STAGING_BASE_URL;
53+
const isInferenceUrl =
54+
lkBaseUrl === DEFAULT_INFERENCE_URL || lkBaseUrl === STAGING_INFERENCE_URL;
6055
if (isInferenceUrl) {
6156
lkApiKey =
6257
apiKey ?? process.env.LIVEKIT_INFERENCE_API_KEY ?? process.env.LIVEKIT_API_KEY ?? '';

agents/src/inference/llm.ts

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@ import {
1212
} from '../index.js';
1313
import * as llm from '../llm/index.js';
1414
import type { APIConnectOptions } from '../types.js';
15-
import { type AnyString, createAccessToken } from './utils.js';
16-
17-
const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
15+
import { type AnyString, createAccessToken, getDefaultInferenceUrl } from './utils.js';
1816

1917
export type OpenAIModels =
2018
| 'openai/gpt-5.2'
@@ -127,7 +125,7 @@ export class LLM extends llm.LLM {
127125
strictToolSchema = false,
128126
} = opts;
129127

130-
const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
128+
const lkBaseURL = baseURL || getDefaultInferenceUrl();
131129
const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
132130
if (!lkApiKey) {
133131
throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
@@ -163,6 +161,10 @@ export class LLM extends llm.LLM {
163161
return this.opts.model;
164162
}
165163

164+
get provider(): string {
165+
return 'livekit';
166+
}
167+
166168
static fromModelString(modelString: string): LLM {
167169
return new LLM({ model: modelString });
168170
}

agents/src/inference/stt.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import {
2222
type SttTranscriptEvent,
2323
sttServerEventSchema,
2424
} from './api_protos.js';
25-
import { type AnyString, connectWs, createAccessToken } from './utils.js';
25+
import { type AnyString, connectWs, createAccessToken, getDefaultInferenceUrl } from './utils.js';
2626

2727
export type DeepgramModels =
2828
| 'deepgram/flux-general'
@@ -97,7 +97,6 @@ export type STTEncoding = 'pcm_s16le';
9797

9898
const DEFAULT_ENCODING: STTEncoding = 'pcm_s16le';
9999
const DEFAULT_SAMPLE_RATE = 16000;
100-
const DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1';
101100
const DEFAULT_CANCEL_TIMEOUT = 5000;
102101

103102
export interface InferenceSTTOptions<TModel extends STTModels> {
@@ -143,7 +142,7 @@ export class STT<TModel extends STTModels> extends BaseSTT {
143142
modelOptions = {} as STTOptions<TModel>,
144143
} = opts || {};
145144

146-
const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
145+
const lkBaseURL = baseURL || getDefaultInferenceUrl();
147146
const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
148147
if (!lkApiKey) {
149148
throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
@@ -171,6 +170,14 @@ export class STT<TModel extends STTModels> extends BaseSTT {
171170
return 'inference.STT';
172171
}
173172

173+
get model(): string {
174+
return this.opts.model ?? 'auto';
175+
}
176+
177+
get provider(): string {
178+
return 'livekit';
179+
}
180+
174181
static fromModelString(modelString: string): STT<AnyString> {
175182
if (modelString.includes(':')) {
176183
const [model, language] = modelString.split(':') as [AnyString, STTLanguages];

agents/src/inference/tts.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import {
2020
ttsClientEventSchema,
2121
ttsServerEventSchema,
2222
} from './api_protos.js';
23-
import { type AnyString, connectWs, createAccessToken } from './utils.js';
23+
import { type AnyString, connectWs, createAccessToken, getDefaultInferenceUrl } from './utils.js';
2424

2525
export type CartesiaModels =
2626
| 'cartesia/sonic-3'
@@ -94,7 +94,6 @@ type TTSEncoding = 'pcm_s16le';
9494

9595
const DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';
9696
const DEFAULT_SAMPLE_RATE = 16000;
97-
const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
9897
const NUM_CHANNELS = 1;
9998
const DEFAULT_LANGUAGE = 'en';
10099

@@ -145,7 +144,7 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
145144
modelOptions = {} as TTSOptions<TModel>,
146145
} = opts || {};
147146

148-
const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
147+
const lkBaseURL = baseURL || getDefaultInferenceUrl();
149148
const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
150149
if (!lkApiKey) {
151150
throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
@@ -202,6 +201,14 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
202201
return 'inference.TTS';
203202
}
204203

204+
get model(): string {
205+
return this.opts.model ?? 'unknown';
206+
}
207+
208+
get provider(): string {
209+
return 'livekit';
210+
}
211+
205212
static fromModelString(modelString: string): TTS<AnyString> {
206213
if (modelString.includes(':')) {
207214
const [model, voice] = modelString.split(':') as [TTSModels, string];

agents/src/inference/utils.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,34 @@ import { APIConnectionError, APIStatusError } from '../index.js';
77

88
export type AnyString = string & NonNullable<unknown>;
99

10+
/** Default production inference URL */
11+
export const DEFAULT_INFERENCE_URL = 'https://agent-gateway.livekit.cloud/v1';
12+
13+
/** Staging inference URL */
14+
export const STAGING_INFERENCE_URL = 'https://agent-gateway.staging.livekit.cloud/v1';
15+
16+
/**
17+
* Get the default inference URL based on the environment.
18+
*
19+
* Priority:
20+
* 1. LIVEKIT_INFERENCE_URL if set
21+
* 2. If LIVEKIT_URL contains '.staging.livekit.cloud', use staging gateway
22+
* 3. Otherwise, use production gateway
23+
*/
24+
export function getDefaultInferenceUrl(): string {
25+
const inferenceUrl = process.env.LIVEKIT_INFERENCE_URL;
26+
if (inferenceUrl) {
27+
return inferenceUrl;
28+
}
29+
30+
const livekitUrl = process.env.LIVEKIT_URL || '';
31+
if (livekitUrl.includes('.staging.livekit.cloud')) {
32+
return STAGING_INFERENCE_URL;
33+
}
34+
35+
return DEFAULT_INFERENCE_URL;
36+
}
37+
1038
export async function createAccessToken(
1139
apiKey: string,
1240
apiSecret: string,

agents/src/llm/llm.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,18 @@ export abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCal
6565
return 'unknown';
6666
}
6767

68+
/**
69+
* Get the provider name for this LLM instance.
70+
*
71+
* @returns The provider name if available, "unknown" otherwise.
72+
*
73+
* @remarks
74+
* Plugins should override this property to provide their provider information.
75+
*/
76+
get provider(): string {
77+
return 'unknown';
78+
}
79+
6880
/**
6981
* Returns a {@link LLMStream} that can be used to push text and receive LLM responses.
7082
*/
@@ -248,6 +260,10 @@ export abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {
248260
}
249261
return (usage?.completionTokens || 0) / (durationMs / 1000);
250262
})(),
263+
metadata: {
264+
modelProvider: this.#llm.provider,
265+
modelName: this.#llm.model,
266+
},
251267
};
252268

253269
if (this.#llmRequestSpan) {

agents/src/llm/realtime.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ export abstract class RealtimeModel {
7272
/** The model name/identifier used by this realtime model */
7373
abstract get model(): string;
7474

75+
get provider(): string {
76+
return 'unknown';
77+
}
78+
7579
abstract session(): RealtimeSession;
7680

7781
abstract close(): Promise<void>;

agents/src/metrics/base.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22
//
33
// SPDX-License-Identifier: Apache-2.0
44

5+
export type MetricsMetadata = {
6+
/** The provider name (e.g., 'openai', 'anthropic'). */
7+
modelProvider?: string;
8+
/** The model name (e.g., 'gpt-4o', 'claude-3-5-sonnet'). */
9+
modelName?: string;
10+
};
11+
512
export type AgentMetrics =
613
| STTMetrics
714
| LLMMetrics
@@ -26,6 +33,8 @@ export type LLMMetrics = {
2633
totalTokens: number;
2734
tokensPerSecond: number;
2835
speechId?: string;
36+
/** Metadata for model provider and name tracking. */
37+
metadata?: MetricsMetadata;
2938
};
3039

3140
export type STTMetrics = {
@@ -41,10 +50,16 @@ export type STTMetrics = {
4150
* The duration of the pushed audio in milliseconds.
4251
*/
4352
audioDurationMs: number;
53+
/** Input audio tokens (for token-based billing). */
54+
inputTokens?: number;
55+
/** Output text tokens (for token-based billing). */
56+
outputTokens?: number;
4457
/**
4558
* Whether the STT is streaming (e.g using websocket).
4659
*/
4760
streamed: boolean;
61+
/** Metadata for model provider and name tracking. */
62+
metadata?: MetricsMetadata;
4863
};
4964

5065
export type TTSMetrics = {
@@ -59,10 +74,17 @@ export type TTSMetrics = {
5974
/** Generated audio duration in milliseconds. */
6075
audioDurationMs: number;
6176
cancelled: boolean;
77+
/** Number of characters synthesized (for character-based billing). */
6278
charactersCount: number;
79+
/** Input text tokens (for token-based billing, e.g., OpenAI TTS). */
80+
inputTokens?: number;
81+
/** Output audio tokens (for token-based billing, e.g., OpenAI TTS). */
82+
outputTokens?: number;
6383
streamed: boolean;
6484
segmentId?: string;
6585
speechId?: string;
86+
/** Metadata for model provider and name tracking. */
87+
metadata?: MetricsMetadata;
6688
};
6789

6890
export type VADMetrics = {
@@ -133,6 +155,10 @@ export type RealtimeModelMetrics = {
133155
* The duration of the response from created to done in milliseconds.
134156
*/
135157
durationMs: number;
158+
/**
159+
* The duration of the session connection in milliseconds (for session-based billing like xAI).
160+
*/
161+
sessionDurationMs?: number;
136162
/**
137163
* Time to first audio token in milliseconds. -1 if no audio token was sent.
138164
*/
@@ -165,4 +191,6 @@ export type RealtimeModelMetrics = {
165191
* Details about the output tokens used in the Response.
166192
*/
167193
outputTokenDetails: RealtimeModelMetricsOutputTokenDetails;
194+
/** Metadata for model provider and name tracking. */
195+
metadata?: MetricsMetadata;
168196
};

0 commit comments

Comments (0)