From 39afd42732efe7fdca9ec98a663a1732061a199f Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Mon, 1 Jun 2026 15:56:36 +0530 Subject: [PATCH 1/8] feat(google-cloud): add Google Cloud Text-to-Speech plugin Add @livekit/agents-plugin-google-cloud using the @google-cloud/text-to-speech client library. Supports both gRPC bidirectional streaming and REST-based synthesis. The existing google.beta.TTS uses @google/genai (Gemini API) which does not support streaming. This plugin uses the Google Cloud TTS client which supports streaming with Gemini Flash TTS models like gemini-3.1-flash-tts-preview, as well as standard models (Journey, Chirp 3, Standard, WaveNet). Credentials follow the standard Google Cloud auth chain: credentials object -> keyFilename -> GOOGLE_APPLICATION_CREDENTIALS -> ADC. --- .changeset/google-cloud-tts-plugin.md | 10 + plugins/google-cloud/README.md | 60 +++ plugins/google-cloud/api-extractor.json | 8 + plugins/google-cloud/package.json | 51 +++ plugins/google-cloud/src/index.ts | 19 + plugins/google-cloud/src/models.ts | 39 ++ plugins/google-cloud/src/tts.ts | 495 ++++++++++++++++++++++++ plugins/google-cloud/tsconfig.json | 15 + plugins/google-cloud/tsup.config.ts | 7 + pnpm-lock.yaml | 239 ++++++++++++ 10 files changed, 943 insertions(+) create mode 100644 .changeset/google-cloud-tts-plugin.md create mode 100644 plugins/google-cloud/README.md create mode 100644 plugins/google-cloud/api-extractor.json create mode 100644 plugins/google-cloud/package.json create mode 100644 plugins/google-cloud/src/index.ts create mode 100644 plugins/google-cloud/src/models.ts create mode 100644 plugins/google-cloud/src/tts.ts create mode 100644 plugins/google-cloud/tsconfig.json create mode 100644 plugins/google-cloud/tsup.config.ts diff --git a/.changeset/google-cloud-tts-plugin.md b/.changeset/google-cloud-tts-plugin.md new file mode 100644 index 000000000..ee3be6fb8 --- /dev/null +++ b/.changeset/google-cloud-tts-plugin.md @@ -0,0 +1,10 @@ +--- 
+"@livekit/agents-plugin-google-cloud": patch +--- + +feat: add Google Cloud Text-to-Speech plugin + +Ports the Google Cloud TTS integration from the Python SDK, supporting both +streaming (via gRPC bidirectional streaming) and non-streaming synthesis. +Uses the @google-cloud/text-to-speech client library with credentials from the +GOOGLE_APPLICATION_CREDENTIALS env var or Application Default Credentials. diff --git a/plugins/google-cloud/README.md b/plugins/google-cloud/README.md new file mode 100644 index 000000000..8eb27c361 --- /dev/null +++ b/plugins/google-cloud/README.md @@ -0,0 +1,60 @@ + + +# Google Cloud plugin for LiveKit Agents + +The Agents Framework is designed for building realtime, programmable +participants that run on servers. Use it to create conversational, multi-modal +voice agents that can see, hear, and understand. + +This package contains the Google Cloud plugin, which provides text-to-speech +via the [Google Cloud Text-to-Speech API](https://cloud.google.com/text-to-speech). + +## Installation + +```bash +pnpm add @livekit/agents-plugin-google-cloud +``` + +## Authentication + +Credentials are resolved by the underlying `@google-cloud/text-to-speech` client in order: + +1. `credentials` object passed directly (`{ client_email, private_key }`) +2. `keyFilename` path to a service account JSON key file +3. `GOOGLE_APPLICATION_CREDENTIALS` environment variable +4. 
Application Default Credentials (e.g. configured via `gcloud auth application-default login`) + +## Usage + +```typescript +import { TTS } from '@livekit/agents-plugin-google-cloud'; + +// Streaming synthesis (gRPC, default) +const tts = new TTS({ + language: 'en-US', + voiceName: 'en-US-Standard-H', +}); + +// Non-streaming synthesis (REST); use synthesize() with this instance +const restTts = new TTS({ + language: 'en-IN', + voiceName: 'en-IN-Standard-C', + streaming: false, +}); + +// Stream text through the streaming-enabled instance +const stream = tts.stream(); +stream.pushText('Hello, world!'); +stream.flush(); +for await (const event of stream) { + // event.frame contains AudioFrame data +} +``` + +## License + +Apache 2.0 diff --git a/plugins/google-cloud/api-extractor.json b/plugins/google-cloud/api-extractor.json new file mode 100644 index 000000000..baa041649 --- /dev/null +++ b/plugins/google-cloud/api-extractor.json @@ -0,0 +1,8 @@ +/** + * Config file for API Extractor. For more info, please visit: https://api-extractor.com + */ +{ + "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", + "extends": "../../api-extractor-shared.json", + "mainEntryPointFilePath": "./dist/index.d.ts" +} diff --git a/plugins/google-cloud/package.json b/plugins/google-cloud/package.json new file mode 100644 index 000000000..86e3989bc --- /dev/null +++ b/plugins/google-cloud/package.json @@ -0,0 +1,51 @@ +{ + "name": "@livekit/agents-plugin-google-cloud", + "version": "1.4.4", + "description": "Google Cloud TTS plugin for LiveKit Node Agents", + "main": "dist/index.js", + "require": "dist/index.cjs", + "types": "dist/index.d.ts", + "exports": { + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + }, + "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + } + }, + "author": "LiveKit", + "type": "module", + "repository": "git@github.com:livekit/agents-js.git", + "license": "Apache-2.0", + "files": [ + "dist", + "src", + "README.md" + ], + "scripts": { + "build": "tsup 
--onSuccess \"pnpm build:types\"", + "build:types": "tsc --declaration --emitDeclarationOnly && node ../../scripts/copyDeclarationOutput.js", + "clean": "rm -rf dist", + "clean:build": "pnpm clean && pnpm build", + "lint": "eslint -f unix \"src/**/*.{ts,js}\"", + "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript", + "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose" + }, + "devDependencies": { + "@livekit/agents": "workspace:*", + "@livekit/agents-plugins-test": "workspace:*", + "@livekit/rtc-node": "catalog:", + "@microsoft/api-extractor": "^7.35.0", + "tsup": "^8.3.5", + "typescript": "^5.0.0" + }, + "dependencies": { + "@google-cloud/text-to-speech": "^6.0.0" + }, + "peerDependencies": { + "@livekit/agents": "workspace:*", + "@livekit/rtc-node": "catalog:" + } +} diff --git a/plugins/google-cloud/src/index.ts b/plugins/google-cloud/src/index.ts new file mode 100644 index 000000000..cb12e7fbb --- /dev/null +++ b/plugins/google-cloud/src/index.ts @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +import { Plugin } from '@livekit/agents'; + +export * from './models.js'; +export { ChunkedStream, SynthesizeStream, TTS, type TTSOptions } from './tts.js'; + +class GoogleCloudPlugin extends Plugin { + constructor() { + super({ + title: 'google-cloud', + version: __PACKAGE_VERSION__, + package: __PACKAGE_NAME__, + }); + } +} + +Plugin.registerPlugin(new GoogleCloudPlugin()); diff --git a/plugins/google-cloud/src/models.ts b/plugins/google-cloud/src/models.ts new file mode 100644 index 000000000..0399eefc7 --- /dev/null +++ b/plugins/google-cloud/src/models.ts @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +/** Google Cloud TTS models. 
*/ +export type TTSModel = + | 'chirp-3' + | 'chirp-3-hd' + | 'journey' + | 'journey-2' + | 'chirp-2-hd' + | 'chirp-2-lq' + | string; + +/** Google Cloud TTS voice genders. */ +export type TTSGender = 'male' | 'female' | 'neutral'; + +/** Speech language codes (BCP-47). */ +export type TTSLanguage = + | 'en-US' + | 'en-GB' + | 'en-AU' + | 'en-IN' + | 'hi-IN' + | 'bn-IN' + | 'ta-IN' + | 'te-IN' + | 'mr-IN' + | 'gu-IN' + | 'kn-IN' + | 'ml-IN' + | 'pa-IN' + | string; + +/** Audio encoding formats for non-streaming synthesis. */ +export type TTSAudioEncoding = 'LINEAR16' | 'MP3' | 'OGG_OPUS' | 'MULAW' | 'ALAW'; + +/** Default voice name used when none is specified. */ +export const DEFAULT_VOICE_NAME = 'en-US-Standard-H'; diff --git a/plugins/google-cloud/src/tts.ts b/plugins/google-cloud/src/tts.ts new file mode 100644 index 000000000..12cdb132d --- /dev/null +++ b/plugins/google-cloud/src/tts.ts @@ -0,0 +1,495 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +import { TextToSpeechClient } from '@google-cloud/text-to-speech'; +import type { protos } from '@google-cloud/text-to-speech'; +import { + type APIConnectOptions, + APIConnectionError, + APIStatusError, + AudioByteStream, + shortuuid, + tokenize, + tts, +} from '@livekit/agents'; +import type { AudioFrame } from '@livekit/rtc-node'; +import { once } from 'node:events'; +import type { TTSGender, TTSLanguage, TTSModel } from './models.js'; + +const NUM_CHANNELS = 1; +const DEFAULT_SAMPLE_RATE = 24000; + +type GaxClientOptions = NonNullable[0]>; +type SynthesizeSpeechRequest = protos.google.cloud.texttospeech.v1.ISynthesizeSpeechRequest; +type StreamingSynthesizeRequest = protos.google.cloud.texttospeech.v1.IStreamingSynthesizeRequest; +type StreamingSynthesizeResponse = protos.google.cloud.texttospeech.v1.StreamingSynthesizeResponse; +type GoogleStreamingCall = ReturnType; + +// --------------------------------------------------------------------------- +// Options +// 
--------------------------------------------------------------------------- + +/** Configuration options for the Google Cloud TTS plugin. */ +export interface TTSOptions { + /** Model name (e.g. `journey`, `chirp-3-hd`). */ + modelName?: TTSModel | string; + /** Voice name (e.g. `en-US-Standard-H`). */ + voiceName?: TTSLanguage | string; + /** Language code (BCP-47, e.g. `en-US`). */ + language?: TTSLanguage | string; + /** Voice gender. */ + gender?: TTSGender; + /** Output sample rate in Hz. Default: 24000. */ + sampleRate?: number; + /** + * Whether to use gRPC bidirectional streaming for `stream()`. + * Set to `false` to prefer non-streaming REST synthesis. + * Default: `true`. + */ + streaming?: boolean; + /** + * Google Cloud service account credentials object. + * Must include `client_email` and `private_key`. + */ + credentials?: GaxClientOptions['credentials']; + /** + * Path to a Google Cloud service account JSON key file. + * Falls back to `GOOGLE_APPLICATION_CREDENTIALS` environment variable. + */ + keyFilename?: string; +} + +interface ResolvedTTSOptions { + modelName: TTSModel | string; + voiceName: TTSLanguage | string; + language: TTSLanguage | string; + sampleRate: number; + streaming: boolean; +} + +// --------------------------------------------------------------------------- +// TTS +// --------------------------------------------------------------------------- + +export class TTS extends tts.TTS { + readonly label = 'google-cloud.TTS'; + #opts: ResolvedTTSOptions; + #client: TextToSpeechClient; + + constructor(opts: TTSOptions = {}) { + const sampleRate = opts.sampleRate ?? DEFAULT_SAMPLE_RATE; + const streaming = opts.streaming ?? true; + + super(sampleRate, NUM_CHANNELS, { streaming }); + + this.#opts = { + modelName: opts.modelName ?? 'journey', + voiceName: opts.voiceName ?? 'en-US-Standard-H', + language: opts.language ?? 
'en-US', + sampleRate, + streaming, + }; + + const gender = opts.gender; + if (gender) { + this.#opts.voiceName = buildVoiceName(this.#opts.language, gender); + } + + const clientOptions: GaxClientOptions = {}; + if (opts.credentials) { + clientOptions.credentials = opts.credentials; + } + if (opts.keyFilename) { + clientOptions.keyFilename = opts.keyFilename; + } + + this.#client = new TextToSpeechClient(clientOptions); + } + + get model(): string { + return this.#opts.modelName; + } + + get provider(): string { + return 'google-cloud'; + } + + synthesize( + text: string, + connOptions?: APIConnectOptions, + abortSignal?: AbortSignal, + ): ChunkedStream { + return new ChunkedStream(text, this, connOptions, abortSignal); + } + + stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream { + if (!this.capabilities.streaming) { + throw new Error( + 'Google Cloud TTS streaming is disabled (`streaming: false`). Use synthesize() for REST synthesis.', + ); + } + return new SynthesizeStream(this, options?.connOptions); + } + + get opts() { + return this.#opts; + } + + get client() { + return this.#client; + } + + async close(): Promise { + await this.#client.close(); + } +} + +// --------------------------------------------------------------------------- +// Streaming synthesis +// --------------------------------------------------------------------------- + +export class SynthesizeStream extends tts.SynthesizeStream { + readonly label = 'google-cloud.SynthesizeStream'; + #tts: TTS; + #tokenizer: tokenize.SentenceStream; + + constructor(ttsProvider: TTS, connOptions?: APIConnectOptions) { + super(ttsProvider, connOptions); + this.#tts = ttsProvider; + this.#tokenizer = new tokenize.basic.SentenceTokenizer({ + language: ttsProvider.opts.language, + }).stream(); + } + + protected async run(): Promise { + const requestId = shortuuid(); + const call = this.#tts.client.streamingSynthesize(); + + await writeStreamingRequest(call, { + streamingConfig: { + voice: { + 
languageCode: this.#tts.opts.language, + name: this.#tts.opts.voiceName, + modelName: this.#tts.opts.modelName, + }, + streamingAudioConfig: { + audioEncoding: 1 /* NOTE(review): ChunkedStream labels this same value LINEAR16; PCM is presumably a different AudioEncoding member - confirm */, + sampleRateHertz: this.#tts.opts.sampleRate, + }, + }, + }); + + const abort = () => { + try { + call.cancel(); + } catch { + call.destroy(); + } + }; + this.abortSignal.addEventListener('abort', abort, { once: true }); + + try { + await Promise.all([ + this.#tokenizeInput(), + this.#sendText(call), + this.#receiveAudio(call, requestId), + ]); + } catch (error: unknown) { + if (this.abortSignal.aborted) { + return; + } + + throw toLiveKitTtsError(error); + } finally { + this.abortSignal.removeEventListener('abort', abort); + this.#tokenizer.close(); + call.destroy(); + } + } + + async #tokenizeInput(): Promise { + try { + for await (const data of this.input) { + if (data === SynthesizeStream.FLUSH_SENTINEL) { + this.#tokenizer.flush(); + continue; + } + + this.#tokenizer.pushText(data); + } + + this.#tokenizer.endInput(); + } catch { + // Stream shutdown can close tokenizer/input concurrently. 
+ } + } + + async #sendText(call: GoogleStreamingCall): Promise { + for await (const event of this.#tokenizer) { + if (this.abortSignal.aborted) { + break; + } + + await writeStreamingRequest(call, { + input: { + text: event.token, + }, + }); + } + + call.end(); + } + + async #receiveAudio(call: GoogleStreamingCall, requestId: string): Promise { + const bstream = new AudioByteStream(this.#tts.sampleRate, this.#tts.numChannels); + let lastFrame: AudioFrame | undefined; + + const sendLastFrame = (final: boolean) => { + if (!lastFrame || this.queue.closed) { + return; + } + + this.queue.put({ + requestId, + segmentId: requestId, + frame: lastFrame, + final, + }); + lastFrame = undefined; + }; + + await new Promise((resolve, reject) => { + call.on('data', (response: StreamingSynthesizeResponse) => { + const audioContent = response.audioContent; + if (!audioContent) { + return; + } + + const audioBuffer = + typeof audioContent === 'string' + ? Buffer.from(audioContent, 'base64') + : Buffer.from(audioContent); + + const audioData = extractArrayBuffer(audioBuffer); + for (const frame of bstream.write(audioData)) { + sendLastFrame(false); + lastFrame = frame; + } + }); + + call.once('end', () => { + for (const frame of bstream.flush()) { + sendLastFrame(false); + lastFrame = frame; + } + sendLastFrame(true); + + if (!this.queue.closed) { + this.queue.put(tts.SynthesizeStream.END_OF_STREAM); + } + resolve(); + }); + + call.once('error', (error) => { + reject(error); + }); + }); + } +} + +// --------------------------------------------------------------------------- +// Non-streaming (one-shot) synthesis +// --------------------------------------------------------------------------- + +export class ChunkedStream extends tts.ChunkedStream { + readonly label = 'google-cloud.ChunkedStream'; + #tts: TTS; + + constructor( + inputText: string, + ttsProvider: TTS, + connOptions?: APIConnectOptions, + abortSignal?: AbortSignal, + ) { + super(inputText, ttsProvider, connOptions, 
abortSignal); + this.#tts = ttsProvider; + } + + protected async run() { + const requestId = shortuuid(); + const request: SynthesizeSpeechRequest = { + input: { + text: this.inputText, + }, + voice: { + languageCode: this.#tts.opts.language, + name: this.#tts.opts.voiceName, + modelName: this.#tts.opts.modelName, + }, + audioConfig: { + audioEncoding: 1 /* LINEAR16 */, + sampleRateHertz: this.#tts.opts.sampleRate, + }, + }; + + try { + const [response] = await this.#tts.client.synthesizeSpeech(request, { + otherArgs: { + headers: { + 'x-goog-request-params': `voice.language_code=${encodeURIComponent( + this.#tts.opts.language, + )}`, + }, + }, + }); + + if (this.abortSignal.aborted) { + return; + } + + const audioContent = response.audioContent; + if (!audioContent) { + throw new APIConnectionError({ + message: 'Google Cloud TTS returned empty audio', + options: { retryable: true }, + }); + } + + const audioBuffer = + typeof audioContent === 'string' + ? Buffer.from(audioContent, 'base64') + : Buffer.from(audioContent); + + const pcmAudio = extractLinear16Pcm(audioBuffer); + const bstream = new AudioByteStream(this.#tts.sampleRate, this.#tts.numChannels); + const frames = [...bstream.write(extractArrayBuffer(pcmAudio)), ...bstream.flush()]; + + if (frames.length === 0) { + throw new APIConnectionError({ + message: 'Google Cloud TTS returned audio but no playable PCM frames', + options: { retryable: true }, + }); + } + + let lastFrame: AudioFrame | undefined; + const sendLastFrame = (final: boolean) => { + if (!lastFrame) { + return; + } + + this.queue.put({ + requestId, + segmentId: requestId, + frame: lastFrame, + final, + }); + lastFrame = undefined; + }; + + for (const frame of frames) { + sendLastFrame(false); + lastFrame = frame; + } + sendLastFrame(true); + } catch (error: unknown) { + if (error instanceof Error && error.name === 'AbortError') { + return; + } + + throw toLiveKitTtsError(error); + } finally { + this.queue.close(); + } + } +} + +// 
--------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function buildVoiceName(language: string, gender: TTSGender): string { + // Map gender to the Standard voice suffix + const suffix = gender === 'male' ? 'B' : gender === 'female' ? 'C' : 'A'; + return `${language}-Standard-${suffix}`; +} + +async function writeStreamingRequest( + call: GoogleStreamingCall, + request: StreamingSynthesizeRequest, +): Promise { + if (call.write(request)) { + return; + } + + await once(call, 'drain'); +} + +function extractArrayBuffer(buf: Buffer): ArrayBuffer { + return new Uint8Array(buf).buffer as ArrayBuffer; +} + +function extractLinear16Pcm(audioBuffer: Buffer): Buffer { + if ( + audioBuffer.length < 12 || + audioBuffer.toString('ascii', 0, 4) !== 'RIFF' || + audioBuffer.toString('ascii', 8, 12) !== 'WAVE' + ) { + return audioBuffer; + } + + let offset = 12; + while (offset + 8 <= audioBuffer.length) { + const chunkId = audioBuffer.toString('ascii', offset, offset + 4); + const chunkSize = audioBuffer.readUInt32LE(offset + 4); + const dataStart = offset + 8; + const dataEnd = dataStart + chunkSize; + + if (dataEnd > audioBuffer.length) { + break; + } + + if (chunkId === 'data') { + return audioBuffer.subarray(dataStart, dataEnd); + } + + offset = dataEnd + (chunkSize % 2); + } + + throw new APIConnectionError({ + message: 'Google Cloud TTS returned LINEAR16 audio without a WAV data chunk', + options: { retryable: true }, + }); +} + +function toLiveKitTtsError(error: unknown): Error { + const maybeGoogleError = error as { + code?: number; + message?: string; + details?: string; + }; + + if (typeof maybeGoogleError.code === 'number') { + const retryable = + maybeGoogleError.code === 8 || + maybeGoogleError.code === 10 || + maybeGoogleError.code === 13 || + maybeGoogleError.code === 14; + + return new APIStatusError({ + message: `Google Cloud TTS error 
(${maybeGoogleError.code}): ${ + maybeGoogleError.message ?? maybeGoogleError.details ?? 'unknown error' + }`, + options: { + statusCode: maybeGoogleError.code, + retryable, + }, + }); + } + + return new APIConnectionError({ + message: `Google Cloud TTS connection error: ${ + error instanceof Error ? error.message : 'unknown error' + }`, + options: { retryable: true }, + }); +} diff --git a/plugins/google-cloud/tsconfig.json b/plugins/google-cloud/tsconfig.json new file mode 100644 index 000000000..babf62d11 --- /dev/null +++ b/plugins/google-cloud/tsconfig.json @@ -0,0 +1,15 @@ +{ + "extends": "../../tsconfig.json", + "include": ["./src"], + "compilerOptions": { + "rootDir": "./src", + "declarationDir": "./dist", + "outDir": "./dist" + }, + "typedocOptions": { + "name": "plugins/agents-plugin-google-cloud", + "entryPointStrategy": "resolve", + "readme": "none", + "entryPoints": ["src/index.ts"] + } +} diff --git a/plugins/google-cloud/tsup.config.ts b/plugins/google-cloud/tsup.config.ts new file mode 100644 index 000000000..8ca20961f --- /dev/null +++ b/plugins/google-cloud/tsup.config.ts @@ -0,0 +1,7 @@ +import { defineConfig } from 'tsup'; + +import defaults from '../../tsup.config'; + +export default defineConfig({ + ...defaults, +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 60f50e766..3ea16be67 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -722,6 +722,31 @@ importers: specifier: ^5.0.0 version: 5.9.3 + plugins/google-cloud: + dependencies: + '@google-cloud/text-to-speech': + specifier: ^6.0.0 + version: 6.4.1 + devDependencies: + '@livekit/agents': + specifier: workspace:* + version: link:../../agents + '@livekit/agents-plugins-test': + specifier: workspace:* + version: link:../test + '@livekit/rtc-node': + specifier: 'catalog:' + version: 0.13.29 + '@microsoft/api-extractor': + specifier: ^7.35.0 + version: 7.43.7(@types/node@25.6.0) + tsup: + specifier: ^8.3.5 + version: 
8.4.0(@microsoft/api-extractor@7.43.7(@types/node@25.6.0))(postcss@8.5.9)(tsx@4.21.0)(typescript@5.9.3) + typescript: + specifier: ^5.0.0 + version: 5.9.3 + plugins/hedra: dependencies: livekit-server-sdk: @@ -2004,6 +2029,10 @@ packages: cpu: [x64] os: [win32] + '@google-cloud/text-to-speech@6.4.1': + resolution: {integrity: sha512-iF1SpBPbP019zoLYzIJXp/yDumrSNl19T7hXP4Lg8d2cnNtxoQKQuNOpiwFrxEKV3CBJpp7OY5+z7/K73zNr5w==} + engines: {node: '>=18'} + '@google/genai@1.50.1': resolution: {integrity: sha512-YbkX7H9+1Pt8wOt7DDREy8XSoiL6fRDzZQRyaVBarFf8MR3zHGqVdvM4cLbDXqPhxqvegZShgfxb8kw9C7YhAQ==} engines: {node: '>=20.0.0'} @@ -2013,6 +2042,15 @@ packages: '@modelcontextprotocol/sdk': optional: true + '@grpc/grpc-js@1.14.4': + resolution: {integrity: sha512-k9Dj3DV/itK9D06Y8f190Qgop7/Ui+D0njFV3LHMPwPT75DpXLQohE9Wmz0QElrJnzsjB7KPWiKJbOl7IPDArQ==} + engines: {node: '>=12.10.0'} + + '@grpc/proto-loader@0.8.1': + resolution: {integrity: sha512-wtF6h+DY6M3YaDBPAmvuuA6jV8Sif9MjtOI5euKFWRgCDl5PeDpPsHR9u2l6St5ceY8AZgoNDww5+HvEsXFsGg==} + engines: {node: '>=6'} + hasBin: true + '@huggingface/hub@2.4.1': resolution: {integrity: sha512-g/EJG091aIdP1whpSjhqBOL25/m60NKXhYGz3wqp7hLX57r4Fx7QVFfXRbtxI0ZMQjLQV3GYrPtldz38mvOr+w==} engines: {node: '>=18'} @@ -2216,6 +2254,9 @@ packages: '@jridgewell/trace-mapping@0.3.31': resolution: {integrity: sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==} + '@js-sdsl/ordered-map@4.4.2': + resolution: {integrity: sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw==} + '@livekit/changesets-changelog-github@0.0.4': resolution: {integrity: sha512-MXaiLYwgkYciZb8G2wkVtZ1pJJzZmVx5cM30Q+ClslrIYyAqQhRbPmZDM79/5CGxb1MTemR/tfOM25tgJgAK0g==} @@ -3212,6 +3253,10 @@ packages: cjs-module-lexer@1.4.3: resolution: {integrity: sha512-9z8TZaGM1pfswYeXrUpzPrkx8UnWYdhJclsiYMm6x/w5+nN+8Tf/LnAgfLGQCm59qAOxU8WwHEq2vNwF6i4j+Q==} + cliui@8.0.1: + resolution: {integrity: 
sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==} + engines: {node: '>=12'} + color-convert@1.9.3: resolution: {integrity: sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==} @@ -3382,6 +3427,9 @@ packages: resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} engines: {node: '>= 0.4'} + duplexify@4.1.3: + resolution: {integrity: sha512-M3BmBhwJRZsSx38lZyhE53Csddgzl5R7xGJNk7CVddZD6CcmwMCH8J+7AprIrQKH7TonKxaCjcv27Qmf+sQ+oA==} + eastasianwidth@0.2.0: resolution: {integrity: sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==} @@ -3457,6 +3505,10 @@ packages: engines: {node: '>=18'} hasBin: true + escalade@3.2.0: + resolution: {integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==} + engines: {node: '>=6'} + escape-string-regexp@1.0.5: resolution: {integrity: sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==} engines: {node: '>=0.8.0'} @@ -3795,6 +3847,10 @@ packages: resolution: {integrity: sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==} engines: {node: '>=18'} + get-caller-file@2.0.5: + resolution: {integrity: sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==} + engines: {node: 6.* || 8.* || >= 10.*} + get-func-name@2.0.2: resolution: {integrity: sha512-8vXOvuE167CtIc3OyItco7N/dpRtBbYOsPsXCz7X/PMnlGjYjSGuZJgM1Y7mmew7BKf9BqvLX2tnOVy1BBUsxQ==} @@ -3875,6 +3931,10 @@ packages: resolution: {integrity: sha512-e27Z6EThmVNNvtYASwQxose/G57rkRuaRbQyxM2bvYLLX/GqWZ5chWq2EBoUchJbCc57eC9ArzO5wMsEmWftCw==} engines: {node: '>=18'} + google-gax@5.0.6: + resolution: {integrity: sha512-1kGbqVQBZPAAu4+/R1XxPQKP0ydbNYoLAr4l0ZO2bMV0kLyLW4I1gAk++qBLWt7DPORTzmWRMsCZe86gDjShJA==} + engines: {node: 
'>=18'} + google-logging-utils@1.1.3: resolution: {integrity: sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==} engines: {node: '>=14'} @@ -3932,6 +3992,10 @@ packages: html-escaper@2.0.2: resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} + http-proxy-agent@7.0.2: + resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} + engines: {node: '>= 14'} + https-proxy-agent@7.0.6: resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} engines: {node: '>= 14'} @@ -4252,6 +4316,9 @@ packages: resolution: {integrity: sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==} engines: {node: '>=10'} + lodash.camelcase@4.3.0: + resolution: {integrity: sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==} + lodash.get@4.4.2: resolution: {integrity: sha512-z+Uw/vLuy6gQe8cfaFWD7p0wVv8fJl3mbzXh33RS+0oW2wvUqiRXiQ69gLWSLpgB5/6sU+r6BlQR0MBILadqTQ==} deprecated: This package is deprecated. Use the optional chaining (?.) operator instead. 
@@ -4431,6 +4498,10 @@ packages: resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==} engines: {node: '>=0.10.0'} + object-hash@3.0.0: + resolution: {integrity: sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==} + engines: {node: '>= 6'} + object-inspect@1.13.1: resolution: {integrity: sha512-5qoj1RUiKOMsCCNLV1CBiPYE10sziTsnmNxkAI/rZhiD63CF7IqdFGC/XzjWjpSgLf0LxXX3bDFIh0E18f6UhQ==} @@ -4723,6 +4794,10 @@ packages: prop-types@15.8.1: resolution: {integrity: sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==} + proto3-json-serializer@3.0.4: + resolution: {integrity: sha512-E1sbAYg3aEbXrq0n1ojJkRHQJGE1kaE/O6GLA94y8rnJBfgvOPTOd1b9hOceQK1FFZI9qMh1vBERCyO2ifubcw==} + engines: {node: '>=18'} + protobufjs@7.5.4: resolution: {integrity: sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==} engines: {node: '>=12.0.0'} @@ -4761,6 +4836,10 @@ packages: resolution: {integrity: sha512-VIMnQi/Z4HT2Fxuwg5KrY174U1VdUIASQVWXXyqtNRtxSr9IYkn1rsI6Tb6HsrHCmB7gVpNwX6JxPTHcH6IoTA==} engines: {node: '>=6'} + readable-stream@3.6.2: + resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==} + engines: {node: '>= 6'} + readable-stream@4.5.2: resolution: {integrity: sha512-yjavECdqeZ3GLXNgRXgeQEdz9fvDDkNKyHnbHRFtOr7/LcfgBcmct7t/ET+HaCTqfh06OzoAxrkN/IfjJBVe+g==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} @@ -4784,6 +4863,10 @@ packages: resolution: {integrity: sha512-NcDiDkTLuPR+++OCKB0nWafEmhg/Da8aUPLPMQbK+bxKKCm1/S5he+AqYa4PlMCVBalb4/yxIRub6qkEx5yJbw==} engines: {node: '>= 0.4'} + require-directory@2.1.1: + resolution: {integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==} + engines: {node: '>=0.10.0'} + require-in-the-middle@7.5.2: resolution: {integrity: 
sha512-gAZ+kLqBdHarXB64XpAe2VCjB7rIRv+mU8tfRWziHRJ5umKsIHN2tLLv6EtMw7WCdP19S0ERVMldNvxYCHnhSQ==} engines: {node: '>=8.6.0'} @@ -4810,6 +4893,10 @@ packages: resolution: {integrity: sha512-U7WjGVG9sH8tvjW5SmGbQuui75FiyjAX72HX15DwBBwF9dNiQZRQAg9nnPhYy+TUnE0+VcrttuvNI8oSxZcocA==} hasBin: true + retry-request@8.0.2: + resolution: {integrity: sha512-JzFPAfklk1kjR1w76f0QOIhoDkNkSqW8wYKT08n9yysTmZfB+RQ2QoXoTAeOi1HD9ZipTyTAZg3c4pM/jeqgSw==} + engines: {node: '>=18'} + retry@0.13.1: resolution: {integrity: sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==} engines: {node: '>= 4'} @@ -4823,6 +4910,10 @@ packages: deprecated: Rimraf versions prior to v4 are no longer supported hasBin: true + rimraf@5.0.10: + resolution: {integrity: sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==} + hasBin: true + roarr@2.15.4: resolution: {integrity: sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A==} engines: {node: '>=8.0'} @@ -4960,6 +5051,12 @@ packages: std-env@3.10.0: resolution: {integrity: sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==} + stream-events@1.0.5: + resolution: {integrity: sha512-E1GUzBSgvct8Jsb3v2X15pjzN1tYebtbLaMg+eBOUOAxgbLoSbT2NS91ckc5lJD1KfLjId+jXJRgo0qnV5Nerg==} + + stream-shift@1.0.3: + resolution: {integrity: sha512-76ORR0DO1o1hlKwTbi/DM3EXWGf3ZJYO8cXX5RJwnul2DEg2oyoZyjLNoQM8WsvZiFKCRfC1O0J7iCvie3RZmQ==} + string-argv@0.3.2: resolution: {integrity: sha512-aqD2Q0144Z+/RqG52NeHEkZauTAUWJO8c6yTftGJKO3Tja5tUgIfmIl6kExvhtxSDP7fXB6DvzkfMpCd/F3G+Q==} engines: {node: '>=0.6.19'} @@ -5013,6 +5110,9 @@ packages: strip-literal@2.1.0: resolution: {integrity: sha512-Op+UycaUt/8FbN/Z2TWPBLge3jWrP3xj10f3fnYxf052bKuS3EKs1ZQcVGjnEMdsNVAM+plXRdmjrZ/KgG3Skw==} + stubs@3.0.0: + resolution: {integrity: sha512-PdHt7hHUJKxvTCgbKX9C1V/ftOcjJQgz8BZwNfV5c4B6dcGqlpelTbJ999jBGZ2jYiPAwcX5dP6oBwVlBlUbxw==} + 
sucrase@3.35.0: resolution: {integrity: sha512-8EbVDiu9iN/nESwxeSxDKe0dunta1GOlHufmSSXxMD2z2/tMZpDMpvXQGsc+ajGo8y2uYUmixaSRUc/QPoQ0GA==} engines: {node: '>=16 || 14 >=14.17'} @@ -5046,6 +5146,10 @@ packages: resolution: {integrity: sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==} engines: {node: '>=6'} + teeny-request@10.1.2: + resolution: {integrity: sha512-Xj0ZAQ0CeuQn6UxCDPLbFRlgcSTUEyO3+wiepr2grjIjyL/lMMs1Z4OwXn8kLvn/V1OuaEP0UY7Na6UDNNsYrQ==} + engines: {node: '>=18'} + term-size@2.2.1: resolution: {integrity: sha512-wK0Ri4fOGjv/XPy8SBHZChl8CM7uMc5VML7SqiQ0zG7+J5Vr+RMQDoHa2CNT6KHUnTGIXH34UDMkPzAUyapBZg==} engines: {node: '>=8'} @@ -5270,6 +5374,9 @@ packages: uri-js@4.4.1: resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==} + util-deprecate@1.0.2: + resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} + validator@13.12.0: resolution: {integrity: sha512-c1Q0mCiPlgdTVVVIJIrBuxNicYE+t/7oKeI9MWLj3fh/uq2Pxh/3eeWbVZ4OcGW1TUf53At0njHw5SMdA3tmMg==} engines: {node: '>= 0.10'} @@ -5524,9 +5631,21 @@ packages: utf-8-validate: optional: true + y18n@5.0.8: + resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==} + engines: {node: '>=10'} + yallist@4.0.0: resolution: {integrity: sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==} + yargs-parser@21.1.1: + resolution: {integrity: sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==} + engines: {node: '>=12'} + + yargs@17.7.2: + resolution: {integrity: sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==} + engines: {node: '>=12'} + yocto-queue@0.1.0: resolution: {integrity: 
sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==} engines: {node: '>=10'} @@ -6084,6 +6203,12 @@ snapshots: '@ffmpeg-installer/win32-x64@4.1.0': optional: true + '@google-cloud/text-to-speech@6.4.1': + dependencies: + google-gax: 5.0.6 + transitivePeerDependencies: + - supports-color + '@google/genai@1.50.1': dependencies: google-auth-library: 10.6.2 @@ -6095,6 +6220,18 @@ snapshots: - supports-color - utf-8-validate + '@grpc/grpc-js@1.14.4': + dependencies: + '@grpc/proto-loader': 0.8.1 + '@js-sdsl/ordered-map': 4.4.2 + + '@grpc/proto-loader@0.8.1': + dependencies: + lodash.camelcase: 4.3.0 + long: 5.3.2 + protobufjs: 7.5.6 + yargs: 17.7.2 + '@huggingface/hub@2.4.1': dependencies: '@huggingface/tasks': 0.19.36 @@ -6258,6 +6395,8 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.5 + '@js-sdsl/ordered-map@4.4.2': {} + '@livekit/changesets-changelog-github@0.0.4': dependencies: '@changesets/get-github-info': 0.5.2 @@ -7419,6 +7558,12 @@ snapshots: cjs-module-lexer@1.4.3: {} + cliui@8.0.1: + dependencies: + string-width: 4.2.3 + strip-ansi: 6.0.1 + wrap-ansi: 7.0.0 + color-convert@1.9.3: dependencies: color-name: 1.1.3 @@ -7552,6 +7697,13 @@ snapshots: es-errors: 1.3.0 gopd: 1.2.0 + duplexify@4.1.3: + dependencies: + end-of-stream: 1.4.4 + inherits: 2.0.4 + readable-stream: 3.6.2 + stream-shift: 1.0.3 + eastasianwidth@0.2.0: {} ecdsa-sig-formatter@1.0.11: @@ -7754,6 +7906,8 @@ snapshots: '@esbuild/win32-ia32': 0.27.7 '@esbuild/win32-x64': 0.27.7 + escalade@3.2.0: {} + escape-string-regexp@1.0.5: {} escape-string-regexp@4.0.0: {} @@ -8186,6 +8340,8 @@ snapshots: transitivePeerDependencies: - supports-color + get-caller-file@2.0.5: {} + get-func-name@2.0.2: {} get-intrinsic@1.3.0: @@ -8302,6 +8458,22 @@ snapshots: transitivePeerDependencies: - supports-color + google-gax@5.0.6: + dependencies: + '@grpc/grpc-js': 1.14.4 + '@grpc/proto-loader': 0.8.1 + duplexify: 4.1.3 + google-auth-library: 
10.6.2 + google-logging-utils: 1.1.3 + node-fetch: 3.3.2 + object-hash: 3.0.0 + proto3-json-serializer: 3.0.4 + protobufjs: 7.5.6 + retry-request: 8.0.2 + rimraf: 5.0.10 + transitivePeerDependencies: + - supports-color + google-logging-utils@1.1.3: {} gopd@1.2.0: {} @@ -8340,6 +8512,13 @@ snapshots: html-escaper@2.0.2: {} + http-proxy-agent@7.0.2: + dependencies: + agent-base: 7.1.4 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + https-proxy-agent@7.0.6: dependencies: agent-base: 7.1.4 @@ -8643,6 +8822,8 @@ snapshots: dependencies: p-locate: 5.0.0 + lodash.camelcase@4.3.0: {} + lodash.get@4.4.2: {} lodash.isequal@4.5.0: {} @@ -8787,6 +8968,8 @@ snapshots: object-assign@4.1.1: {} + object-hash@3.0.0: {} + object-inspect@1.13.1: {} object-keys@1.1.1: {} @@ -9106,6 +9289,10 @@ snapshots: object-assign: 4.1.1 react-is: 16.13.1 + proto3-json-serializer@3.0.4: + dependencies: + protobufjs: 7.5.6 + protobufjs@7.5.4: dependencies: '@protobufjs/aspromise': 1.1.2 @@ -9162,6 +9349,12 @@ snapshots: pify: 4.0.1 strip-bom: 3.0.0 + readable-stream@3.6.2: + dependencies: + inherits: 2.0.4 + string_decoder: 1.3.0 + util-deprecate: 1.0.2 + readable-stream@4.5.2: dependencies: abort-controller: 3.0.0 @@ -9193,6 +9386,8 @@ snapshots: es-errors: 1.3.0 set-function-name: 2.0.2 + require-directory@2.1.1: {} + require-in-the-middle@7.5.2: dependencies: debug: 4.4.1 @@ -9224,6 +9419,13 @@ snapshots: path-parse: 1.0.7 supports-preserve-symlinks-flag: 1.0.0 + retry-request@8.0.2: + dependencies: + extend: 3.0.2 + teeny-request: 10.1.2 + transitivePeerDependencies: + - supports-color + retry@0.13.1: {} reusify@1.0.4: {} @@ -9232,6 +9434,10 @@ snapshots: dependencies: glob: 7.2.3 + rimraf@5.0.10: + dependencies: + glob: 10.4.5 + roarr@2.15.4: dependencies: boolean: 3.2.0 @@ -9419,6 +9625,12 @@ snapshots: std-env@3.10.0: {} + stream-events@1.0.5: + dependencies: + stubs: 3.0.0 + + stream-shift@1.0.3: {} + string-argv@0.3.2: {} string-width@4.2.3: @@ -9489,6 +9701,8 @@ 
snapshots: dependencies: js-tokens: 9.0.1 + stubs@3.0.0: {} + sucrase@3.35.0: dependencies: '@jridgewell/gen-mapping': 0.3.5 @@ -9522,6 +9736,15 @@ snapshots: tapable@2.2.1: {} + teeny-request@10.1.2: + dependencies: + http-proxy-agent: 7.0.2 + https-proxy-agent: 7.0.6 + node-fetch: 3.3.2 + stream-events: 1.0.5 + transitivePeerDependencies: + - supports-color + term-size@2.2.1: {} text-table@0.2.0: {} @@ -9762,6 +9985,8 @@ snapshots: dependencies: punycode: 2.3.1 + util-deprecate@1.0.2: {} + validator@13.12.0: {} vite-node@1.6.0(@types/node@22.19.1): @@ -10084,8 +10309,22 @@ snapshots: ws@8.20.1: {} + y18n@5.0.8: {} + yallist@4.0.0: {} + yargs-parser@21.1.1: {} + + yargs@17.7.2: + dependencies: + cliui: 8.0.1 + escalade: 3.2.0 + get-caller-file: 2.0.5 + require-directory: 2.1.1 + string-width: 4.2.3 + y18n: 5.0.8 + yargs-parser: 21.1.1 + yocto-queue@0.1.0: {} yocto-queue@1.0.0: {} From 318b628416b3a10466c0358e261f9d641b2aa776 Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Mon, 1 Jun 2026 16:37:07 +0530 Subject: [PATCH 2/8] fix(google-cloud): address Devin review feedback - Remove queue.close() from ChunkedStream finally (base class handles retry) - Remove tokenizer.close() from SynthesizeStream finally (breaks retry path) - Skip toLiveKitTtsError wrapping for existing APIConnectionError/APIStatusError - Fix voiceName type from TTSLanguage to string (semantically misleading) - Log warning when gender overrides explicit voiceName - Restore updateOptions method (dropped during squash) --- .changeset/google-cloud-tts-plugin.md | 4 +-- plugins/google-cloud/src/tts.ts | 42 ++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/.changeset/google-cloud-tts-plugin.md b/.changeset/google-cloud-tts-plugin.md index ee3be6fb8..0bc831c32 100644 --- a/.changeset/google-cloud-tts-plugin.md +++ b/.changeset/google-cloud-tts-plugin.md @@ -6,5 +6,5 @@ feat: add Google Cloud Text-to-Speech plugin Ports the Google Cloud TTS integration from the 
Python SDK, supporting both streaming (via gRPC bidirectional streaming) and non-streaming synthesis. -Uses @google-cloud/text-to-speech client library with credentials from -GOOGLE_TTS_CREDENTIALS_JSON env var or Application Default Credentials. +Uses @google-cloud/text-to-speech client with standard Google Cloud auth +(credentials object, keyFilename, GOOGLE_APPLICATION_CREDENTIALS, or ADC). diff --git a/plugins/google-cloud/src/tts.ts b/plugins/google-cloud/src/tts.ts index 12cdb132d..889b0cf1c 100644 --- a/plugins/google-cloud/src/tts.ts +++ b/plugins/google-cloud/src/tts.ts @@ -8,6 +8,7 @@ import { APIConnectionError, APIStatusError, AudioByteStream, + log, shortuuid, tokenize, tts, @@ -34,10 +35,10 @@ export interface TTSOptions { /** Model name (e.g. `journey`, `chirp-3-hd`). */ modelName?: TTSModel | string; /** Voice name (e.g. `en-US-Standard-H`). */ - voiceName?: TTSLanguage | string; + voiceName?: string; /** Language code (BCP-47, e.g. `en-US`). */ language?: TTSLanguage | string; - /** Voice gender. */ + /** Voice gender. Overrides `voiceName` when both are provided. */ gender?: TTSGender; /** Output sample rate in Hz. Default: 24000. */ sampleRate?: number; @@ -61,7 +62,7 @@ export interface TTSOptions { interface ResolvedTTSOptions { modelName: TTSModel | string; - voiceName: TTSLanguage | string; + voiceName: string; language: TTSLanguage | string; sampleRate: number; streaming: boolean; @@ -92,6 +93,11 @@ export class TTS extends tts.TTS { const gender = opts.gender; if (gender) { + if (opts.voiceName) { + log().warn( + `Google Cloud TTS: gender '${gender}' overrides explicit voiceName '${opts.voiceName}'`, + ); + } this.#opts.voiceName = buildVoiceName(this.#opts.language, gender); } @@ -131,6 +137,23 @@ export class TTS extends tts.TTS { return new SynthesizeStream(this, options?.connOptions); } + /** + * Update mutable TTS options without recreating the client. 
+ */ + updateOptions(opts: { + modelName?: TTSModel | string; + voiceName?: string; + language?: TTSLanguage | string; + gender?: TTSGender; + }): void { + if (opts.modelName !== undefined) this.#opts.modelName = opts.modelName; + if (opts.voiceName !== undefined) this.#opts.voiceName = opts.voiceName; + if (opts.language !== undefined) this.#opts.language = opts.language; + if (opts.gender !== undefined) { + this.#opts.voiceName = buildVoiceName(this.#opts.language, opts.gender); + } + } + get opts() { return this.#opts; } @@ -199,10 +222,13 @@ export class SynthesizeStream extends tts.SynthesizeStream { return; } + if (error instanceof APIConnectionError || error instanceof APIStatusError) { + throw error; + } + throw toLiveKitTtsError(error); } finally { this.abortSignal.removeEventListener('abort', abort); - this.#tokenizer.close(); call.destroy(); } } @@ -315,7 +341,7 @@ export class ChunkedStream extends tts.ChunkedStream { this.#tts = ttsProvider; } - protected async run() { + protected async run(): Promise { const requestId = shortuuid(); const request: SynthesizeSpeechRequest = { input: { @@ -396,9 +422,11 @@ export class ChunkedStream extends tts.ChunkedStream { return; } + if (error instanceof APIConnectionError || error instanceof APIStatusError) { + throw error; + } + throw toLiveKitTtsError(error); - } finally { - this.queue.close(); } } } From c5094017018444c2bd36bd828e56b5920c948f9f Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Tue, 2 Jun 2026 11:12:59 +0530 Subject: [PATCH 3/8] fix(google-cloud): support aborting unary TTS Use the Google gax cancellable unary call for synthesizeSpeech so aborting a ChunkedStream cancels the in-flight RPC. Pass the connection timeout through CallOptions and add updateOptions warnings for gender-derived Standard voices overriding voice selection. 
--- plugins/google-cloud/src/tts.ts | 86 +++++++++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 9 deletions(-) diff --git a/plugins/google-cloud/src/tts.ts b/plugins/google-cloud/src/tts.ts index c548dc814..b4117aa27 100644 --- a/plugins/google-cloud/src/tts.ts +++ b/plugins/google-cloud/src/tts.ts @@ -9,6 +9,7 @@ import { APIError, APIStatusError, AudioByteStream, + DEFAULT_API_CONNECT_OPTIONS, log, shortuuid, tokenize, @@ -25,8 +26,25 @@ type GaxClientOptions = NonNullable; +type SynthesizeSpeechResult = [ + SynthesizeSpeechResponse, + SynthesizeSpeechRequest | undefined, + object | undefined, +]; +type CancellablePromise = Promise & { cancel(): void }; +type SynthesizeSpeechCallOptions = { + timeout?: number; + otherArgs?: { + headers?: Record; + }; +}; +type CancellableSynthesizeSpeechCall = ( + request: SynthesizeSpeechRequest, + options?: SynthesizeSpeechCallOptions, +) => CancellablePromise; // --------------------------------------------------------------------------- // Options @@ -40,7 +58,10 @@ export interface TTSOptions { voiceName?: string; /** Language code (BCP-47, e.g. `en-US`). */ language?: TTSLanguage | string; - /** Voice gender. Overrides `voiceName` when both are provided. */ + /** + * Voice gender. Builds a Standard-tier voice name and overrides `voiceName` + * when both are provided. + */ gender?: TTSGender; /** Output sample rate in Hz. Default: 24000. 
*/ sampleRate?: number; @@ -100,6 +121,11 @@ export class TTS extends tts.TTS { `Google Cloud TTS: gender '${gender}' overrides explicit voiceName '${opts.voiceName}'`, ); } + if (opts.modelName) { + log().warn( + `Google Cloud TTS: gender '${gender}' builds a Standard voice name that may not match modelName '${opts.modelName}'`, + ); + } this.#opts.voiceName = buildVoiceName(this.#opts.language, gender); } @@ -152,6 +178,16 @@ export class TTS extends tts.TTS { if (opts.voiceName !== undefined) this.#opts.voiceName = opts.voiceName; if (opts.language !== undefined) this.#opts.language = opts.language; if (opts.gender !== undefined) { + if (opts.voiceName !== undefined) { + log().warn( + `Google Cloud TTS: gender '${opts.gender}' overrides explicit voiceName '${opts.voiceName}'`, + ); + } + if (this.#opts.modelName) { + log().warn( + `Google Cloud TTS: gender '${opts.gender}' builds a Standard voice name that may not match modelName '${this.#opts.modelName}'`, + ); + } this.#opts.voiceName = buildVoiceName(this.#opts.language, opts.gender); } } @@ -346,15 +382,17 @@ export class SynthesizeStream extends tts.SynthesizeStream { export class ChunkedStream extends tts.ChunkedStream { readonly label = 'google-cloud.ChunkedStream'; #tts: TTS; + #connOptions: APIConnectOptions; constructor( inputText: string, ttsProvider: TTS, - connOptions?: APIConnectOptions, + connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS, abortSignal?: AbortSignal, ) { super(inputText, ttsProvider, connOptions, abortSignal); this.#tts = ttsProvider; + this.#connOptions = connOptions; } protected async run(): Promise { @@ -371,15 +409,21 @@ export class ChunkedStream extends tts.ChunkedStream { }; try { - const [response] = await this.#tts.client.synthesizeSpeech(request, { - otherArgs: { - headers: { - 'x-goog-request-params': `voice.language_code=${encodeURIComponent( - this.#tts.opts.language, - )}`, + const [response] = await synthesizeSpeechWithAbort( + this.#tts.client, + request, 
+ { + timeout: this.#connOptions.timeoutMs, + otherArgs: { + headers: { + 'x-goog-request-params': `voice.language_code=${encodeURIComponent( + this.#tts.opts.language, + )}`, + }, }, }, - }); + this.abortSignal, + ); if (this.abortSignal.aborted) { return; @@ -477,6 +521,30 @@ async function writeStreamingRequest( await once(call, 'drain'); } +async function synthesizeSpeechWithAbort( + client: TextToSpeechClient, + request: SynthesizeSpeechRequest, + options: SynthesizeSpeechCallOptions, + abortSignal: AbortSignal, +): Promise { + const synthesizeSpeech = client.innerApiCalls.synthesizeSpeech as CancellableSynthesizeSpeechCall; + const call = synthesizeSpeech(request, options); + const abort = () => { + call.cancel(); + }; + + abortSignal.addEventListener('abort', abort, { once: true }); + if (abortSignal.aborted) { + call.cancel(); + } + + try { + return await call; + } finally { + abortSignal.removeEventListener('abort', abort); + } +} + function extractArrayBuffer(buf: Buffer): ArrayBuffer { return new Uint8Array(buf).buffer as ArrayBuffer; } From 5e9e7a367f4caca5c71a944f0b616db8e0ba66ff Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Tue, 2 Jun 2026 11:28:58 +0530 Subject: [PATCH 4/8] fix(google-cloud): unblock streaming shutdown Reject pending streaming writes when the gRPC stream closes before drain, and destroy failed streaming calls with an error so concurrent tasks settle during cleanup. Treat DEADLINE_EXCEEDED as retryable for Google Cloud TTS errors. 
--- plugins/google-cloud/src/tts.ts | 48 ++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/plugins/google-cloud/src/tts.ts b/plugins/google-cloud/src/tts.ts index b4117aa27..8890aab92 100644 --- a/plugins/google-cloud/src/tts.ts +++ b/plugins/google-cloud/src/tts.ts @@ -16,7 +16,6 @@ import { tts, } from '@livekit/agents'; import type { AudioFrame } from '@livekit/rtc-node'; -import { once } from 'node:events'; import type { TTSGender, TTSLanguage, TTSModel } from './models.js'; const NUM_CHANNELS = 1; @@ -253,13 +252,15 @@ export class SynthesizeStream extends tts.SynthesizeStream { await Promise.all(tasks); } catch (error: unknown) { - call.destroy(); tokenizer?.close(); if (tasks) { + destroyStreamingCall(call, error); if (!this.input.closed) { this.input.close(); } await Promise.allSettled(tasks); + } else { + call.destroy(); } if (this.abortSignal.aborted) { @@ -518,7 +519,47 @@ async function writeStreamingRequest( return; } - await once(call, 'drain'); + await new Promise((resolve, reject) => { + const cleanup = () => { + call.off('drain', onDrain); + call.off('error', onError); + call.off('close', onClose); + }; + const onDrain = () => { + cleanup(); + resolve(); + }; + const onError = (error: Error) => { + cleanup(); + reject(error); + }; + const onClose = () => { + cleanup(); + reject( + new APIConnectionError({ + message: 'Google Cloud TTS stream closed while waiting for drain', + options: { retryable: true }, + }), + ); + }; + + call.once('drain', onDrain); + call.once('error', onError); + call.once('close', onClose); + }); +} + +function destroyStreamingCall(call: GoogleStreamingCall, error: unknown): void { + const streamError = + error instanceof Error ? 
error : new Error('Google Cloud TTS streaming request failed'); + const ignoreDestroyError = () => {}; + + call.on('error', ignoreDestroyError); + try { + call.destroy(streamError); + } finally { + call.off('error', ignoreDestroyError); + } } async function synthesizeSpeechWithAbort( @@ -595,6 +636,7 @@ function toLiveKitTtsError(error: unknown): Error { if (typeof maybeGoogleError.code === 'number') { const retryable = + maybeGoogleError.code === 4 || maybeGoogleError.code === 8 || maybeGoogleError.code === 10 || maybeGoogleError.code === 13 || From bf872c867183d4303b9c4bda61b7cbaad991504a Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Tue, 2 Jun 2026 11:46:38 +0530 Subject: [PATCH 5/8] fix(google-cloud): ignore unary TTS cancellation Treat gax CANCELLED rejections as normal ChunkedStream aborts when the abort signal is set. Also document that Google Cloud TTS numeric provider errors are gRPC status codes with explicit retryability. --- plugins/google-cloud/src/tts.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/plugins/google-cloud/src/tts.ts b/plugins/google-cloud/src/tts.ts index 8890aab92..619c1964c 100644 --- a/plugins/google-cloud/src/tts.ts +++ b/plugins/google-cloud/src/tts.ts @@ -475,7 +475,7 @@ export class ChunkedStream extends tts.ChunkedStream { } sendLastFrame(true); } catch (error: unknown) { - if (error instanceof Error && error.name === 'AbortError') { + if (this.abortSignal.aborted || isAbortError(error)) { return; } @@ -562,6 +562,10 @@ function destroyStreamingCall(call: GoogleStreamingCall, error: unknown): void { } } +function isAbortError(error: unknown): boolean { + return error instanceof Error && error.name === 'AbortError'; +} + async function synthesizeSpeechWithAbort( client: TextToSpeechClient, request: SynthesizeSpeechRequest, @@ -635,6 +639,9 @@ function toLiveKitTtsError(error: unknown): Error { }; if (typeof maybeGoogleError.code === 'number') { + // Google returns gRPC status codes here (0-16), 
not HTTP status codes. + // Retryability is set explicitly so APIStatusError's HTTP 4xx heuristic + // does not classify these provider errors for us. const retryable = maybeGoogleError.code === 4 || maybeGoogleError.code === 8 || From 83dcc93c5027ff320c376d1e11899984b7932e25 Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Tue, 2 Jun 2026 12:03:55 +0530 Subject: [PATCH 6/8] fix(google-cloud): suppress async stream destroy errors Keep a no-op error listener attached when destroying a failed Google Cloud streaming call with an error. Node streams may emit destroy errors asynchronously, so removing the listener immediately can still produce an unhandled error. --- plugins/google-cloud/src/tts.ts | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/plugins/google-cloud/src/tts.ts b/plugins/google-cloud/src/tts.ts index 619c1964c..376a77f3e 100644 --- a/plugins/google-cloud/src/tts.ts +++ b/plugins/google-cloud/src/tts.ts @@ -552,14 +552,9 @@ async function writeStreamingRequest( function destroyStreamingCall(call: GoogleStreamingCall, error: unknown): void { const streamError = error instanceof Error ? error : new Error('Google Cloud TTS streaming request failed'); - const ignoreDestroyError = () => {}; - call.on('error', ignoreDestroyError); - try { - call.destroy(streamError); - } finally { - call.off('error', ignoreDestroyError); - } + call.on('error', () => {}); + call.destroy(streamError); } function isAbortError(error: unknown): boolean { From 5b8041f7bcf7d465cba66f83e8e7d148d8524889 Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Tue, 2 Jun 2026 12:24:45 +0530 Subject: [PATCH 7/8] fix(google-cloud): preserve streaming input on retry Abort per-attempt tokenization instead of closing the shared SynthesizeStream input queue. This lets cleanup settle without poisoning the base retry path. Add abortable AsyncIterableQueue.next support for the plugin cleanup path. 
--- .changeset/google-cloud-tts-plugin.md | 1 + agents/src/utils.ts | 4 ++-- plugins/google-cloud/src/tts.ts | 32 +++++++++++++++++++++------ 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/.changeset/google-cloud-tts-plugin.md b/.changeset/google-cloud-tts-plugin.md index 0bc831c32..d5c7fb540 100644 --- a/.changeset/google-cloud-tts-plugin.md +++ b/.changeset/google-cloud-tts-plugin.md @@ -1,5 +1,6 @@ --- "@livekit/agents-plugin-google-cloud": patch +"@livekit/agents": patch --- feat: add Google Cloud Text-to-Speech plugin diff --git a/agents/src/utils.ts b/agents/src/utils.ts index 9988da762..03e646790 100644 --- a/agents/src/utils.ts +++ b/agents/src/utils.ts @@ -334,11 +334,11 @@ export class AsyncIterableQueue implements AsyncIterableIterator { this.#queue.put(AsyncIterableQueue.CLOSE_SENTINEL); } - async next(): Promise> { + async next(options: { signal?: AbortSignal } = {}): Promise> { if (this.#closed && this.#queue.items.length === 0) { return { value: undefined, done: true }; } - const item = await this.#queue.get(); + const item = await this.#queue.get(options); if (item === AsyncIterableQueue.CLOSE_SENTINEL && this.#closed) { return { value: undefined, done: true }; } diff --git a/plugins/google-cloud/src/tts.ts b/plugins/google-cloud/src/tts.ts index 376a77f3e..34e292ca6 100644 --- a/plugins/google-cloud/src/tts.ts +++ b/plugins/google-cloud/src/tts.ts @@ -222,6 +222,7 @@ export class SynthesizeStream extends tts.SynthesizeStream { const call = this.#tts.client.streamingSynthesize(); let tokenizer: tokenize.SentenceStream | undefined; let tasks: Promise[] | undefined; + const attemptController = new AbortController(); const abort = () => { try { call.cancel(); @@ -245,19 +246,19 @@ export class SynthesizeStream extends tts.SynthesizeStream { tokenizer = new tokenize.basic.SentenceTokenizer().stream(); tasks = [ - this.#tokenizeInput(tokenizer), + this.#tokenizeInput(tokenizer, attemptController.signal), this.#sendText(call, tokenizer), 
this.#receiveAudio(call, requestId), ]; await Promise.all(tasks); } catch (error: unknown) { + // Do not close this.input here: it belongs to the base stream and must + // remain usable if the base retry loop invokes run() again. + attemptController.abort(); tokenizer?.close(); if (tasks) { destroyStreamingCall(call, error); - if (!this.input.closed) { - this.input.close(); - } await Promise.allSettled(tasks); } else { call.destroy(); @@ -274,14 +275,29 @@ export class SynthesizeStream extends tts.SynthesizeStream { throw toLiveKitTtsError(error); } finally { this.abortSignal.removeEventListener('abort', abort); + attemptController.abort(); tokenizer?.close(); call.destroy(); } } - async #tokenizeInput(tokenizer: tokenize.SentenceStream): Promise { + async #tokenizeInput( + tokenizer: tokenize.SentenceStream, + attemptSignal: AbortSignal, + ): Promise { + const input = this.input as { + next(options: { + signal?: AbortSignal; + }): Promise>; + }; + try { - for await (const data of this.input) { + while (!attemptSignal.aborted) { + const { value: data, done } = await input.next({ signal: attemptSignal }); + if (done || attemptSignal.aborted) { + break; + } + if (data === SynthesizeStream.FLUSH_SENTINEL) { tokenizer.flush(); continue; @@ -290,7 +306,9 @@ export class SynthesizeStream extends tts.SynthesizeStream { tokenizer.pushText(data); } - tokenizer.endInput(); + if (!attemptSignal.aborted) { + tokenizer.endInput(); + } } catch { // Stream shutdown can close tokenizer/input concurrently. 
} From cca2c86fe09834d12fdb88f378aa735424c00ba8 Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Tue, 2 Jun 2026 12:47:55 +0530 Subject: [PATCH 8/8] fix(google-cloud): settle streaming abort cleanup --- plugins/google-cloud/src/tts.ts | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/plugins/google-cloud/src/tts.ts b/plugins/google-cloud/src/tts.ts index 34e292ca6..7f3fbb7c4 100644 --- a/plugins/google-cloud/src/tts.ts +++ b/plugins/google-cloud/src/tts.ts @@ -8,6 +8,7 @@ import { APIConnectionError, APIError, APIStatusError, + type AsyncIterableQueue, AudioByteStream, DEFAULT_API_CONNECT_OPTIONS, log, @@ -44,6 +45,9 @@ type CancellableSynthesizeSpeechCall = ( request: SynthesizeSpeechRequest, options?: SynthesizeSpeechCallOptions, ) => CancellablePromise; +type AbortableAsyncIterableQueue = AsyncIterableQueue & { + next(options: { signal?: AbortSignal }): Promise>; +}; // --------------------------------------------------------------------------- // Options @@ -227,7 +231,7 @@ export class SynthesizeStream extends tts.SynthesizeStream { try { call.cancel(); } catch { - call.destroy(); + destroyStreamingCall(call, new Error('Google Cloud TTS stream cancelled')); } }; @@ -285,11 +289,9 @@ export class SynthesizeStream extends tts.SynthesizeStream { tokenizer: tokenize.SentenceStream, attemptSignal: AbortSignal, ): Promise { - const input = this.input as { - next(options: { - signal?: AbortSignal; - }): Promise>; - }; + const input = this.input as AbortableAsyncIterableQueue< + string | typeof SynthesizeStream.FLUSH_SENTINEL + >; try { while (!attemptSignal.aborted) {