From 23a17c9dd5a7954aee50bdb7975b8f678ff341e3 Mon Sep 17 00:00:00 2001
From: Owen McGirr
Date: Sun, 12 Apr 2026 15:06:46 +0100
Subject: [PATCH 1/2] =?UTF-8?q?fix:=20ElevenLabs=20language=20mapping=20?=
 =?UTF-8?q?=E2=80=94=20fetch=20models=20for=20full=20language=20coverage?=
 =?UTF-8?q?=20(#51)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add _getVoicesWithModels() helper that resolves each voice's supported
  languages from its high_quality_base_model_ids
- Fetch /v1/models in parallel with /v1/voices in _getVoices(); a failed
  or malformed /models response no longer discards the voice list
- Deduplicate languages across multiple models per voice
- Fall back to English for voices with no model associations
- Update _mapVoicesToUnified to use resolved languages instead of
  labels.accent (which was a non-BCP-47 accent string, not a language code)
---
 __tests__/elevenlabs-language-mapping.test.ts | 126 ++++++++++++++++++
 src/engines/elevenlabs.ts                     |  88 +++++++++---
 2 files changed, 194 insertions(+), 20 deletions(-)
 create mode 100644 __tests__/elevenlabs-language-mapping.test.ts

diff --git a/__tests__/elevenlabs-language-mapping.test.ts b/__tests__/elevenlabs-language-mapping.test.ts
new file mode 100644
index 0000000..f0c3903
--- /dev/null
+++ b/__tests__/elevenlabs-language-mapping.test.ts
@@ -0,0 +1,126 @@
+/**
+ * Tests for ElevenLabs language mapping fix (issue #51)
+ *
+ * Problem:
+ *   1. labels.accent ("american", "british") is not a BCP-47 code
+ *   2. Multilingual voices only got one language code instead of all supported languages
+ *
+ * Fix:
+ *   - Fetch /v1/models to get language lists per model
+ *   - Map voice.high_quality_base_model_ids → union of languages
+ *   - Use language_id ("en", "es") as bcp47, language name as display
+ */
+
+import { ElevenLabsTTSClient } from "../src/engines/elevenlabs";
+
+const MOCK_VOICES = [
+  {
+    voice_id: "v1",
+    name: "Rachel",
+    labels: { gender: "female", accent: "american" },
+    high_quality_base_model_ids: ["eleven_multilingual_v2", "eleven_flash_v2_5"],
+  },
+  {
+    voice_id: "v2",
+    name: "Bella",
+    labels: { gender: "female", accent: "british" },
+    high_quality_base_model_ids: ["eleven_multilingual_v2"],
+  },
+  {
+    voice_id: "v3",
+    name: "OldVoice",
+    labels: {},
+    high_quality_base_model_ids: [], // no models
+  },
+  {
+    voice_id: "v4",
+    name: "NonTtsOnly",
+    labels: {},
+    high_quality_base_model_ids: ["eleven_tts_v1"], // only a non-TTS model
+  },
+];
+
+const MOCK_MODELS = [
+  {
+    model_id: "eleven_multilingual_v2",
+    can_do_text_to_speech: true,
+    languages: [
+      { language_id: "en", name: "English" },
+      { language_id: "es", name: "Spanish" },
+      { language_id: "fr", name: "French" },
+      { language_id: "de", name: "German" },
+    ],
+  },
+  {
+    model_id: "eleven_flash_v2_5",
+    can_do_text_to_speech: true,
+    languages: [
+      { language_id: "en", name: "English" },
+      { language_id: "es", name: "Spanish" },
+      { language_id: "ja", name: "Japanese" },
+    ],
+  },
+  {
+    model_id: "eleven_tts_v1",
+    can_do_text_to_speech: false, // not a TTS model — should be ignored
+    // Polish appears in no other model, so any leak from this model is detectable
+    languages: [{ language_id: "pl", name: "Polish" }],
+  },
+];
+
+describe("ElevenLabs _mapVoicesToUnified — language mapping from models", () => {
+  let client: any;
+
+  beforeEach(() => {
+    client = new ElevenLabsTTSClient({ apiKey: "fake" });
+  });
+
+  it("maps a multilingual voice to all languages from its models (deduped)", async () => {
+    // Rachel supports eleven_multilingual_v2 (en, es, fr, de) + eleven_flash_v2_5 (en, es, ja)
+    // → union in first-seen order = en, es, fr, de, ja (en and es deduped)
+    const rawVoices = client._getVoicesWithModels(MOCK_VOICES, MOCK_MODELS);
+    const voices = await client._mapVoicesToUnified(rawVoices);
+    const rachel = voices.find((v: any) => v.id === "v1");
+
+    const bcp47s = rachel.languageCodes.map((lc: any) => lc.bcp47);
+    expect(bcp47s).toEqual(["en", "es", "fr", "de", "ja"]);
+  });
+
+  it("uses human-readable language name as display", async () => {
+    const rawVoices = client._getVoicesWithModels(MOCK_VOICES, MOCK_MODELS);
+    const voices = await client._mapVoicesToUnified(rawVoices);
+    const rachel = voices.find((v: any) => v.id === "v1");
+    const en = rachel.languageCodes.find((lc: any) => lc.bcp47 === "en");
+
+    expect(en.display).toBe("English");
+  });
+
+  it("falls back to English when voice has no model ids", async () => {
+    const rawVoices = client._getVoicesWithModels(MOCK_VOICES, MOCK_MODELS);
+    const voices = await client._mapVoicesToUnified(rawVoices);
+    const old = voices.find((v: any) => v.id === "v3");
+
+    expect(old.languageCodes).toHaveLength(1);
+    expect(old.languageCodes[0].bcp47).toBe("en");
+  });
+
+  it("ignores models where can_do_text_to_speech is false", async () => {
+    const rawVoices = client._getVoicesWithModels(MOCK_VOICES, MOCK_MODELS);
+    const voices = await client._mapVoicesToUnified(rawVoices);
+    // v4 references only eleven_tts_v1 (Polish), which is not TTS-capable,
+    // so nothing resolves and the English fallback applies
+    const nonTts = voices.find((v: any) => v.id === "v4");
+    const bcp47s = nonTts.languageCodes.map((lc: any) => lc.bcp47);
+
+    expect(bcp47s).toEqual(["en"]);
+  });
+
+  it("falls back to English for every voice when the models payload is not an array", async () => {
+    const rawVoices = client._getVoicesWithModels(MOCK_VOICES, { detail: "unauthorized" });
+    const voices = await client._mapVoicesToUnified(rawVoices);
+
+    for (const voice of voices) {
+      expect(voice.languageCodes.map((lc: any) => lc.bcp47)).toEqual(["en"]);
+    }
+  });
+});
diff --git a/src/engines/elevenlabs.ts b/src/engines/elevenlabs.ts
index 19e0367..5a0fec7 100644
--- a/src/engines/elevenlabs.ts
+++ b/src/engines/elevenlabs.ts
@@ -459,25 +459,72 @@ export class ElevenLabsTTSClient extends AbstractTTSClient {
    * Get available voices from the provider
    * @returns Promise resolving to an array of voice objects
    */
+  /**
+   * Merge raw voices with resolved language data from the models endpoint.
+   * Extracted as a separate method so tests can inject mock data directly.
+   *
+   * @param rawVoices Voice objects from the `voices` array of GET /voices
+   * @param models Model objects from GET /models; a non-array value is treated as empty
+   * @returns A copy of each voice with a `_resolvedLanguages` array added
+   */
+  protected _getVoicesWithModels(rawVoices: any[], models: any[]): any[] {
+    type ModelLanguage = { language_id: string; name: string };
+
+    // Build model_id → languages map (TTS-capable models only).
+    // Guard against a non-array payload (e.g. an error object from /models).
+    const modelLanguageMap = new Map<string, ModelLanguage[]>();
+    for (const model of Array.isArray(models) ? models : []) {
+      if (model?.can_do_text_to_speech && Array.isArray(model.languages)) {
+        modelLanguageMap.set(model.model_id, model.languages);
+      }
+    }
+
+    return (Array.isArray(rawVoices) ? rawVoices : []).map((voice) => {
+      const modelIds: string[] = Array.isArray(voice.high_quality_base_model_ids)
+        ? voice.high_quality_base_model_ids
+        : [];
+      const seen = new Set<string>();
+      const resolvedLanguages: ModelLanguage[] = [];
+      for (const modelId of modelIds) {
+        for (const lang of modelLanguageMap.get(modelId) ?? []) {
+          // Skip malformed entries and languages another model already contributed
+          if (lang?.language_id && !seen.has(lang.language_id)) {
+            seen.add(lang.language_id);
+            resolvedLanguages.push(lang);
+          }
+        }
+      }
+      return { ...voice, _resolvedLanguages: resolvedLanguages };
+    });
+  }
+
+  /**
+   * Get available voices from the provider, each annotated with the languages
+   * of its base models
+   * @returns Promise resolving to an array of raw voice objects
+   */
   protected async _getVoices(): Promise<any[]> {
     try {
-      const response = await fetch(`${this.baseUrl}/voices`, {
-        method: "GET",
-        headers: {
-          "xi-api-key": this.apiKey,
-        },
-      });
-
-      if (!response.ok) {
-        const errorText = await response.text();
+      const headers = { "xi-api-key": this.apiKey };
+      const [voicesResp, modelsResp] = await Promise.all([
+        fetch(`${this.baseUrl}/voices`, { method: "GET", headers }),
+        // Language data is best-effort: a failed /models request must not
+        // discard the voice list, so its rejection is mapped to null.
+        fetch(`${this.baseUrl}/models`, { method: "GET", headers }).catch(() => null),
+      ]);
+
+      if (!voicesResp.ok) {
+        const errorText = await voicesResp.text();
         console.error(
-          `ElevenLabs API error: ${response.status} ${response.statusText}\nResponse: ${errorText}`
+          `ElevenLabs API error: ${voicesResp.status} ${voicesResp.statusText}\nResponse: ${errorText}`
         );
-        throw new Error(`Failed to get voices: ${response.statusText}`);
+        throw new Error(`Failed to get voices: ${voicesResp.statusText}`);
       }
 
-      const data = await response.json();
-      return data.voices;
+      const voiceData = await voicesResp.json();
+      const modelData = modelsResp?.ok ? await modelsResp.json().catch(() => []) : [];
+
+      return this._getVoicesWithModels(voiceData.voices, modelData);
     } catch (error) {
       console.error("Error getting ElevenLabs voices:", error);
       return [];
@@ -877,13 +924,14 @@ export class ElevenLabsTTSClient extends AbstractTTSClient {
           : voice.labels?.gender === "male"
             ? "Male"
             : undefined,
-      languageCodes: [
-        {
-          bcp47: voice.labels?.accent || "en-US",
-          iso639_3: (voice.labels?.accent || "en-US").split("-")[0] || "eng",
-          display: voice.labels?.accent || "English",
-        },
-      ],
+      languageCodes:
+        Array.isArray(voice._resolvedLanguages) && voice._resolvedLanguages.length > 0
+          ? voice._resolvedLanguages.map((lang: { language_id: string; name: string }) => ({
+              bcp47: lang.language_id,
+              iso639_3: lang.language_id,
+              display: lang.name,
+            }))
+          : [{ bcp47: "en", iso639_3: "en", display: "English" }],
       provider: "elevenlabs",
     }));
   }
From 6b1af7ae1dd9ed8fa884a33b66cd0565135ad535 Mon Sep 17 00:00:00 2001
From: Owen McGirr
Date: Sun, 12 Apr 2026 15:07:17 +0100
Subject: [PATCH 2/2] fix: remove duplicate JSDoc comment

---
 src/engines/elevenlabs.ts | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/engines/elevenlabs.ts b/src/engines/elevenlabs.ts
index 5a0fec7..edf5e59 100644
--- a/src/engines/elevenlabs.ts
+++ b/src/engines/elevenlabs.ts
@@ -455,10 +455,6 @@ export class ElevenLabsTTSClient extends AbstractTTSClient {
     return ["apiKey"];
   }
 
-  /**
-   * Get available voices from the provider
-   * @returns Promise resolving to an array of voice objects
-   */
   /**
    * Merge raw voices with resolved language data from the models endpoint.
    * Extracted as a separate method so tests can inject mock data directly.