From ae1e57a27a58997fc3368db4275a149efb760390 Mon Sep 17 00:00:00 2001 From: Owen McGirr Date: Sun, 12 Apr 2026 09:53:21 +0100 Subject: [PATCH] feat: add getVoicesByGender() and fix gender mappings in Google/ElevenLabs (#44) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add AbstractTTSClient.getVoicesByGender() reusing existing filterByGender() utility - Fix Google engine: map ssmlGender MALE/FEMALE/NEUTRAL → Male/Female/Unknown (was lowercase) - Fix ElevenLabs bulk voice list: map labels.gender when present; leave undefined otherwise --- __tests__/abstract-tts-gender.test.ts | 96 +++++++++++++++++++++++++++ __tests__/elevenlabs-gender.test.ts | 52 +++++++++++++++ __tests__/google-gender.test.ts | 60 +++++++++++++++++ src/core/abstract-tts.ts | 11 +++ src/engines/elevenlabs.ts | 7 +- src/engines/google.ts | 3 +- 6 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 __tests__/abstract-tts-gender.test.ts create mode 100644 __tests__/elevenlabs-gender.test.ts create mode 100644 __tests__/google-gender.test.ts diff --git a/__tests__/abstract-tts-gender.test.ts b/__tests__/abstract-tts-gender.test.ts new file mode 100644 index 0000000..7d47672 --- /dev/null +++ b/__tests__/abstract-tts-gender.test.ts @@ -0,0 +1,96 @@ +/** + * Tests for AbstractTTSClient.getVoicesByGender() (issue #44) + */ + +import type { UnifiedVoice } from "../src/types"; + +// Minimal stub so we can instantiate a concrete subclass +jest.mock("../src/core/abstract-tts", () => { + const actual = jest.requireActual("../src/core/abstract-tts"); + return actual; +}); + +// Build a concrete subclass with a fixed voice list +async function makeClient(voices: UnifiedVoice[]) { + const { AbstractTTSClient } = await import("../src/core/abstract-tts"); + + class TestTTSClient extends AbstractTTSClient { + constructor() { + super({ lang: "en-US" } as any); + } + protected async _getVoices(): Promise<UnifiedVoice[]> { + return voices; + } + async synthToBytes(_text: 
string): Promise<Uint8Array> { + return new Uint8Array(); + } + async synthToBytestream(_text: string): Promise<ReadableStream<Uint8Array>> { + return new ReadableStream(); + } + checkCredentials(): boolean { + return true; + } + } + + return new TestTTSClient(); +} + +const VOICES: UnifiedVoice[] = [ + { + id: "voice-female-1", + name: "Alice", + gender: "Female", + languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }], + provider: "azure", + }, + { + id: "voice-female-2", + name: "Beth", + gender: "Female", + languageCodes: [{ bcp47: "en-GB", iso639_3: "eng", display: "English (UK)" }], + provider: "azure", + }, + { + id: "voice-male-1", + name: "Charles", + gender: "Male", + languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }], + provider: "azure", + }, + { + id: "voice-unknown-1", + name: "Robot", + gender: "Unknown", + languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }], + provider: "azure", + }, +]; + +describe("AbstractTTSClient.getVoicesByGender()", () => { + it("returns only Female voices when asked for Female", async () => { + const client = await makeClient(VOICES); + const result = await (client as any).getVoicesByGender("Female"); + expect(result).toHaveLength(2); + expect(result.every((v: UnifiedVoice) => v.gender === "Female")).toBe(true); + }); + + it("returns only Male voices when asked for Male", async () => { + const client = await makeClient(VOICES); + const result = await (client as any).getVoicesByGender("Male"); + expect(result).toHaveLength(1); + expect(result[0].id).toBe("voice-male-1"); + }); + + it("returns only Unknown voices when asked for Unknown", async () => { + const client = await makeClient(VOICES); + const result = await (client as any).getVoicesByGender("Unknown"); + expect(result).toHaveLength(1); + expect(result[0].id).toBe("voice-unknown-1"); + }); + + it("returns an empty array when no voices match the gender", async () => { + const client = await makeClient([VOICES[0]]); // only Female + 
const result = await (client as any).getVoicesByGender("Male"); + expect(result).toHaveLength(0); + }); +}); diff --git a/__tests__/elevenlabs-gender.test.ts b/__tests__/elevenlabs-gender.test.ts new file mode 100644 index 0000000..d7202b6 --- /dev/null +++ b/__tests__/elevenlabs-gender.test.ts @@ -0,0 +1,52 @@ +/** + * Tests for ElevenLabs engine gender mapping in _mapVoicesToUnified (issue #44) + * The bulk voice list response includes labels.gender as "female" / "male" + */ + +jest.mock("../src/core/abstract-tts", () => ({ + AbstractTTSClient: class { + voiceId = "some-voice-id"; + lang = "en-US"; + properties: Record<string, unknown> = { rate: "medium", pitch: "medium", volume: 100 }; + timings: unknown[] = []; + on() {} + emit() {} + }, +})); + +describe("ElevenLabs _mapVoicesToUnified — gender mapping", () => { + let client: any; + + beforeEach(async () => { + const { ElevenLabsTTSClient } = await import("../src/engines/elevenlabs"); + client = new ElevenLabsTTSClient({ apiKey: "fake" }); + }); + + it("maps labels.gender=female to Female", async () => { + const voices = await client._mapVoicesToUnified([ + { voice_id: "v1", name: "Rachel", labels: { gender: "female", accent: "en-US" } }, + ]); + expect(voices[0].gender).toBe("Female"); + }); + + it("maps labels.gender=male to Male", async () => { + const voices = await client._mapVoicesToUnified([ + { voice_id: "v2", name: "Adam", labels: { gender: "male", accent: "en-US" } }, + ]); + expect(voices[0].gender).toBe("Male"); + }); + + it("leaves gender undefined when labels.gender is absent", async () => { + const voices = await client._mapVoicesToUnified([ + { voice_id: "v3", name: "Unnamed", labels: {} }, + ]); + expect(voices[0].gender).toBeUndefined(); + }); + + it("leaves gender undefined when labels is absent", async () => { + const voices = await client._mapVoicesToUnified([ + { voice_id: "v4", name: "NoLabels" }, + ]); + expect(voices[0].gender).toBeUndefined(); + }); +}); diff --git a/__tests__/google-gender.test.ts 
b/__tests__/google-gender.test.ts new file mode 100644 index 0000000..55a4734 --- /dev/null +++ b/__tests__/google-gender.test.ts @@ -0,0 +1,60 @@ +/** + * Tests for Google engine gender mapping (issue #44) + * Google API returns ssmlGender as "MALE", "FEMALE", "NEUTRAL", or "SSML_VOICE_GENDER_UNSPECIFIED" + * These must map to "Male", "Female", "Unknown" in UnifiedVoice + */ + +jest.mock("../src/core/abstract-tts", () => ({ + AbstractTTSClient: class { + voiceId = "en-US-Standard-A"; + lang = "en-US"; + properties: Record<string, unknown> = { rate: "medium", pitch: "medium", volume: 100 }; + timings: unknown[] = []; + on() {} + emit() {} + }, +})); + +describe("Google _mapVoicesToUnified — gender casing", () => { + let client: any; + + beforeEach(async () => { + const { GoogleTTSClient } = await import("../src/engines/google"); + client = new GoogleTTSClient({ keyFilename: "fake.json" }); + }); + + it("maps FEMALE to Female", async () => { + const voices = await client._mapVoicesToUnified([ + { name: "en-US-A", ssmlGender: "FEMALE", languageCodes: ["en-US"] }, + ]); + expect(voices[0].gender).toBe("Female"); + }); + + it("maps MALE to Male", async () => { + const voices = await client._mapVoicesToUnified([ + { name: "en-US-B", ssmlGender: "MALE", languageCodes: ["en-US"] }, + ]); + expect(voices[0].gender).toBe("Male"); + }); + + it("maps NEUTRAL to Unknown", async () => { + const voices = await client._mapVoicesToUnified([ + { name: "en-US-C", ssmlGender: "NEUTRAL", languageCodes: ["en-US"] }, + ]); + expect(voices[0].gender).toBe("Unknown"); + }); + + it("maps SSML_VOICE_GENDER_UNSPECIFIED to Unknown", async () => { + const voices = await client._mapVoicesToUnified([ + { name: "en-US-D", ssmlGender: "SSML_VOICE_GENDER_UNSPECIFIED", languageCodes: ["en-US"] }, + ]); + expect(voices[0].gender).toBe("Unknown"); + }); + + it("maps missing ssmlGender to Unknown", async () => { + const voices = await client._mapVoicesToUnified([ + { name: "en-US-E", languageCodes: ["en-US"] }, + ]); + 
expect(voices[0].gender).toBe("Unknown"); + }); +}); diff --git a/src/core/abstract-tts.ts b/src/core/abstract-tts.ts index 85646b6..a927d81 100644 --- a/src/core/abstract-tts.ts +++ b/src/core/abstract-tts.ts @@ -16,6 +16,7 @@ import type { import type { AudioFormat } from "../utils/audio-converter"; import { detectAudioFormat } from "../utils/audio-input"; import { isBrowser, isNode } from "../utils/environment"; +import { filterByGender } from "./voice-utils"; import { LanguageNormalizer } from "./language-utils"; import * as SSMLUtils from "./ssml-utils"; @@ -1142,4 +1143,14 @@ export abstract class AbstractTTSClient { ) ); } + + /** + * Get available voices for a specific gender + * @param gender "Male", "Female", or "Unknown" + * @returns Promise resolving to an array of available voices for the specified gender + */ + async getVoicesByGender(gender: "Male" | "Female" | "Unknown"): Promise<UnifiedVoice[]> { + const voices = await this.getVoices(); + return filterByGender(voices, gender); + } } diff --git a/src/engines/elevenlabs.ts b/src/engines/elevenlabs.ts index 36ec6bb..19e0367 100644 --- a/src/engines/elevenlabs.ts +++ b/src/engines/elevenlabs.ts @@ -871,7 +871,12 @@ export class ElevenLabsTTSClient extends AbstractTTSClient { return rawVoices.map((voice) => ({ id: voice.voice_id, name: voice.name, - gender: undefined, // ElevenLabs doesn't provide gender + gender: + voice.labels?.gender === "female" + ? "Female" + : voice.labels?.gender === "male" + ? "Male" + : undefined, languageCodes: [ { bcp47: voice.labels?.accent || "en-US", diff --git a/src/engines/google.ts b/src/engines/google.ts index 920744a..e6088d2 100644 --- a/src/engines/google.ts +++ b/src/engines/google.ts @@ -413,7 +413,8 @@ export class GoogleTTSClient extends AbstractTTSClient { return rawVoices.map((voice: any) => ({ id: voice.name, name: voice.name || "Unknown", - gender: voice.ssmlGender?.toLowerCase() || undefined, + gender: + voice.ssmlGender === "MALE" ? 
"Male" : voice.ssmlGender === "FEMALE" ? "Female" : "Unknown", languageCodes: voice.languageCodes, provider: "google" as const, raw: voice, // Keep the original raw voice data