diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go index 9c524bdc1c9d..dc4408e0a768 100644 --- a/core/gallery/importers/importers.go +++ b/core/gallery/importers/importers.go @@ -125,6 +125,10 @@ var defaultImporters = []Importer{ &KittenTTSImporter{}, &NeuTTSImporter{}, &ChatterboxImporter{}, + // VibeVoiceCppImporter must precede VibeVoiceImporter — the older + // Python-backend importer matches any repo name containing "vibevoice" + // and would otherwise swallow the C++ port's GGUF bundles. + &VibeVoiceCppImporter{}, &VibeVoiceImporter{}, &CoquiImporter{}, // Image/Video (Batch 3) diff --git a/core/gallery/importers/vibevoice-cpp.go b/core/gallery/importers/vibevoice-cpp.go new file mode 100644 index 000000000000..7f015b71f717 --- /dev/null +++ b/core/gallery/importers/vibevoice-cpp.go @@ -0,0 +1,355 @@ +package importers + +import ( + "encoding/json" + "path/filepath" + "strings" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/schema" + hfapi "github.com/mudler/LocalAI/pkg/huggingface-api" + "go.yaml.in/yaml/v2" +) + +var _ Importer = &VibeVoiceCppImporter{} + +// VibeVoiceCppImporter recognises the GGUF bundle that the vibevoice.cpp +// backend consumes — primary model file (vibevoice-realtime-*.gguf for TTS or +// vibevoice-asr-*.gguf for ASR), a sibling tokenizer.gguf (always required), +// and optional voice-*.gguf prompts for TTS voice cloning. Detection fires on +// the HF repo name containing "vibevoice.cpp"/"vibevoice-cpp", or on the +// presence of a vibevoice-*.gguf + tokenizer.gguf pair. preferences.backend +// ="vibevoice-cpp" forces the importer regardless of artefacts. +// +// Role pick: defaults to TTS (the realtime model is small and the common +// case). preferences.usecase="asr" routes to the ASR/diarization model. If a +// repo only ships one of the two roles, that role wins automatically. 
+// +// MUST be registered ahead of VibeVoiceImporter — the older Python-backed +// importer matches any repo with "vibevoice" in the name, which would +// otherwise swallow the C++ bundle. +type VibeVoiceCppImporter struct{} + +func (i *VibeVoiceCppImporter) Name() string { return "vibevoice-cpp" } +func (i *VibeVoiceCppImporter) Modality() string { return "tts" } +func (i *VibeVoiceCppImporter) AutoDetects() bool { return true } + +func (i *VibeVoiceCppImporter) Match(details Details) bool { + preferencesMap := unmarshalPreferences(details.Preferences) + if b, ok := preferencesMap["backend"].(string); ok && b == "vibevoice-cpp" { + return true + } + + // Repo-name signal: anything carrying "vibevoice.cpp" or "vibevoice-cpp" + // — the canonical naming for the C++ port bundles. + repoSignals := []string{strings.ToLower(repoNameOnly(details))} + if _, repo, ok := HFOwnerRepoFromURI(details.URI); ok { + repoSignals = append(repoSignals, strings.ToLower(repo)) + } + for _, s := range repoSignals { + if strings.Contains(s, "vibevoice.cpp") || strings.Contains(s, "vibevoice-cpp") { + return true + } + } + + // File-listing signal: a vibevoice-*.gguf primary + tokenizer.gguf is + // only what the C++ backend ships — the Python VibeVoice fork distributes + // safetensors, never GGUF. + if details.HuggingFace != nil && + HasFile(details.HuggingFace.Files, "tokenizer.gguf") && + hasVibeVoiceGGUF(details.HuggingFace.Files) { + return true + } + + return false +} + +func (i *VibeVoiceCppImporter) Import(details Details) (gallery.ModelConfig, error) { + preferencesMap := unmarshalPreferences(details.Preferences) + + name, ok := preferencesMap["name"].(string) + if !ok { + name = filepath.Base(details.URI) + } + + description, ok := preferencesMap["description"].(string) + if !ok { + description = "Imported from " + details.URI + } + + // Quant preference — default order matches what mudler/vibevoice.cpp-models + // ships today. 
Same comma-separated convention as whisper / llama-cpp. + quants := []string{"q8_0", "q4_k", "q5_k", "q4_0"} + if preferred, ok := preferencesMap["quantizations"].(string); ok && preferred != "" { + quants = strings.Split(preferred, ",") + } + + usecase := strings.ToLower(stringPref(preferencesMap, "usecase")) + + cfg := gallery.ModelConfig{ + Name: name, + Description: description, + } + + modelConfig := config.ModelConfig{ + Name: name, + Description: description, + Backend: "vibevoice-cpp", + } + + // Without HF metadata we can only emit a skeleton config — the user must + // edit it post-import to point at real files. Mirrors whisper's bare-URI + // fallback so preference-only invocations still produce something usable. + if details.HuggingFace == nil { + modelConfig.PredictionOptions = schema.PredictionOptions{ + BasicModelRequest: schema.BasicModelRequest{Model: filepath.Base(details.URI)}, + } + if usecase == "asr" { + modelConfig.KnownUsecaseStrings = []string{"transcript"} + modelConfig.Options = []string{"type=asr", "tokenizer=tokenizer.gguf"} + } else { + modelConfig.KnownUsecaseStrings = []string{"tts"} + modelConfig.Options = []string{"tokenizer=tokenizer.gguf"} + } + data, err := yaml.Marshal(modelConfig) + if err != nil { + return gallery.ModelConfig{}, err + } + cfg.ConfigFile = string(data) + return cfg, nil + } + + files := details.HuggingFace.Files + ttsFiles := filterByPrefix(files, "vibevoice-realtime-") + asrFiles := filterByPrefix(files, "vibevoice-asr-") + + // Auto-pick role when the repo only ships one. Explicit usecase wins. 
+ role := usecase + if role == "" { + switch { + case len(ttsFiles) > 0 && len(asrFiles) == 0: + role = "tts" + case len(asrFiles) > 0 && len(ttsFiles) == 0: + role = "asr" + default: + role = "tts" // default: realtime TTS is the smaller, more common case + } + } + + // Layout under /vibevoice-cpp// — same pattern as whisper's + // nesting so multiple imports of the same upstream repo (with different + // quants) don't collide on disk. Options[] paths are emitted relative to + // opts.ModelPath, which the backend resolves against the LocalAI models + // root in govibevoicecpp.go:resolvePath. + relDir := filepath.Join("vibevoice-cpp", name) + + var primary []hfapi.ModelFile + switch role { + case "asr", "transcript", "stt", "speech-to-text": + primary = asrFiles + modelConfig.KnownUsecaseStrings = []string{"transcript"} + default: + primary = ttsFiles + modelConfig.KnownUsecaseStrings = []string{"tts"} + } + // If the requested role has no matching files, fall back to any + // vibevoice-*.gguf so the import still produces something runnable. + if len(primary) == 0 { + primary = filterByPrefix(files, "vibevoice-") + } + + chosen, ok := pickPreferredGGUFFile(primary, quants) + if !ok { + // Nothing to download. Emit the skeleton — same shape as the + // no-HF-metadata branch above, just with a sensible default name. 
+ modelConfig.PredictionOptions = schema.PredictionOptions{ + BasicModelRequest: schema.BasicModelRequest{Model: name + ".gguf"}, + } + if role == "asr" { + modelConfig.Options = []string{"type=asr", "tokenizer=" + filepath.Join(relDir, "tokenizer.gguf")} + } else { + modelConfig.Options = []string{"tokenizer=" + filepath.Join(relDir, "tokenizer.gguf")} + } + data, err := yaml.Marshal(modelConfig) + if err != nil { + return gallery.ModelConfig{}, err + } + cfg.ConfigFile = string(data) + return cfg, nil + } + + modelTarget := filepath.Join(relDir, filepath.Base(chosen.Path)) + cfg.Files = append(cfg.Files, gallery.File{ + URI: chosen.URL, + Filename: modelTarget, + SHA256: chosen.SHA256, + }) + modelConfig.PredictionOptions = schema.PredictionOptions{ + BasicModelRequest: schema.BasicModelRequest{Model: modelTarget}, + } + + // tokenizer.gguf is mandatory — Load() rejects without it. Always pull + // it when the repo provides one (every official vibevoice.cpp bundle does). + options := []string{} + if role == "asr" { + options = append(options, "type=asr") + } + if tok, ok := findFile(files, "tokenizer.gguf"); ok { + tokTarget := filepath.Join(relDir, "tokenizer.gguf") + cfg.Files = append(cfg.Files, gallery.File{ + URI: tok.URL, + Filename: tokTarget, + SHA256: tok.SHA256, + }) + options = append(options, "tokenizer="+tokTarget) + } + + // For TTS, ship the first voice-*.gguf as a default — the backend needs + // a reference voice to clone from. ASR doesn't use voice prompts. 
+ if role != "asr" { + if voice, ok := pickVoicePrompt(files, stringPref(preferencesMap, "voice")); ok { + voiceTarget := filepath.Join(relDir, filepath.Base(voice.Path)) + cfg.Files = append(cfg.Files, gallery.File{ + URI: voice.URL, + Filename: voiceTarget, + SHA256: voice.SHA256, + }) + options = append(options, "voice="+voiceTarget) + } + } + modelConfig.Options = options + + data, err := yaml.Marshal(modelConfig) + if err != nil { + return gallery.ModelConfig{}, err + } + cfg.ConfigFile = string(data) + return cfg, nil +} + +// hasVibeVoiceGGUF returns true when any file matches "vibevoice-*.gguf" +// (case-insensitive). Narrow on purpose — third-party GGUF mirrors that +// re-pack the model under different filenames will be missed, but those +// users can pass preferences.backend="vibevoice-cpp" to force the importer. +func hasVibeVoiceGGUF(files []hfapi.ModelFile) bool { + for _, f := range files { + name := strings.ToLower(filepath.Base(f.Path)) + if strings.HasPrefix(name, "vibevoice-") && strings.HasSuffix(name, ".gguf") { + return true + } + } + return false +} + +// filterByPrefix returns every file whose basename starts with prefix and +// ends in .gguf (case-insensitive on the suffix, exact on the prefix). +func filterByPrefix(files []hfapi.ModelFile, prefix string) []hfapi.ModelFile { + var out []hfapi.ModelFile + for _, f := range files { + base := filepath.Base(f.Path) + if !strings.HasPrefix(base, prefix) { + continue + } + if !strings.HasSuffix(strings.ToLower(base), ".gguf") { + continue + } + out = append(out, f) + } + return out +} + +// findFile is HasFile's lookup-returning sibling. Returns the first file +// whose basename equals name (exact match), or false when none exists. 
+func findFile(files []hfapi.ModelFile, name string) (hfapi.ModelFile, bool) { + for _, f := range files { + if filepath.Base(f.Path) == name { + return f, true + } + } + return hfapi.ModelFile{}, false +} + +// pickPreferredGGUFFile mirrors pickPreferredGGMLFile but operates on .gguf +// files: walks prefs in order, returns the first file whose basename contains +// any preference token (case-insensitive). On no match, falls back to the +// last file so a missing quant still yields a runnable import. +func pickPreferredGGUFFile(files []hfapi.ModelFile, prefs []string) (hfapi.ModelFile, bool) { + if len(files) == 0 { + return hfapi.ModelFile{}, false + } + for _, pref := range prefs { + lower := strings.ToLower(strings.TrimSpace(pref)) + if lower == "" { + continue + } + for _, f := range files { + if strings.Contains(strings.ToLower(filepath.Base(f.Path)), lower) { + return f, true + } + } + } + return files[len(files)-1], true +} + +// pickVoicePrompt selects a voice-*.gguf to bundle with a TTS import. +// Honours an explicit preferences.voice substring (e.g. "Emma" picks +// voice-en-Emma.gguf); otherwise returns the first voice file in listing +// order so the choice is stable across imports of the same repo. +func pickVoicePrompt(files []hfapi.ModelFile, hint string) (hfapi.ModelFile, bool) { + hint = strings.ToLower(strings.TrimSpace(hint)) + var voices []hfapi.ModelFile + for _, f := range files { + base := strings.ToLower(filepath.Base(f.Path)) + if strings.HasPrefix(base, "voice-") && strings.HasSuffix(base, ".gguf") { + voices = append(voices, f) + } + } + if len(voices) == 0 { + return hfapi.ModelFile{}, false + } + if hint != "" { + for _, v := range voices { + if strings.Contains(strings.ToLower(filepath.Base(v.Path)), hint) { + return v, true + } + } + } + return voices[0], true +} + +// repoNameOnly extracts the repo basename (everything after the last "/") +// from HF metadata or, failing that, the URI. Empty when neither is set. 
+func repoNameOnly(details Details) string { + if details.HuggingFace != nil { + id := details.HuggingFace.ModelID + if idx := strings.Index(id, "/"); idx >= 0 { + return id[idx+1:] + } + return id + } + return "" +} + +// unmarshalPreferences decodes details.Preferences into a generic map. Returns +// an empty map (never nil) on any failure so callers can index without nil +// checks. Bad JSON is silently ignored — every importer here treats +// preferences as best-effort hints. +func unmarshalPreferences(raw json.RawMessage) map[string]any { + out := map[string]any{} + b, err := raw.MarshalJSON() + if err != nil || len(b) == 0 { + return out + } + _ = json.Unmarshal(b, &out) + return out +} + +// stringPref reads a string preference by key, returning "" when missing or +// of the wrong type. +func stringPref(m map[string]any, key string) string { + if v, ok := m[key].(string); ok { + return v + } + return "" +} diff --git a/core/gallery/importers/vibevoice-cpp_test.go b/core/gallery/importers/vibevoice-cpp_test.go new file mode 100644 index 000000000000..9723f058e2e3 --- /dev/null +++ b/core/gallery/importers/vibevoice-cpp_test.go @@ -0,0 +1,261 @@ +package importers_test + +import ( + "encoding/json" + "fmt" + + "github.com/mudler/LocalAI/core/gallery/importers" + hfapi "github.com/mudler/LocalAI/pkg/huggingface-api" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega"
)

// Ginkgo suite for VibeVoiceCppImporter: interface metadata, the
// preferences.backend override, live HuggingFace detection, offline Import
// fixtures, and a guard that non-cpp VibeVoice repos are not matched.
var _ = Describe("VibeVoiceCppImporter", func() {
	Context("Importer interface metadata", func() {
		It("exposes name/modality/autodetect", func() {
			imp := &importers.VibeVoiceCppImporter{}
			Expect(imp.Name()).To(Equal("vibevoice-cpp"))
			Expect(imp.Modality()).To(Equal("tts"))
			Expect(imp.AutoDetects()).To(BeTrue())
		})
	})

	Context("preference override", func() {
		It("honours preferences.backend=vibevoice-cpp for arbitrary URIs", func() {
			uri := "https://example.com/some-unrelated-model"
			preferences := json.RawMessage(`{"backend": "vibevoice-cpp"}`)

			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)

			Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: vibevoice-cpp"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("tokenizer=tokenizer.gguf"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("tts"))
		})

		It("emits an ASR skeleton when usecase=asr is requested with no HF metadata", func() {
			uri := "https://example.com/some-unrelated-model"
			preferences := json.RawMessage(`{"backend": "vibevoice-cpp", "usecase": "asr"}`)

			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)

			Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: vibevoice-cpp"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("type=asr"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("transcript"))
		})
	})

	// Live HF call against the canonical bundle. Marked broad: it shouldn't
	// be brittle to upstream adding more quants/voices — we only assert that
	// the realtime TTS path was picked and the tokenizer was bundled.
	Context("detection from HuggingFace: mudler/vibevoice.cpp-models", func() {
		const uri = "https://huggingface.co/mudler/vibevoice.cpp-models"

		It("routes to vibevoice-cpp, picks the realtime TTS GGUF and bundles tokenizer + voice prompt", func() {
			modelConfig, err := importers.DiscoverModelConfig(uri, json.RawMessage(`{}`))

			Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: vibevoice-cpp"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("tts"))

			// Primary model must be the realtime variant (TTS default).
			Expect(modelConfig.ConfigFile).To(ContainSubstring("vibevoice-realtime-"))

			// Tokenizer is mandatory and must show up both as a downloaded
			// file and as a tokenizer= option entry. The path is rooted
			// under vibevoice-cpp/<name>/ so multiple imports don't collide.
			var sawTokenizerFile, sawModelFile, sawVoiceFile bool
			for _, f := range modelConfig.Files {
				if f.Filename == "" {
					continue
				}
				if filepathBase(f.Filename) == "tokenizer.gguf" {
					sawTokenizerFile = true
				}
				if startsWith(filepathBase(f.Filename), "vibevoice-realtime-") {
					sawModelFile = true
				}
				if startsWith(filepathBase(f.Filename), "voice-") {
					sawVoiceFile = true
				}
			}
			Expect(sawTokenizerFile).To(BeTrue(), fmt.Sprintf("expected tokenizer.gguf in Files, got: %+v", modelConfig.Files))
			Expect(sawModelFile).To(BeTrue(), fmt.Sprintf("expected a vibevoice-realtime-*.gguf in Files, got: %+v", modelConfig.Files))
			Expect(sawVoiceFile).To(BeTrue(), fmt.Sprintf("expected a voice-*.gguf in Files, got: %+v", modelConfig.Files))

			Expect(modelConfig.ConfigFile).To(ContainSubstring("tokenizer=vibevoice-cpp/"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("voice=vibevoice-cpp/"))
		})

		It("routes to ASR + diarization when preferences.usecase=asr", func() {
			modelConfig, err := importers.DiscoverModelConfig(uri, json.RawMessage(`{"usecase":"asr"}`))

			Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: vibevoice-cpp"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("transcript"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("type=asr"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("vibevoice-asr-"))
			// ASR must NOT bundle a voice prompt — the backend ignores it
			// for transcription and we don't want gratuitous downloads.
			Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("voice="))
		})
	})

	// Offline fixtures — assert the end-to-end shape of what the importer
	// emits without depending on HF availability or upstream file lists.
	Context("Import from HuggingFace file listing (offline)", func() {
		const repoBase = "https://huggingface.co/mudler/vibevoice.cpp-models/resolve/main/"

		// hfFile builds a fixture ModelFile whose URL is repoBase + path.
		hfFile := func(path, sha string) hfapi.ModelFile {
			return hfapi.ModelFile{
				Path:   path,
				SHA256: sha,
				URL:    repoBase + path,
			}
		}

		// withHF builds Details for the canonical repo with the given file
		// listing; preferences is attached only when non-empty.
		withHF := func(preferences string, files ...hfapi.ModelFile) importers.Details {
			d := importers.Details{
				URI: "https://huggingface.co/mudler/vibevoice.cpp-models",
				HuggingFace: &hfapi.ModelDetails{
					ModelID: "mudler/vibevoice.cpp-models",
					Files:   files,
				},
			}
			if preferences != "" {
				d.Preferences = json.RawMessage(preferences)
			}
			return d
		}

		It("defaults to TTS realtime + tokenizer + first voice, nested under vibevoice-cpp//", func() {
			imp := &importers.VibeVoiceCppImporter{}
			details := withHF(`{"name":"vibe"}`,
				hfFile("vibevoice-realtime-0.5B-q8_0.gguf", "aaa"),
				hfFile("vibevoice-asr-q4_k.gguf", "bbb"),
				hfFile("tokenizer.gguf", "ccc"),
				hfFile("voice-en-Carter_man.gguf", "ddd"),
				hfFile("voice-en-Emma.gguf", "eee"),
				hfFile("README.md", ""),
			)

			modelConfig, err := imp.Import(details)
			Expect(err).ToNot(HaveOccurred())

			// Exactly three downloads: primary model, tokenizer, one voice.
			Expect(modelConfig.Files).To(HaveLen(3))
			byName := map[string]string{}
			for _, f := range modelConfig.Files {
				byName[filepathBase(f.Filename)] = f.Filename
			}
			Expect(byName).To(HaveKey("vibevoice-realtime-0.5B-q8_0.gguf"))
			Expect(byName).To(HaveKey("tokenizer.gguf"))
			Expect(byName).To(HaveKey("voice-en-Carter_man.gguf"))
			Expect(byName["tokenizer.gguf"]).To(Equal("vibevoice-cpp/vibe/tokenizer.gguf"))

			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: vibevoice-cpp"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: vibevoice-cpp/vibe/vibevoice-realtime-0.5B-q8_0.gguf"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("- tokenizer=vibevoice-cpp/vibe/tokenizer.gguf"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("- voice=vibevoice-cpp/vibe/voice-en-Carter_man.gguf"))
			Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("type=asr"))
		})

		It("routes to ASR when preferences.usecase=asr and skips voice prompts", func() {
			imp := &importers.VibeVoiceCppImporter{}
			details := withHF(`{"name":"vibe-asr","usecase":"asr"}`,
				hfFile("vibevoice-realtime-0.5B-q8_0.gguf", "aaa"),
				hfFile("vibevoice-asr-q4_k.gguf", "bbb"),
				hfFile("vibevoice-asr-q8_0.gguf", "fff"),
				hfFile("tokenizer.gguf", "ccc"),
				hfFile("voice-en-Emma.gguf", "ddd"),
			)

			modelConfig, err := imp.Import(details)
			Expect(err).ToNot(HaveOccurred())

			Expect(modelConfig.Files).To(HaveLen(2))
			byName := map[string]string{}
			for _, f := range modelConfig.Files {
				byName[filepathBase(f.Filename)] = f.Filename
			}
			// Default quant order picks q8_0 over q4_k.
			Expect(byName).To(HaveKey("vibevoice-asr-q8_0.gguf"))
			Expect(byName).To(HaveKey("tokenizer.gguf"))

			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: vibevoice-cpp/vibe-asr/vibevoice-asr-q8_0.gguf"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("- type=asr"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("- tokenizer=vibevoice-cpp/vibe-asr/tokenizer.gguf"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("transcript"))
			Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("voice="))
		})

		It("honours preferences.quantizations to pick a specific quant", func() {
			imp := &importers.VibeVoiceCppImporter{}
			details := withHF(`{"name":"vibe","quantizations":"q4_k"}`,
				hfFile("vibevoice-asr-q4_k.gguf", "aaa"),
				hfFile("vibevoice-asr-q8_0.gguf", "bbb"),
				hfFile("tokenizer.gguf", "ccc"),
			)

			modelConfig, err := imp.Import(details)
			Expect(err).ToNot(HaveOccurred())

			// Repo only ships ASR — auto-routes to asr, picks the requested
			// quant, emits type=asr automatically.
			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: vibevoice-cpp/vibe/vibevoice-asr-q4_k.gguf"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("- type=asr"))
		})

		It("honours preferences.voice to pick a specific voice prompt", func() {
			imp := &importers.VibeVoiceCppImporter{}
			details := withHF(`{"name":"vibe","voice":"Emma"}`,
				hfFile("vibevoice-realtime-0.5B-q8_0.gguf", "aaa"),
				hfFile("tokenizer.gguf", "bbb"),
				hfFile("voice-en-Carter_man.gguf", "ccc"),
				hfFile("voice-en-Emma.gguf", "ddd"),
			)

			modelConfig, err := imp.Import(details)
			Expect(err).ToNot(HaveOccurred())

			Expect(modelConfig.ConfigFile).To(ContainSubstring("- voice=vibevoice-cpp/vibe/voice-en-Emma.gguf"))
			Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("voice-en-Carter_man"))
		})
	})

	// Make sure we don't regress the existing Python-backend importer for
	// repos that don't carry the C++ port's signal (e.g. microsoft/VibeVoice-1.5B).
	Context("non-cpp vibevoice repos still route to the Python importer", func() {
		It("does not claim microsoft/VibeVoice-1.5B (no GGUF / no .cpp suffix)", func() {
			imp := &importers.VibeVoiceCppImporter{}
			details := importers.Details{
				URI: "https://huggingface.co/microsoft/VibeVoice-1.5B",
				HuggingFace: &hfapi.ModelDetails{
					ModelID: "microsoft/VibeVoice-1.5B",
					Files: []hfapi.ModelFile{
						{Path: "config.json"},
						{Path: "model.safetensors"},
					},
				},
				Preferences: json.RawMessage(`{}`),
			}
			Expect(imp.Match(details)).To(BeFalse())
		})
	})
})

// filepathBase / startsWith are tiny helpers so the test file doesn't need
// to import path/filepath + strings just for the expected-shape assertions.

// filepathBase returns everything after the last '/' in p, or p itself when
// it contains no '/'. Unlike path/filepath.Base it does not clean the path
// or special-case empty input and trailing slashes.
func filepathBase(p string) string {
	for i := len(p) - 1; i >= 0; i-- {
		if p[i] == '/' {
			return p[i+1:]
		}
	}
	return p
}

// startsWith reports whether s begins with prefix (byte-wise comparison).
func startsWith(s, prefix string) bool {
	return len(s) >= len(prefix) && s[:len(prefix)] == prefix
}