Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions pkg/model/autoload.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package model

import (
"slices"
"sort"
"strings"

"github.com/mudler/LocalAI/core/config"
)

// preferredGGUFBackend is tried first when auto-detecting the backend for a
// GGUF model, since GGUF is overwhelmingly llama.cpp's native format.
//
// SelectAutoLoadBackends moves this name to the front of the candidate list
// whenever it is among the installed backends, and keeps it without consulting
// the backend capability map.
const preferredGGUFBackend = "llama-cpp"

// llmCapableUsecases are the BackendCapabilities usecases that signal a backend
// can serve a text/LLM GGUF model. A GGUF model that declares no explicit
// backend must only be auto-tried against backends carrying one of these
// usecases - never against audio/codec/image backends (e.g. opus) that happen
// to be installed alongside it (see issue #9287).
//
// isLLMCapableBackend treats a backend as LLM-capable when at least one of its
// PossibleUsecases appears in this list; a single match is enough.
var llmCapableUsecases = []string{
config.UsecaseChat,
config.UsecaseCompletion,
config.UsecaseEdit,
config.UsecaseEmbeddings,
}

// SelectAutoLoadBackends builds the ordered list of backend names to try for
// a model that does not name its backend explicitly.
//
// available holds the installed backend names in arbitrary order (the caller
// collects them from a Go map); it is copied and never modified. modelFile is
// the model file name or path and may be empty.
//
// The trial loop in (*ModelLoader).Load picks the first backend whose gRPC
// LoadModel succeeds, so both the order and the membership of the returned
// list decide which backend wins. Before this helper existed the candidates
// were taken straight from map iteration with no filtering, which allowed an
// unrelated backend such as the "opus" audio codec to win a GGUF/LLM model
// load (#9287).
//
// The result is decided as follows:
//   - Names are always sorted, so the outcome never depends on the order of
//     available.
//   - If modelFile is not a GGUF file (see isGGUFModelFile), the sorted names
//     are returned unfiltered.
//   - For a GGUF file, llama-cpp comes first when it is installed, followed by
//     every other backend accepted by isLLMCapableBackend, in sorted order.
//   - If that selection is empty, the full sorted list is returned instead, so
//     a model that previously loaded never ends up without candidates.
func SelectAutoLoadBackends(available []string, modelFile string) []string {
	// Work on a private, sorted copy so the caller's slice stays untouched.
	var ordered []string
	ordered = append(ordered, available...)
	sort.Strings(ordered)

	if !isGGUFModelFile(modelFile) {
		return ordered
	}

	candidates := make([]string, 0, len(ordered))

	// ordered is sorted, so a binary search is enough to learn whether the
	// preferred backend is installed; when it is, it leads the list.
	if at := sort.SearchStrings(ordered, preferredGGUFBackend); at < len(ordered) && ordered[at] == preferredGGUFBackend {
		candidates = append(candidates, preferredGGUFBackend)
	}

	for _, name := range ordered {
		if name == preferredGGUFBackend {
			continue // already placed at the front above
		}
		if isLLMCapableBackend(name) {
			candidates = append(candidates, name)
		}
	}

	if len(candidates) > 0 {
		return candidates
	}

	// Conservative fallback: no known LLM-capable backend is installed, so
	// rather than refuse to load, try every installed backend as before (now
	// at least in a deterministic order).
	return ordered
}

// isGGUFModelFile reports whether modelFile ends with the ".gguf" extension.
// Letter case is ignored, so "MODEL.GGUF" matches too. Names shorter than the
// extension, including the empty string, never match.
func isGGUFModelFile(modelFile string) bool {
	const ext = ".gguf"
	if len(modelFile) < len(ext) {
		return false
	}
	tail := modelFile[len(modelFile)-len(ext):]
	return strings.EqualFold(tail, ext)
}

// isLLMCapableBackend reports whether the backend called name is positively
// known to serve text/LLM models, i.e. whether at least one of its declared
// PossibleUsecases is listed in llmCapableUsecases.
//
// A backend with no entry in the capability map (GetBackendCapability returns
// nil) is reported as not LLM-capable: GGUF auto-detection should only pick
// backends that can be confirmed to handle LLMs, and the zero-candidate
// fallback in SelectAutoLoadBackends keeps unknown setups working.
func isLLMCapableBackend(name string) bool {
	if known := config.GetBackendCapability(name); known != nil {
		for _, usecase := range known.PossibleUsecases {
			if slices.Contains(llmCapableUsecases, usecase) {
				// One matching usecase is sufficient.
				return true
			}
		}
	}
	return false
}
46 changes: 46 additions & 0 deletions pkg/model/autoload_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package model_test

import (
"github.com/mudler/LocalAI/pkg/model"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

// Specs for model.SelectAutoLoadBackends: candidate filtering and ordering for
// GGUF model files, and plain deterministic sorting for everything else.
var _ = Describe("SelectAutoLoadBackends (#9287)", func() {
	Context("GGUF model auto-detection", func() {
		It("excludes incompatible audio/codec backends (e.g. opus) for a .gguf model", func() {
			// Regression for #9287: an unrelated audio backend such as
			// "opus" being installed must never let it into the GGUF
			// auto-detect candidate list.
			installed := []string{"opus", "llama-cpp"}

			candidates := model.SelectAutoLoadBackends(installed, "Qwen3.5-9b.gguf")

			Expect(candidates).NotTo(ContainElement("opus"))
			Expect(candidates).To(ContainElement("llama-cpp"))
		})

		It("places llama-cpp first for a .gguf model", func() {
			installed := []string{"vllm", "opus", "llama-cpp"}

			candidates := model.SelectAutoLoadBackends(installed, "model.gguf")

			// Guard the index access below with an explicit emptiness check.
			Expect(candidates).NotTo(BeEmpty())
			Expect(candidates[0]).To(Equal("llama-cpp"))
		})

		It("is deterministic regardless of input ordering", func() {
			// Same backend set, two different input orders.
			oneOrder := []string{"opus", "vllm", "llama-cpp", "whisper"}
			otherOrder := []string{"whisper", "llama-cpp", "vllm", "opus"}

			first := model.SelectAutoLoadBackends(oneOrder, "m.gguf")
			second := model.SelectAutoLoadBackends(otherOrder, "m.gguf")

			Expect(first).To(Equal(second))
		})

		It("falls back to the full sorted set when filtering leaves no candidate", func() {
			// With no LLM-capable backend installed, a previously-loadable
			// model must not become unloadable: the original set comes back
			// (sorted).
			installed := []string{"opus"}

			candidates := model.SelectAutoLoadBackends(installed, "model.gguf")

			Expect(candidates).To(Equal([]string{"opus"}))
		})
	})

	Context("non-GGUF model auto-detection", func() {
		It("returns a deterministic (sorted) set without filtering", func() {
			installed := []string{"opus", "llama-cpp", "diffusers"}

			candidates := model.SelectAutoLoadBackends(installed, "model-dir")

			Expect(candidates).To(Equal([]string{"diffusers", "llama-cpp", "opus"}))
		})
	})
})
13 changes: 8 additions & 5 deletions pkg/model/initializers.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,14 +350,17 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
// Otherwise scan for backends in the asset directory
var err error

// get backends embedded in the binary
autoLoadBackends := []string{}

// append externalBackends supplied by the user via the CLI
// Collect the installed/external backends (the map is unordered).
available := []string{}
for b := range ml.GetAllExternalBackends(o) {
autoLoadBackends = append(autoLoadBackends, b)
available = append(available, b)
}

// Build a deterministic, file-type-filtered candidate list so an
// incompatible backend (e.g. an audio codec like opus) can never win the
// trial loop for a GGUF/LLM model. See SelectAutoLoadBackends / #9287.
autoLoadBackends := SelectAutoLoadBackends(available, o.model)

if len(autoLoadBackends) == 0 {
xlog.Error("No backends found")
return nil, fmt.Errorf("no backends found")
Expand Down
Loading