Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 190 additions & 2 deletions core/gallery/backends.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
// Package gallery provides installation and registration utilities for LocalAI backends,
// including meta-backend resolution based on system capabilities.
package gallery

import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"time"

"github.com/mudler/LocalAI/core/config"
Expand All @@ -20,6 +23,12 @@ const (
runFile = "run.sh"
)

// backendCandidate is one installed concrete backend that can serve a given alias.
// name holds the backend's directory name and runFile the path of its run script;
// both are filled in while scanning the user-managed backends directory.
type backendCandidate struct {
	name, runFile string
}

// readBackendMetadata reads the metadata JSON file for a backend
func readBackendMetadata(backendPath string) (*BackendMetadata, error) {
metadataPath := filepath.Join(backendPath, metadataFile)
Expand Down Expand Up @@ -58,7 +67,8 @@ func writeBackendMetadata(backendPath string, metadata *BackendMetadata) error {
return nil
}

// Installs a model from the gallery
// InstallBackendFromGallery installs a backend from galleries.
// If the backend is a meta backend, it selects the best concrete backend using system capabilities.
func InstallBackendFromGallery(galleries []config.Gallery, systemState *system.SystemState, name string, downloadStatus func(string, string, string, float64), force bool) error {
if !force {
// check if we already have the backend installed
Expand Down Expand Up @@ -371,7 +381,8 @@ func ListSystemBackends(systemState *system.SystemState) (SystemBackends, error)
}

func RegisterBackends(systemState *system.SystemState, modelLoader *model.ModelLoader) error {
backends, err := ListSystemBackends(systemState)
// Prefer optimal alias resolution when multiple concrete backends share the same alias
backends, err := ListSystemBackendsSelected(systemState)
if err != nil {
return err
}
Expand All @@ -383,3 +394,180 @@ func RegisterBackends(systemState *system.SystemState, modelLoader *model.ModelL

return nil
}

// ResolveBestBackendName maps a backend name from the galleries to the concrete backend
// name that should be used on this system.
//
// The name is looked up among the backends returned by AvailableBackends for the given
// galleries and system state. A non-meta entry is returned unchanged. A meta entry is
// resolved through FindBestBackendFromMeta using systemState.
//
// An error is returned when the galleries cannot be listed, when no entry matches name,
// or when a meta entry has no concrete backend for the current system.
func ResolveBestBackendName(galleries []config.Gallery, systemState *system.SystemState, name string) (string, error) {
	available, err := AvailableBackends(galleries, systemState)
	if err != nil {
		return "", err
	}

	entry := FindGalleryElement(available, name)
	switch {
	case entry == nil:
		return "", fmt.Errorf("no backend found with name %q", name)
	case !entry.IsMeta():
		// Already concrete: nothing to resolve.
		return entry.Name, nil
	}

	if concrete := entry.FindBestBackendFromMeta(systemState, available); concrete != nil {
		return concrete.Name, nil
	}
	return "", fmt.Errorf("no backend found with capabilities %v", entry.CapabilitiesMap)
}

// ListSystemBackendsSelected lists system backends and, when multiple concrete backends share the same alias
// (e.g., cpu-llama-cpp and cuda12-llama-cpp both alias to "llama-cpp"), selects the optimal one based on the
// detected system capability (GPU vendor/platform). Concrete backend names are always included.
func ListSystemBackendsSelected(systemState *system.SystemState) (SystemBackends, error) {
Copy link
Owner

@mudler mudler Aug 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think at this point it would make sense to modify ListSystemBackends directly

func ListSystemBackends(systemState *system.SystemState) (SystemBackends, error) {

Its usage in the code is quite limited https://github.com/search?q=repo%3Amudler%2FLocalAI%20ListSystemBackends&type=code

Otherwise, it would make sense to re-use it as much as possible, to avoid code duplication.

// First, include system-provided backends
backends := make(SystemBackends)

systemBackends, err := os.ReadDir(systemState.Backend.BackendsSystemPath)
if err == nil {
for _, systemBackend := range systemBackends {
if systemBackend.IsDir() {
systemBackendRunFile := filepath.Join(systemState.Backend.BackendsSystemPath, systemBackend.Name(), runFile)
if _, err := os.Stat(systemBackendRunFile); err == nil {
backends[systemBackend.Name()] = SystemBackend{
Name: systemBackend.Name(),
RunFile: systemBackendRunFile,
IsMeta: false,
IsSystem: true,
Metadata: nil,
}
}
}
}
} else {
log.Warn().Err(err).Msg("Failed to read system backends, proceeding with user-managed backends")
}

// Scan user-managed backends and group alias candidates
entries, err := os.ReadDir(systemState.Backend.BackendsPath)
if err != nil {
return nil, err
}

aliasGroups := make(map[string][]backendCandidate)
metaMap := make(map[string]*BackendMetadata)

for _, e := range entries {
if !e.IsDir() {
continue
}
dir := e.Name()
run := filepath.Join(systemState.Backend.BackendsPath, dir, runFile)

var metadata *BackendMetadata
metadataPath := filepath.Join(systemState.Backend.BackendsPath, dir, metadataFile)
if _, err := os.Stat(metadataPath); os.IsNotExist(err) {
metadata = &BackendMetadata{Name: dir}
} else {
m, rerr := readBackendMetadata(filepath.Join(systemState.Backend.BackendsPath, dir))
if rerr != nil {
return nil, rerr
}
if m == nil {
metadata = &BackendMetadata{Name: dir}
} else {
metadata = m
}
}

metaMap[dir] = metadata

// Always include the concrete backend name
if _, err := os.Stat(run); err == nil {
backends[dir] = SystemBackend{
Name: dir,
RunFile: run,
IsMeta: false,
Metadata: metadata,
}
}

// Collect alias candidates
if metadata.Alias != "" {
aliasGroups[metadata.Alias] = append(aliasGroups[metadata.Alias], backendCandidate{name: dir, runFile: run})
}

// Meta backend indirection (meta dir -> real dir run.sh)
if metadata.MetaBackendFor != "" {
backends[metadata.Name] = SystemBackend{
Name: metadata.Name,
RunFile: filepath.Join(systemState.Backend.BackendsPath, metadata.MetaBackendFor, runFile),
IsMeta: true,
Metadata: metadata,
}
}
}

// For each alias, choose the best candidate for this system
for alias, cands := range aliasGroups {
selected := selectBestCandidate(systemState, cands)
if selected.runFile == "" {
// Skip if the candidate has no runnable file
continue
}
// Attach metadata of the selected concrete backend, if known
md := metaMap[selected.name]
backends[alias] = SystemBackend{
Name: alias,
RunFile: selected.runFile,
IsMeta: false,
Metadata: md,
}
}

return backends, nil
}

func selectBestCandidate(systemState *system.SystemState, cands []backendCandidate) backendCandidate {
Copy link
Owner

@mudler mudler Aug 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably this is better placed in the capabilities code, to keep the capability logic well isolated.

Could it maybe just be a method of SystemState?

https://github.com/mudler/LocalAI/blob/21faa4114bf6c8980fc612e7db5a2a13b62e8d23/pkg/system/capabilities.go

if len(cands) == 0 {
return backendCandidate{}
}
if len(cands) == 1 {
return cands[0]
}

// Determine capability
capStr := systemState.GPUVendor
if capStr == "" {
capStr = "default"
}

for _, token := range capabilityPriority(capStr) {
for _, c := range cands {
lname := strings.ToLower(c.name)
if strings.Contains(lname, token) {
return c
}
}
}
// Fallback: first one with a runfile
for _, c := range cands {
if c.runFile != "" {
return c
}
}
return cands[0]
}

// capabilityPriority returns the backend-name tokens to look for, most preferred first,
// for the given capability string.
//
// The capability is lower-cased and matched by prefix against the known vendors and
// platforms, checked in the order listed below. Every list ends with "cpu", and any
// unrecognised capability (including the empty string) yields just "cpu".
func capabilityPriority(capStr string) []string {
	vendor := strings.ToLower(capStr)

	// Ordered table: the first matching prefix wins.
	rules := []struct {
		prefix string
		tokens []string
	}{
		{"nvidia", []string{"cuda", "vulkan", "cpu"}},
		{"amd", []string{"rocm", "hip", "vulkan", "cpu"}},
		{"intel", []string{"sycl", "intel", "cpu"}},
		{"metal", []string{"metal", "cpu"}},
		{"darwin-x86", []string{"darwin-x86", "cpu"}},
	}
	for _, rule := range rules {
		if strings.HasPrefix(vendor, rule.prefix) {
			return rule.tokens
		}
	}
	return []string{"cpu"}
}
125 changes: 125 additions & 0 deletions core/gallery/backends_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,131 @@ const (
testImage = "quay.io/mudler/tests:localai-backend-test"
)

// Specs for capability-driven backend selection. They exercise two entry points:
// ResolveBestBackendName (meta backend -> concrete backend, using a file-based gallery)
// and ListSystemBackendsSelected (alias -> one of several installed concrete backends).
var _ = Describe("Runtime capability-based backend selection", func() {
	var tempDir string

	// must fails the running spec when err is non-nil.
	must := func(err error) { Expect(err).NotTo(HaveOccurred()) }

	// installBackend lays out a fake installed backend under tempDir: a directory named
	// after the backend containing a metadata.json (carrying the given alias) and an
	// empty run.sh. It returns the backend directory so specs can build expected paths.
	installBackend := func(name, alias string) string {
		dir := filepath.Join(tempDir, name)
		must(os.MkdirAll(dir, 0o750))
		raw, err := json.Marshal(&BackendMetadata{Alias: alias, Name: name})
		must(err)
		must(os.WriteFile(filepath.Join(dir, "metadata.json"), raw, 0o644))
		must(os.WriteFile(filepath.Join(dir, "run.sh"), []byte(""), 0o755))
		return dir
	}

	BeforeEach(func() {
		var err error
		tempDir, err = os.MkdirTemp("", "gallery-caps-*")
		Expect(err).NotTo(HaveOccurred())
	})

	AfterEach(func() {
		// Best-effort cleanup of the per-spec temp directory.
		os.RemoveAll(tempDir)
	})

	It("ResolveBestBackendName selects default CPU or NVIDIA backend from meta", func() {
		// Arrange: create installed concrete backends so AvailableBackends considers them.
		installBackend("cpu-llama-cpp", "llama-cpp")
		installBackend("cuda12-llama-cpp", "llama-cpp")

		// Create a gallery file with a meta backend mapping capabilities to concrete backends.
		meta := &GalleryBackend{
			Metadata: Metadata{Name: "llama-cpp"},
			CapabilitiesMap: map[string]string{
				"default": "cpu-llama-cpp",
				"nvidia":  "cuda12-llama-cpp",
			},
		}
		cpu := &GalleryBackend{Metadata: Metadata{Name: "cpu-llama-cpp"}, URI: "quay.io/mudler/tests:localai-backend-test"}
		cuda := &GalleryBackend{Metadata: Metadata{Name: "cuda12-llama-cpp"}, URI: "quay.io/mudler/tests:localai-backend-test"}
		entries := GalleryBackends{cpu, cuda, meta}
		dat, err := yaml.Marshal(entries)
		must(err)
		galPath := filepath.Join(tempDir, "backend-gallery.yaml")
		must(os.WriteFile(galPath, dat, 0o644))
		galleries := []config.Gallery{{Name: "test", URL: "file://" + galPath}}

		// CPU/default case (no GPU).
		sysDefault, err := system.GetSystemState(
			system.WithBackendPath(tempDir),
		)
		must(err)
		sysDefault.GPUVendor = "" // ensure default capability
		name, err := ResolveBestBackendName(galleries, sysDefault, "llama-cpp")
		must(err)
		Expect(name).To(Equal("cpu-llama-cpp"))

		// NVIDIA case (ensure VRAM high enough to not fallback to CPU).
		if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
			// On macOS arm64, capability is forced to "metal"; this meta mapping lacks metal,
			// so it correctly falls back to default. Skip CUDA assertion on this platform.
			Skip("CUDA selection not applicable on darwin/arm64 (metal)")
		}

		sysNvidia, err := system.GetSystemState(
			system.WithBackendPath(tempDir),
		)
		must(err)
		sysNvidia.GPUVendor = "nvidia"
		sysNvidia.VRAM = 8 * 1024 * 1024 * 1024 // 8GB
		name, err = ResolveBestBackendName(galleries, sysNvidia, "llama-cpp")
		must(err)
		Expect(name).To(Equal("cuda12-llama-cpp"))
	})

	It("ListSystemBackendsSelected prefers optimal alias candidate", func() {
		// Arrange two installed backends sharing the same alias.
		cpuDir := installBackend("cpu-llama-cpp", "llama-cpp")
		cudaDir := installBackend("cuda12-llama-cpp", "llama-cpp")

		// Default system: alias should point to CPU.
		sysDefault, err := system.GetSystemState(
			system.WithBackendPath(tempDir),
		)
		must(err)
		sysDefault.GPUVendor = "" // force default selection
		backs, err := ListSystemBackendsSelected(sysDefault)
		must(err)
		aliasBack, ok := backs.Get("llama-cpp")
		Expect(ok).To(BeTrue())
		Expect(aliasBack.RunFile).To(Equal(filepath.Join(cpuDir, "run.sh")))

		// Concrete entries remain listed next to the alias.
		_, ok = backs.Get("cpu-llama-cpp")
		Expect(ok).To(BeTrue())
		_, ok = backs.Get("cuda12-llama-cpp")
		Expect(ok).To(BeTrue())

		// NVIDIA system: alias should point to CUDA.
		sysNvidia, err := system.GetSystemState(
			system.WithBackendPath(tempDir),
		)
		must(err)
		sysNvidia.GPUVendor = "nvidia"
		sysNvidia.VRAM = 8 * 1024 * 1024 * 1024
		backs, err = ListSystemBackendsSelected(sysNvidia)
		must(err)
		aliasBack, ok = backs.Get("llama-cpp")
		Expect(ok).To(BeTrue())
		Expect(aliasBack.RunFile).To(Equal(filepath.Join(cudaDir, "run.sh")))
	})
})

var _ = Describe("Gallery Backends", func() {
var (
tempDir string
Expand Down
Loading