Sources/AnyLanguageModel/Models/MLXLanguageModel.swift (45 additions & 0 deletions)

@@ -357,6 +357,51 @@ import Foundation

        return LanguageModelSession.ResponseStream(stream: stream)
    }

    /// Prewarms the model for the given session and optional prompt prefix.
    public func prewarm(
        for session: LanguageModelSession,
        promptPrefix: Prompt?
    ) {
        let modelId = self.modelId
        let hub = self.hub
        let directory = self.directory

        let instructions = session.instructions?.description
        let tools = session.tools

        Task {
            let context = try await loadContext(modelId: modelId, hub: hub, directory: directory)

            // Build chat history similar to respond() to prime the cache effectively
Copilot AI (Jan 29, 2026) — comment on lines +373 to +377:

Task { ... } inherits the caller’s actor context. If prewarm() is called from the main actor/UI, the model load + tokenization work inside this task can end up running on the main actor and cause UI hitches. Prefer running this as a detached/background task (e.g., Task.detached or explicitly hopping off the main actor) and consider setting an appropriate priority for prewarming work.
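The pattern the comment suggests can be sketched as follows. This is a minimal illustration, not the PR's code: `loadAndPrepare` is a hypothetical stand-in for the model load and tokenization work above.

```swift
import Foundation

// Hypothetical stand-in for the expensive prewarm work (model load, tokenization).
func loadAndPrepare() async -> Bool {
    return true
}

func prewarmOffMainActor() {
    // Task.detached does not inherit the caller's actor context or task-locals,
    // so a main-actor caller won't run this work on the main actor.
    // A .utility priority keeps prewarming from competing with user-initiated work.
    Task.detached(priority: .utility) {
        _ = await loadAndPrepare()
    }
}
```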
            var chat: [MLXLMCommon.Chat.Message] = []

            // Add system instructions if present
            if let instructions, !instructions.isEmpty {
                chat.append(.init(role: .system, content: instructions))
            }

            // Add prompt prefix or minimal user message
            let promptText = promptPrefix?.description ?? "."
            chat.append(.init(role: .user, content: promptText))
Owner — comment on lines +385 to +387:

Unless "." has special significance in MLX, this makes me think that promptPrefix should be non-optional (and maybe non-empty?).

What do you think?
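The Owner's suggestion would amount to a signature along these lines. This is a hypothetical sketch of the proposed direction, not the PR's actual API:

```swift
// Sketch: require a prefix so callers decide what to prime with,
// and the "." fallback disappears from the body.
public func prewarm(
    for session: LanguageModelSession,
    promptPrefix: Prompt
) {
    // ... same body as above, but using promptPrefix.description directly,
    // with no `?? "."` fallback needed ...
}
```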


            // Convert tools to MLX format
            let toolSpecs: [ToolSpec]? =
                tools.isEmpty
                ? nil
                : tools.map { convertToolToMLXSpec($0) }

            let userInput = MLXLMCommon.UserInput(
                chat: chat,
                processing: .init(resize: .init(width: 512, height: 512)),
Owner — comment on the `processing:` argument:

This seems like the kind of thing that we'd want to parameterize in the method, rather than hard-code.

                tools: toolSpecs
            )

            // Prepare input - triggers tokenization and processor initialization
            _ = try await context.processor.prepare(input: userInput)
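One way to address the parameterization comment above is to accept the processing options as an argument with the current values as a default. The parameter name and the `Processing` type name are assumptions inferred from the `.init(resize:)` call in the PR, not confirmed API:

```swift
// Sketch: let callers pass image-processing options instead of hard-coding 512x512.
public func prewarm(
    for session: LanguageModelSession,
    promptPrefix: Prompt?,
    processing: MLXLMCommon.UserInput.Processing? = nil  // type name assumed
) {
    // ...
    let userInput = MLXLMCommon.UserInput(
        chat: chat,
        // Fall back to the previous hard-coded values when the caller passes nil.
        processing: processing ?? .init(resize: .init(width: 512, height: 512)),
        tools: toolSpecs
    )
    // ...
}
```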
Copilot AI (Jan 29, 2026) — comment on lines +375 to +402:

The Task body can throw (loadContext / prepare), but the error is never handled or surfaced. Since prewarm is fire-and-forget, it should catch and intentionally ignore (or otherwise report) failures so prewarm errors don’t get silently lost in a failed task.

Suggested change (wrap the task body in do/catch; the body is otherwise unchanged):

            do {
                let context = try await loadContext(modelId: modelId, hub: hub, directory: directory)

                // Build chat history similar to respond() to prime the cache effectively
                var chat: [MLXLMCommon.Chat.Message] = []

                // Add system instructions if present
                if let instructions, !instructions.isEmpty {
                    chat.append(.init(role: .system, content: instructions))
                }

                // Add prompt prefix or minimal user message
                let promptText = promptPrefix?.description ?? "."
                chat.append(.init(role: .user, content: promptText))

                // Convert tools to MLX format
                let toolSpecs: [ToolSpec]? =
                    tools.isEmpty
                    ? nil
                    : tools.map { convertToolToMLXSpec($0) }

                let userInput = MLXLMCommon.UserInput(
                    chat: chat,
                    processing: .init(resize: .init(width: 512, height: 512)),
                    tools: toolSpecs
                )

                // Prepare input - triggers tokenization and processor initialization
                _ = try await context.processor.prepare(input: userInput)
            } catch {
                // Intentionally ignore prewarm failures (model will be loaded on demand)
                // You may replace this with a more sophisticated logging mechanism if desired.
                print("MLXLanguageModel prewarm failed for modelId \(modelId): \(error)")
            }
        }
    }
}

// MARK: - Options Mapping