From 221b2c3391090b8a036333231d60a181a47f4617 Mon Sep 17 00:00:00 2001 From: Patrice Kouame Date: Tue, 30 Jun 2026 03:42:55 -0400 Subject: [PATCH 1/3] MLX: expose topP / topK / minP / repetitionPenalty via CustomGenerationOptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MLX backend hardcoded sampling parameters in toGenerateParameters / toStructuredGenerateParameters (topP: 1.0, repetitionPenalty: nil, topK/minP at defaults) and never read GenerationOptions.sampling, so callers could only tune temperature and maximumResponseTokens. MLXLMCommon.GenerateParameters already supports the full set. Add topP / topK / minP / repetitionPenalty / repetitionContextSize to MLXLanguageModel.CustomGenerationOptions (all optional, default nil → existing behavior unchanged) and forward them in both parameter mappers, preserving each path's prior defaults via `custom?.field ?? priorDefault`. Implements huggingface/AnyLanguageModel#165. Co-Authored-By: Claude Opus 4.8 --- .../Models/MLXLanguageModel.swift | 59 ++++++++++++++++--- 1 file changed, 52 insertions(+), 7 deletions(-) diff --git a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift index 0ef37ef..cc789ae 100644 --- a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift +++ b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift @@ -287,6 +287,31 @@ import Foundation additionalContext?.mapValues { $0.toSendable() } } + /// Top-p (nucleus) sampling threshold. + /// + /// Set this to `nil` to defer to the mapper fallback, which bottoms out at `1.0` (disabled) for regular generation and `0.95` for structured generation. + public var topP: Float? + + /// Top-k sampling: restricts sampling to the `k` most likely tokens. + /// + /// Set this to `nil` or `0` to disable top-k sampling. + public var topK: Int? + + /// Min-p sampling threshold, relative to the most likely token's probability. + /// + /// Set this to `nil` or `0` to disable min-p sampling. + public var minP: Float? 
+ + /// Penalty factor applied to recently generated tokens to reduce repetition. + /// + /// Set this to `nil` to use the per-path default: no penalty for regular generation, `1.1` for structured generation. + public var repetitionPenalty: Float? + + /// Number of recent tokens considered by the repetition penalty. + /// + /// Set this to `nil` to use the backend default. + public var repetitionContextSize: Int? + /// Creates MLX-specific generation options. /// /// - Parameters: @@ -295,14 +320,30 @@ import Foundation /// template rendering context. /// - userInputProcessing: Processing to apply to user media before input preparation. /// Defaults to `nil`, which lets MLX use its default media handling. + /// - topP: Top-p (nucleus) sampling threshold. Defaults to `nil` (backend default). + /// - topK: Top-k sampling count. Defaults to `nil` (disabled). + /// - minP: Min-p sampling threshold. Defaults to `nil` (disabled). + /// - repetitionPenalty: Repetition penalty factor. Defaults to `nil` (per-path default: none for regular generation, `1.1` for structured generation). + /// - repetitionContextSize: Repetition-penalty token window. Defaults to `nil` + /// (backend default). public init( kvCache: KVCache, userInputProcessing: UserInputProcessing?, - additionalContext: [String: AnyLanguageModel.JSONValue]? + additionalContext: [String: AnyLanguageModel.JSONValue]?, + topP: Float? = nil, + topK: Int? = nil, + minP: Float? = nil, + repetitionPenalty: Float? = nil, + repetitionContextSize: Int? = nil ) { self.kvCache = kvCache self.additionalContext = additionalContext self.userInputProcessing = userInputProcessing + self.topP = topP + self.topK = topK + self.minP = minP + self.repetitionPenalty = repetitionPenalty + self.repetitionContextSize = repetitionContextSize } /// Default MLX generation options used when none are provided at runtime. @@ -1223,9 +1264,11 @@ import Foundation kvGroupSize: custom?.kvCache.groupSize ?? 64, quantizedKVStart: custom?.kvCache.quantizedStart ?? 0, temperature: Float(options.temperature ?? 
0.6), - topP: 1.0, - repetitionPenalty: nil, - repetitionContextSize: 20 + topP: custom?.topP ?? 1.0, + topK: custom?.topK ?? 0, + minP: custom?.minP ?? 0.0, + repetitionPenalty: custom?.repetitionPenalty, + repetitionContextSize: custom?.repetitionContextSize ?? 20 ) } @@ -1239,9 +1282,11 @@ import Foundation kvGroupSize: custom?.kvCache.groupSize ?? 64, quantizedKVStart: custom?.kvCache.quantizedStart ?? 0, temperature: Float(options.temperature ?? 0.2), - topP: 0.95, - repetitionPenalty: 1.1, - repetitionContextSize: 64 + topP: custom?.topP ?? 0.95, + topK: custom?.topK ?? 0, + minP: custom?.minP ?? 0.0, + repetitionPenalty: custom?.repetitionPenalty ?? 1.1, + repetitionContextSize: custom?.repetitionContextSize ?? 64 ) } From 9ad8ca74c260741d84cfb45d1bb1518b660f21e5 Mon Sep 17 00:00:00 2001 From: Patrice Kouame Date: Wed, 1 Jul 2026 17:56:38 -0400 Subject: [PATCH 2/3] MLX: bridge GenerationOptions.sampling into sampler params Reads the core GenerationOptions.sampling (SamplingMode) in toGenerateParameters / toStructuredGenerateParameters so top-p/top-k/greedy set via the standard sampling surface reach MLX, not only the custom block. Precedence: custom block wins, then sampling-derived, then existing default. Seed is not forwarded (no per-call seed in MLXLMCommon.GenerateParameters). Adds derivation + precedence tests. 
Co-Authored-By: Claude Opus 4.8 --- .../Models/MLXLanguageModel.swift | 41 +++++++++--- .../CustomGenerationOptionsTests.swift | 62 +++++++++++++++++++ 2 files changed, 95 insertions(+), 8 deletions(-) diff --git a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift index cc789ae..551c9ef 100644 --- a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift +++ b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift @@ -1255,17 +1255,41 @@ import Foundation // MARK: - Options Mapping - private func toGenerateParameters(_ options: GenerationOptions) -> MLXLMCommon.GenerateParameters { + /// Derives MLX sampler parameters from the core ``GenerationOptions/sampling`` (`SamplingMode`), + /// so `.sampling` acts as a unified sampling surface across backends (the same one Apple + /// FoundationModels consumes). Returns `nil` for any field the sampling mode doesn't express. + /// + /// Precedence at the call sites is custom-block → this (sampling) → existing default, so an + /// explicit `CustomGenerationOptions` value always wins. The `SamplingMode` seed is not + /// forwarded: `MLXLMCommon.GenerateParameters` has no per-call seed field. + func samplingDerivedParameters( + from options: GenerationOptions + ) -> (topP: Float?, topK: Int?, greedyTemperature: Float?) { + switch options.sampling?.mode { + case .greedy: + // Greedy = argmax; MLX realizes this with temperature 0. 
+ return (topP: nil, topK: nil, greedyTemperature: 0) + case .topK(let k, _): + return (topP: nil, topK: k, greedyTemperature: nil) + case .nucleus(let threshold, _): + return (topP: Float(threshold), topK: nil, greedyTemperature: nil) + case nil: + return (topP: nil, topK: nil, greedyTemperature: nil) + } + } + + func toGenerateParameters(_ options: GenerationOptions) -> MLXLMCommon.GenerateParameters { let custom = options[custom: MLXLanguageModel.self] + let derived = samplingDerivedParameters(from: options) return MLXLMCommon.GenerateParameters( maxTokens: options.maximumResponseTokens, maxKVSize: custom?.kvCache.maxSize, kvBits: custom?.kvCache.bits, kvGroupSize: custom?.kvCache.groupSize ?? 64, quantizedKVStart: custom?.kvCache.quantizedStart ?? 0, - temperature: Float(options.temperature ?? 0.6), - topP: custom?.topP ?? 1.0, - topK: custom?.topK ?? 0, + temperature: Float(options.temperature ?? derived.greedyTemperature.map(Double.init) ?? 0.6), + topP: custom?.topP ?? derived.topP ?? 1.0, + topK: custom?.topK ?? derived.topK ?? 0, minP: custom?.minP ?? 0.0, repetitionPenalty: custom?.repetitionPenalty, repetitionContextSize: custom?.repetitionContextSize ?? 20 @@ -1273,17 +1297,18 @@ import Foundation } /// Builds MLX parameters tuned for structured generation. - private func toStructuredGenerateParameters(_ options: GenerationOptions) -> MLXLMCommon.GenerateParameters { + func toStructuredGenerateParameters(_ options: GenerationOptions) -> MLXLMCommon.GenerateParameters { let custom = options[custom: MLXLanguageModel.self] + let derived = samplingDerivedParameters(from: options) return MLXLMCommon.GenerateParameters( maxTokens: options.maximumResponseTokens, maxKVSize: custom?.kvCache.maxSize, kvBits: custom?.kvCache.bits, kvGroupSize: custom?.kvCache.groupSize ?? 64, quantizedKVStart: custom?.kvCache.quantizedStart ?? 0, - temperature: Float(options.temperature ?? 0.2), - topP: custom?.topP ?? 0.95, - topK: custom?.topK ?? 
0, + temperature: Float(options.temperature ?? derived.greedyTemperature.map(Double.init) ?? 0.2), + topP: custom?.topP ?? derived.topP ?? 0.95, + topK: custom?.topK ?? derived.topK ?? 0, minP: custom?.minP ?? 0.0, repetitionPenalty: custom?.repetitionPenalty ?? 1.1, repetitionContextSize: custom?.repetitionContextSize ?? 64 diff --git a/Tests/AnyLanguageModelTests/CustomGenerationOptionsTests.swift b/Tests/AnyLanguageModelTests/CustomGenerationOptionsTests.swift index 3a4a812..fbfed73 100644 --- a/Tests/AnyLanguageModelTests/CustomGenerationOptionsTests.swift +++ b/Tests/AnyLanguageModelTests/CustomGenerationOptionsTests.swift @@ -901,6 +901,68 @@ struct GeminiCustomOptionsTests { #expect(retrieved?.kvCache.quantizedStart == 256) } + // MARK: - SamplingMode → MLX derivation + + @Test func samplingDerivationGreedy() { + let derived = samplingDerivedParameters(from: GenerationOptions(sampling: .greedy)) + #expect(derived.topP == nil) + #expect(derived.topK == nil) + #expect(derived.greedyTemperature == 0) + } + + @Test func samplingDerivationTopK() { + let derived = samplingDerivedParameters(from: GenerationOptions(sampling: .random(top: 40, seed: 7))) + #expect(derived.topK == 40) + #expect(derived.topP == nil) + #expect(derived.greedyTemperature == nil) + } + + @Test func samplingDerivationNucleus() { + let derived = samplingDerivedParameters(from: GenerationOptions(sampling: .random(probabilityThreshold: 0.9))) + #expect(derived.topP == 0.9) + #expect(derived.topK == nil) + #expect(derived.greedyTemperature == nil) + } + + @Test func samplingDerivationNil() { + let derived = samplingDerivedParameters(from: GenerationOptions()) + #expect(derived.topP == nil) + #expect(derived.topK == nil) + #expect(derived.greedyTemperature == nil) + } + + // MARK: - Mapping precedence (custom-wins → sampling-fills → default) + + @Test func samplingFillsWhenNoCustomBlock() { + let params = toGenerateParameters(GenerationOptions(sampling: .random(top: 12))) + #expect(params.topK == 
12) // top-k now reaches MLX via sampling + #expect(params.topP == 1.0) // untouched default + } + + @Test func customBlockWinsOverSampling() { + var options = GenerationOptions(sampling: .random(probabilityThreshold: 0.9)) + options[custom: MLXLanguageModel.self] = .init( + kvCache: .default, + userInputProcessing: nil, + additionalContext: nil, + topP: 0.3, + topK: 5 + ) + let params = toGenerateParameters(options) + #expect(params.topP == 0.3) // custom wins over sampling's 0.9 + #expect(params.topK == 5) // custom wins (sampling expressed no top-k) + } + + @Test func greedyMapsToZeroTemperature() { + let params = toGenerateParameters(GenerationOptions(sampling: .greedy)) + #expect(params.temperature == 0) + } + + @Test func explicitTemperatureWinsOverGreedy() { + let params = toGenerateParameters(GenerationOptions(sampling: .greedy, temperature: 0.7)) + #expect(params.temperature == Float(0.7)) + } + @Test func codable() throws { let options = MLXLanguageModel.CustomGenerationOptions( kvCache: .init( From 6ceabbc14cfe13acdfbc207cf4bcf53f5ac283b0 Mon Sep 17 00:00:00 2001 From: Patrice Kouame Date: Wed, 1 Jul 2026 18:07:20 -0400 Subject: [PATCH 3/3] Foundation Models: OS 27 toolCallingMode support (compiler+availability gated) toFoundationModels() adopts the OS 27 GenerationOptions initializer (adds toolCallingMode) when built with the Xcode 27 SDK, gated by #if compiler(>=6.4) + #available(macOS/iOS/visionOS 27). Falls back to the existing 26 construction otherwise; deployment floor stays 26. Self-contained on this branch (no FM-parity mapping dependency). 
Co-Authored-By: Claude Opus 4.8 --- .../Models/SystemLanguageModel.swift | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Sources/AnyLanguageModel/Models/SystemLanguageModel.swift b/Sources/AnyLanguageModel/Models/SystemLanguageModel.swift index ff2e8f2..c1aa2af 100644 --- a/Sources/AnyLanguageModel/Models/SystemLanguageModel.swift +++ b/Sources/AnyLanguageModel/Models/SystemLanguageModel.swift @@ -384,6 +384,22 @@ @available(macOS 26.0, iOS 26.0, watchOS 26.0, tvOS 26.0, visionOS 26.0, *) extension GenerationOptions { fileprivate func toFoundationModels() -> FoundationModels.GenerationOptions { + // OS 27 support: `#if compiler(>=6.4)` compiles the 27 path only on the Xcode 27 + // (beta) toolchain — Xcode 26 skips it entirely and never sees the 27-only symbol. + // `#available` then gates it to OS 27 at runtime; deployment floor stays 26 via the + // fall-through below. Self-contained (no FM-parity mapping required on this branch). + // KNOBS to confirm on the first Xcode 27 build: the compiler gate version, and the + // initializer's parameter labels if Apple changed them. + #if compiler(>=6.4) + if #available(macOS 27.0, iOS 27.0, visionOS 27.0, *) { + return FoundationModels.GenerationOptions( + temperature: temperature, + maximumResponseTokens: maximumResponseTokens, + toolCallingMode: nil + ) + } + #endif + var options = FoundationModels.GenerationOptions() if let temperature = self.temperature {