diff --git a/sdk/cs/README.md b/sdk/cs/README.md index 3efdc242..49ec4deb 100644 --- a/sdk/cs/README.md +++ b/sdk/cs/README.md @@ -7,6 +7,7 @@ The Foundry Local C# SDK provides a .NET interface for running AI models locally - **Model catalog** — browse and search all available models; filter by cached or loaded state - **Lifecycle management** — download, load, unload, and remove models programmatically - **Chat completions** — synchronous and `IAsyncEnumerable` streaming via OpenAI-compatible types +- **Embeddings** — generate text embeddings via OpenAI-compatible API - **Audio transcription** — transcribe audio files with streaming support - **Download progress** — wire up an `Action` callback for real-time download percentage - **Model variants** — select specific hardware/quantization variants per model alias @@ -246,6 +247,24 @@ chatClient.Settings.TopP = 0.9f; chatClient.Settings.FrequencyPenalty = 0.5f; ``` +### Embeddings + +```csharp +var embeddingClient = await model.GetEmbeddingClientAsync(); + +// Generate an embedding +var response = await embeddingClient.GenerateEmbeddingAsync("The quick brown fox jumps over the lazy dog"); +var embedding = response.Data[0].Embedding; // List +Console.WriteLine($"Dimensions: {embedding.Count}"); +``` + +#### Embedding Settings + +```csharp +embeddingClient.Settings.Dimensions = 512; // optional: reduce dimensionality +embeddingClient.Settings.EncodingFormat = "float"; // "float" or "base64" +``` + ### Audio Transcription ```csharp diff --git a/sdk/cs/docs/api/index.md b/sdk/cs/docs/api/index.md index 4d084f87..c83e0a43 100644 --- a/sdk/cs/docs/api/index.md +++ b/sdk/cs/docs/api/index.md @@ -30,6 +30,8 @@ [OpenAIChatClient](./microsoft.ai.foundry.local.openaichatclient.md) +[OpenAIEmbeddingClient](./microsoft.ai.foundry.local.openaiembeddingclient.md) + [Parameter](./microsoft.ai.foundry.local.parameter.md) [PromptTemplate](./microsoft.ai.foundry.local.prompttemplate.md) diff --git 
a/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md index 861386a8..95185abe 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md @@ -208,6 +208,24 @@ Optional cancellation token. [Task<OpenAIAudioClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
OpenAI.AudioClient +### **GetEmbeddingClientAsync(Nullable<CancellationToken>)** + +Get an OpenAI API based EmbeddingClient + +```csharp +Task<OpenAIEmbeddingClient> GetEmbeddingClientAsync(Nullable<CancellationToken> ct) +``` + +#### Parameters + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional cancellation token. + +#### Returns + +[Task<OpenAIEmbeddingClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+OpenAI.EmbeddingClient + ### **SelectVariant(IModel)** Select a model variant from [IModel.Variants](./microsoft.ai.foundry.local.imodel.md#variants) to use for [IModel](./microsoft.ai.foundry.local.imodel.md) operations. diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md index 23cd67a3..c6eac5f2 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md @@ -176,6 +176,20 @@ public Task GetAudioClientAsync(Nullable c [Task<OpenAIAudioClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+### **GetEmbeddingClientAsync(Nullable<CancellationToken>)** + +```csharp +public Task<OpenAIEmbeddingClient> GetEmbeddingClientAsync(Nullable<CancellationToken> ct) +``` + +#### Parameters + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+ +#### Returns + +[Task<OpenAIEmbeddingClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+ ### **UnloadAsync(Nullable<CancellationToken>)** ```csharp diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.modelvariant.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.modelvariant.md index 1f674511..cc2b20a6 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.modelvariant.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.modelvariant.md @@ -181,3 +181,17 @@ public Task GetAudioClientAsync(Nullable c #### Returns [Task<OpenAIAudioClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+ +### **GetEmbeddingClientAsync(Nullable<CancellationToken>)** + +```csharp +public Task<OpenAIEmbeddingClient> GetEmbeddingClientAsync(Nullable<CancellationToken> ct) +``` + +#### Parameters + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+ +#### Returns + +[Task<OpenAIEmbeddingClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiembeddingclient.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiembeddingclient.md new file mode 100644 index 00000000..83025fff --- /dev/null +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiembeddingclient.md @@ -0,0 +1,50 @@ +# OpenAIEmbeddingClient + +Namespace: Microsoft.AI.Foundry.Local + +Embedding Client that uses the OpenAI API. + Implemented using Betalgo.Ranul.OpenAI SDK types. + +```csharp +public class OpenAIEmbeddingClient +``` + +Inheritance [Object](https://docs.microsoft.com/en-us/dotnet/api/system.object) → [OpenAIEmbeddingClient](./microsoft.ai.foundry.local.openaiembeddingclient.md)
+Attributes [NullableContextAttribute](https://docs.microsoft.com/en-us/dotnet/api/system.runtime.compilerservices.nullablecontextattribute), [NullableAttribute](https://docs.microsoft.com/en-us/dotnet/api/system.runtime.compilerservices.nullableattribute) + +## Properties + +### **Settings** + +Settings to use for embedding requests using this client. + +```csharp +public EmbeddingSettings Settings { get; } +``` + +#### Property Value + +EmbeddingSettings
+ +## Methods + +### **GenerateEmbeddingAsync(String, Nullable<CancellationToken>)** + +Generate embeddings for the given input text. + +```csharp +public Task<EmbeddingCreateResponse> GenerateEmbeddingAsync(string input, Nullable<CancellationToken> ct) +``` + +#### Parameters + +`input` [String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+The text to generate embeddings for. + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional cancellation token. + +#### Returns + +[Task<EmbeddingCreateResponse>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+Embedding response containing the embedding vector. diff --git a/sdk/cs/src/Detail/JsonSerializationContext.cs b/sdk/cs/src/Detail/JsonSerializationContext.cs index 37cc81ac..0fe5e677 100644 --- a/sdk/cs/src/Detail/JsonSerializationContext.cs +++ b/sdk/cs/src/Detail/JsonSerializationContext.cs @@ -23,6 +23,8 @@ namespace Microsoft.AI.Foundry.Local.Detail; [JsonSerializable(typeof(ChatCompletionCreateResponse))] [JsonSerializable(typeof(AudioCreateTranscriptionRequest))] [JsonSerializable(typeof(AudioCreateTranscriptionResponse))] +[JsonSerializable(typeof(EmbeddingCreateRequestExtended))] +[JsonSerializable(typeof(EmbeddingCreateResponse))] [JsonSerializable(typeof(string[]))] // list loaded or cached models [JsonSerializable(typeof(EpInfo[]))] [JsonSerializable(typeof(EpDownloadResult))] diff --git a/sdk/cs/src/Detail/Model.cs b/sdk/cs/src/Detail/Model.cs index c4d96057..03e9321b 100644 --- a/sdk/cs/src/Detail/Model.cs +++ b/sdk/cs/src/Detail/Model.cs @@ -99,6 +99,11 @@ public async Task GetAudioClientAsync(CancellationToken? ct = return await SelectedVariant.GetAudioClientAsync(ct).ConfigureAwait(false); } + public async Task GetEmbeddingClientAsync(CancellationToken? ct = null) + { + return await SelectedVariant.GetEmbeddingClientAsync(ct).ConfigureAwait(false); + } + public async Task UnloadAsync(CancellationToken? ct = null) { await SelectedVariant.UnloadAsync(ct).ConfigureAwait(false); diff --git a/sdk/cs/src/Detail/ModelVariant.cs b/sdk/cs/src/Detail/ModelVariant.cs index 9f2deaba..250c601a 100644 --- a/sdk/cs/src/Detail/ModelVariant.cs +++ b/sdk/cs/src/Detail/ModelVariant.cs @@ -102,6 +102,13 @@ public async Task GetAudioClientAsync(CancellationToken? ct = .ConfigureAwait(false); } + public async Task GetEmbeddingClientAsync(CancellationToken? 
ct = null) + { + return await Utils.CallWithExceptionHandling(() => GetEmbeddingClientImplAsync(ct), + "Error getting embedding client for model", _logger) + .ConfigureAwait(false); + } + private async Task IsLoadedImplAsync(CancellationToken? ct = null) { var loadedModels = await _modelLoadManager.ListLoadedModelsAsync(ct).ConfigureAwait(false); @@ -193,6 +200,16 @@ private async Task GetAudioClientImplAsync(CancellationToken? return new OpenAIAudioClient(Id); } + private async Task GetEmbeddingClientImplAsync(CancellationToken? ct = null) + { + if (!await IsLoadedAsync(ct)) + { + throw new FoundryLocalException($"Model {Id} is not loaded. Call LoadAsync first."); + } + + return new OpenAIEmbeddingClient(Id); + } + public void SelectVariant(IModel variant) { throw new FoundryLocalException( diff --git a/sdk/cs/src/IModel.cs b/sdk/cs/src/IModel.cs index a27f3a3d..37249782 100644 --- a/sdk/cs/src/IModel.cs +++ b/sdk/cs/src/IModel.cs @@ -70,6 +70,13 @@ Task DownloadAsync(Action? downloadProgress = null, /// OpenAI.AudioClient Task GetAudioClientAsync(CancellationToken? ct = null); + /// + /// Get an OpenAI API based EmbeddingClient + /// + /// Optional cancellation token. + /// OpenAI.EmbeddingClient + Task GetEmbeddingClientAsync(CancellationToken? ct = null); + /// /// Variants of the model that are available. Variants of the model are optimized for different devices. /// diff --git a/sdk/cs/src/OpenAI/EmbeddingClient.cs b/sdk/cs/src/OpenAI/EmbeddingClient.cs new file mode 100644 index 00000000..e757fada --- /dev/null +++ b/sdk/cs/src/OpenAI/EmbeddingClient.cs @@ -0,0 +1,81 @@ +// -------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. 
+// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local; + +using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels; + +using Microsoft.AI.Foundry.Local.Detail; +using Microsoft.AI.Foundry.Local.OpenAI; +using Microsoft.Extensions.Logging; + +/// +/// Embedding Client that uses the OpenAI API. +/// Implemented using Betalgo.Ranul.OpenAI SDK types. +/// +public class OpenAIEmbeddingClient +{ + private readonly string _modelId; + + private readonly ICoreInterop _coreInterop = FoundryLocalManager.Instance.CoreInterop; + private readonly ILogger _logger = FoundryLocalManager.Instance.Logger; + + internal OpenAIEmbeddingClient(string modelId) + { + _modelId = modelId; + } + + /// + /// Settings that are supported by Foundry Local for embeddings. + /// + public record EmbeddingSettings + { + /// + /// The number of dimensions the resulting output embeddings should have. + /// Only supported by some models. + /// + public int? Dimensions { get; set; } + + /// + /// The format to return the embeddings in. Can be either "float" or "base64". + /// + public string? EncodingFormat { get; set; } + } + + /// + /// Settings to use for embedding requests using this client. + /// + public EmbeddingSettings Settings { get; } = new(); + + /// + /// Generate embeddings for the given input text. + /// + /// The text to generate embeddings for. + /// Optional cancellation token. + /// Embedding response containing the embedding vector. + public async Task GenerateEmbeddingAsync(string input, + CancellationToken? ct = null) + { + return await Utils.CallWithExceptionHandling( + () => GenerateEmbeddingImplAsync(input, ct), + "Error during embedding generation.", _logger).ConfigureAwait(false); + } + + private async Task GenerateEmbeddingImplAsync(string input, + CancellationToken? 
ct) + { + var embeddingRequest = EmbeddingCreateRequestExtended.FromUserInput(_modelId, input, Settings); + var embeddingRequestJson = embeddingRequest.ToJson(); + + var request = new CoreInteropRequest { Params = new() { { "OpenAICreateRequest", embeddingRequestJson } } }; + var response = await _coreInterop.ExecuteCommandAsync("embeddings", request, + ct ?? CancellationToken.None).ConfigureAwait(false); + + var embeddingResponse = response.ToEmbeddingResponse(_logger); + + return embeddingResponse; + } +} diff --git a/sdk/cs/src/OpenAI/EmbeddingRequestResponseTypes.cs b/sdk/cs/src/OpenAI/EmbeddingRequestResponseTypes.cs new file mode 100644 index 00000000..d03025b9 --- /dev/null +++ b/sdk/cs/src/OpenAI/EmbeddingRequestResponseTypes.cs @@ -0,0 +1,81 @@ +// -------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. +// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local.OpenAI; + +using System.Text.Json; +using System.Text.Json.Serialization; + +using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels; + +using Microsoft.AI.Foundry.Local.Detail; +using Microsoft.Extensions.Logging; + +// https://platform.openai.com/docs/api-reference/embeddings/create +internal record EmbeddingCreateRequestExtended +{ + [JsonPropertyName("input")] + public string? Input { get; set; } + + [JsonPropertyName("model")] + public string? Model { get; set; } + + [JsonPropertyName("dimensions")] + public int? Dimensions { get; set; } + + [JsonPropertyName("encoding_format")] + public string? 
EncodingFormat { get; set; } + + internal static EmbeddingCreateRequestExtended FromUserInput(string modelId, + string input, + OpenAIEmbeddingClient.EmbeddingSettings settings) + { + return new EmbeddingCreateRequestExtended + { + Model = modelId, + Input = input, + Dimensions = settings.Dimensions, + EncodingFormat = settings.EncodingFormat + }; + } +} + +internal static class EmbeddingRequestResponseExtensions +{ + internal static string ToJson(this EmbeddingCreateRequestExtended request) + { + return JsonSerializer.Serialize(request, JsonSerializationContext.Default.EmbeddingCreateRequestExtended); + } + + internal static EmbeddingCreateResponse ToEmbeddingResponse(this ICoreInterop.Response response, ILogger logger) + { + if (response.Error != null) + { + logger.LogError("Error from embeddings: {Error}", response.Error); + throw new FoundryLocalException($"Error from embeddings command: {response.Error}"); + } + + if (string.IsNullOrWhiteSpace(response.Data)) + { + logger.LogError("Embeddings command returned no data"); + throw new FoundryLocalException("Embeddings command returned null or empty response data"); + } + + return response.Data.ToEmbeddingResponse(logger); + } + + internal static EmbeddingCreateResponse ToEmbeddingResponse(this string responseData, ILogger logger) + { + var output = JsonSerializer.Deserialize(responseData, JsonSerializationContext.Default.EmbeddingCreateResponse); + if (output == null) + { + logger.LogError("Failed to deserialize EmbeddingCreateResponse (length={Length})", responseData.Length); + throw new JsonException("Failed to deserialize EmbeddingCreateResponse"); + } + + return output; + } +} diff --git a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs new file mode 100644 index 00000000..5b308363 --- /dev/null +++ b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs @@ -0,0 +1,170 @@ +// 
-------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. +// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local.Tests; + +using System.Threading.Tasks; + +internal sealed class EmbeddingClientTests +{ + private static IModel? model; + + [Before(Class)] + public static async Task Setup() + { + var manager = FoundryLocalManager.Instance; // initialized by Utils + var catalog = await manager.GetCatalogAsync(); + + // Load the specific cached model variant directly + var model = await catalog.GetModelVariantAsync("qwen3-0.6b-embedding-generic-cpu:1").ConfigureAwait(false); + await Assert.That(model).IsNotNull(); + + await model!.LoadAsync().ConfigureAwait(false); + await Assert.That(await model.IsLoadedAsync()).IsTrue(); + + EmbeddingClientTests.model = model; + } + + [Test] + public async Task Embedding_BasicRequest_Succeeds() + { + var embeddingClient = await model!.GetEmbeddingClientAsync(); + await Assert.That(embeddingClient).IsNotNull(); + + var response = await embeddingClient.GenerateEmbeddingAsync("The quick brown fox jumps over the lazy dog") + .ConfigureAwait(false); + + await Assert.That(response).IsNotNull(); + await Assert.That(response.Model).IsEqualTo("qwen3-0.6b-embedding-generic-cpu:1"); + await Assert.That(response.Data).IsNotNull().And.IsNotEmpty(); + await Assert.That(response.Data[0].Embedding).IsNotNull(); + await Assert.That(response.Data[0].Embedding.Count).IsEqualTo(1024); + await Assert.That(response.Data[0].Index).IsEqualTo(0); + + Console.WriteLine($"Embedding dimension: {response.Data[0].Embedding.Count}"); + Console.WriteLine($"First value: {response.Data[0].Embedding[0]}"); + Console.WriteLine($"Last value: {response.Data[0].Embedding[1023]}"); + } + + [Test] + public async Task Embedding_IsNormalized() + { + var 
embeddingClient = await model!.GetEmbeddingClientAsync(); + await Assert.That(embeddingClient).IsNotNull(); + + var inputs = new[] + { + "The quick brown fox jumps over the lazy dog", + "Machine learning is a subset of artificial intelligence", + "The capital of France is Paris" + }; + + foreach (var input in inputs) + { + var response = await embeddingClient.GenerateEmbeddingAsync(input).ConfigureAwait(false); + + await Assert.That(response).IsNotNull(); + await Assert.That(response.Data).IsNotNull().And.IsNotEmpty(); + + var embedding = response.Data[0].Embedding; + + await Assert.That(embedding.Count).IsEqualTo(1024); + + // Verify L2 norm is approximately 1.0 + double norm = 0; + foreach (var val in embedding) + { + norm += val * val; + } + + norm = Math.Sqrt(norm); + await Assert.That(norm).IsGreaterThanOrEqualTo(0.99); + await Assert.That(norm).IsLessThanOrEqualTo(1.01); + + // All values should be within [-1, 1] for a normalized vector + foreach (var val in embedding) + { + await Assert.That(val).IsGreaterThanOrEqualTo(-1.0); + await Assert.That(val).IsLessThanOrEqualTo(1.0); + } + } + } + + [Test] + public async Task Embedding_DifferentInputs_ProduceDifferentEmbeddings() + { + var embeddingClient = await model!.GetEmbeddingClientAsync(); + await Assert.That(embeddingClient).IsNotNull(); + + var response1 = await embeddingClient.GenerateEmbeddingAsync("The quick brown fox").ConfigureAwait(false); + var response2 = await embeddingClient.GenerateEmbeddingAsync("The capital of France is Paris").ConfigureAwait(false); + + await Assert.That(response1).IsNotNull(); + await Assert.That(response2).IsNotNull(); + await Assert.That(response1.Data).IsNotNull().And.IsNotEmpty(); + await Assert.That(response2.Data).IsNotNull().And.IsNotEmpty(); + + // Same dimensionality + await Assert.That(response1.Data[0].Embedding.Count) + .IsEqualTo(response2.Data[0].Embedding.Count); + + // But different values (cosine similarity should not be 1.0) + double dot = 0; + for (int i = 
0; i < response1.Data[0].Embedding.Count; i++) + { + dot += response1.Data[0].Embedding[i] * response2.Data[0].Embedding[i]; + } + + await Assert.That(dot).IsLessThan(0.99); + } + + [Test] + public async Task Embedding_SameInput_ProducesSameEmbedding() + { + var embeddingClient = await model!.GetEmbeddingClientAsync(); + await Assert.That(embeddingClient).IsNotNull(); + + var input = "Deterministic embedding test"; + + var response1 = await embeddingClient.GenerateEmbeddingAsync(input).ConfigureAwait(false); + var response2 = await embeddingClient.GenerateEmbeddingAsync(input).ConfigureAwait(false); + + await Assert.That(response1).IsNotNull(); + await Assert.That(response2).IsNotNull(); + await Assert.That(response1.Data).IsNotNull().And.IsNotEmpty(); + await Assert.That(response2.Data).IsNotNull().And.IsNotEmpty(); + + await Assert.That(response1.Data[0].Embedding.Count) + .IsEqualTo(response2.Data[0].Embedding.Count); + + for (int i = 0; i < response1.Data[0].Embedding.Count; i++) + { + await Assert.That(response1.Data[0].Embedding[i]) + .IsEqualTo(response2.Data[0].Embedding[i]); + } + } + + [Test] + public async Task Embedding_KnownValues_CapitalOfFrance() + { + var embeddingClient = await model!.GetEmbeddingClientAsync(); + await Assert.That(embeddingClient).IsNotNull(); + + var response = await embeddingClient.GenerateEmbeddingAsync("The capital of France is Paris") + .ConfigureAwait(false); + await Assert.That(response).IsNotNull(); + await Assert.That(response.Data).IsNotNull().And.IsNotEmpty(); + var embedding = response.Data[0].Embedding; + + await Assert.That(embedding.Count).IsEqualTo(1024); + + // Use tolerance for float32 model outputs which may vary across platforms + const double tolerance = 1e-5; + await Assert.That(Math.Abs(embedding[0] - (-0.02815740555524826))).IsLessThanOrEqualTo(tolerance); + await Assert.That(Math.Abs(embedding[1023] - (-0.00887922290712595))).IsLessThanOrEqualTo(tolerance); + } + +}