From 010cdd5d5534555ed4d7330519079c61bffcfec5 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Thu, 2 Apr 2026 20:29:18 -0700 Subject: [PATCH 1/9] Added embedding API to SDK --- sdk/cs/src/Detail/JsonSerializationContext.cs | 2 + sdk/cs/src/Detail/Model.cs | 5 ++ sdk/cs/src/Detail/ModelVariant.cs | 17 ++++ sdk/cs/src/IModel.cs | 7 ++ sdk/cs/src/OpenAI/EmbeddingClient.cs | 81 +++++++++++++++++++ .../OpenAI/EmbeddingRequestResponseTypes.cs | 75 +++++++++++++++++ 6 files changed, 187 insertions(+) create mode 100644 sdk/cs/src/OpenAI/EmbeddingClient.cs create mode 100644 sdk/cs/src/OpenAI/EmbeddingRequestResponseTypes.cs diff --git a/sdk/cs/src/Detail/JsonSerializationContext.cs b/sdk/cs/src/Detail/JsonSerializationContext.cs index 37cc81ac..0fe5e677 100644 --- a/sdk/cs/src/Detail/JsonSerializationContext.cs +++ b/sdk/cs/src/Detail/JsonSerializationContext.cs @@ -23,6 +23,8 @@ namespace Microsoft.AI.Foundry.Local.Detail; [JsonSerializable(typeof(ChatCompletionCreateResponse))] [JsonSerializable(typeof(AudioCreateTranscriptionRequest))] [JsonSerializable(typeof(AudioCreateTranscriptionResponse))] +[JsonSerializable(typeof(EmbeddingCreateRequestExtended))] +[JsonSerializable(typeof(EmbeddingCreateResponse))] [JsonSerializable(typeof(string[]))] // list loaded or cached models [JsonSerializable(typeof(EpInfo[]))] [JsonSerializable(typeof(EpDownloadResult))] diff --git a/sdk/cs/src/Detail/Model.cs b/sdk/cs/src/Detail/Model.cs index c4d96057..03e9321b 100644 --- a/sdk/cs/src/Detail/Model.cs +++ b/sdk/cs/src/Detail/Model.cs @@ -99,6 +99,11 @@ public async Task GetAudioClientAsync(CancellationToken? ct = return await SelectedVariant.GetAudioClientAsync(ct).ConfigureAwait(false); } + public async Task GetEmbeddingClientAsync(CancellationToken? ct = null) + { + return await SelectedVariant.GetEmbeddingClientAsync(ct).ConfigureAwait(false); + } + public async Task UnloadAsync(CancellationToken? 
ct = null) { await SelectedVariant.UnloadAsync(ct).ConfigureAwait(false); diff --git a/sdk/cs/src/Detail/ModelVariant.cs b/sdk/cs/src/Detail/ModelVariant.cs index 9f2deaba..250c601a 100644 --- a/sdk/cs/src/Detail/ModelVariant.cs +++ b/sdk/cs/src/Detail/ModelVariant.cs @@ -102,6 +102,13 @@ public async Task GetAudioClientAsync(CancellationToken? ct = .ConfigureAwait(false); } + public async Task GetEmbeddingClientAsync(CancellationToken? ct = null) + { + return await Utils.CallWithExceptionHandling(() => GetEmbeddingClientImplAsync(ct), + "Error getting embedding client for model", _logger) + .ConfigureAwait(false); + } + private async Task IsLoadedImplAsync(CancellationToken? ct = null) { var loadedModels = await _modelLoadManager.ListLoadedModelsAsync(ct).ConfigureAwait(false); @@ -193,6 +200,16 @@ private async Task GetAudioClientImplAsync(CancellationToken? return new OpenAIAudioClient(Id); } + private async Task GetEmbeddingClientImplAsync(CancellationToken? ct = null) + { + if (!await IsLoadedAsync(ct)) + { + throw new FoundryLocalException($"Model {Id} is not loaded. Call LoadAsync first."); + } + + return new OpenAIEmbeddingClient(Id); + } + public void SelectVariant(IModel variant) { throw new FoundryLocalException( diff --git a/sdk/cs/src/IModel.cs b/sdk/cs/src/IModel.cs index a27f3a3d..37249782 100644 --- a/sdk/cs/src/IModel.cs +++ b/sdk/cs/src/IModel.cs @@ -70,6 +70,13 @@ Task DownloadAsync(Action? downloadProgress = null, /// OpenAI.AudioClient Task GetAudioClientAsync(CancellationToken? ct = null); + /// + /// Get an OpenAI API based EmbeddingClient + /// + /// Optional cancellation token. + /// OpenAI.EmbeddingClient + Task GetEmbeddingClientAsync(CancellationToken? ct = null); + /// /// Variants of the model that are available. Variants of the model are optimized for different devices. 
/// diff --git a/sdk/cs/src/OpenAI/EmbeddingClient.cs b/sdk/cs/src/OpenAI/EmbeddingClient.cs new file mode 100644 index 00000000..e757fada --- /dev/null +++ b/sdk/cs/src/OpenAI/EmbeddingClient.cs @@ -0,0 +1,81 @@ +// -------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. +// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local; + +using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels; + +using Microsoft.AI.Foundry.Local.Detail; +using Microsoft.AI.Foundry.Local.OpenAI; +using Microsoft.Extensions.Logging; + +/// +/// Embedding Client that uses the OpenAI API. +/// Implemented using Betalgo.Ranul.OpenAI SDK types. +/// +public class OpenAIEmbeddingClient +{ + private readonly string _modelId; + + private readonly ICoreInterop _coreInterop = FoundryLocalManager.Instance.CoreInterop; + private readonly ILogger _logger = FoundryLocalManager.Instance.Logger; + + internal OpenAIEmbeddingClient(string modelId) + { + _modelId = modelId; + } + + /// + /// Settings that are supported by Foundry Local for embeddings. + /// + public record EmbeddingSettings + { + /// + /// The number of dimensions the resulting output embeddings should have. + /// Only supported by some models. + /// + public int? Dimensions { get; set; } + + /// + /// The format to return the embeddings in. Can be either "float" or "base64". + /// + public string? EncodingFormat { get; set; } + } + + /// + /// Settings to use for embedding requests using this client. + /// + public EmbeddingSettings Settings { get; } = new(); + + /// + /// Generate embeddings for the given input text. + /// + /// The text to generate embeddings for. + /// Optional cancellation token. + /// Embedding response containing the embedding vector. 
+ public async Task GenerateEmbeddingAsync(string input, + CancellationToken? ct = null) + { + return await Utils.CallWithExceptionHandling( + () => GenerateEmbeddingImplAsync(input, ct), + "Error during embedding generation.", _logger).ConfigureAwait(false); + } + + private async Task GenerateEmbeddingImplAsync(string input, + CancellationToken? ct) + { + var embeddingRequest = EmbeddingCreateRequestExtended.FromUserInput(_modelId, input, Settings); + var embeddingRequestJson = embeddingRequest.ToJson(); + + var request = new CoreInteropRequest { Params = new() { { "OpenAICreateRequest", embeddingRequestJson } } }; + var response = await _coreInterop.ExecuteCommandAsync("embeddings", request, + ct ?? CancellationToken.None).ConfigureAwait(false); + + var embeddingResponse = response.ToEmbeddingResponse(_logger); + + return embeddingResponse; + } +} diff --git a/sdk/cs/src/OpenAI/EmbeddingRequestResponseTypes.cs b/sdk/cs/src/OpenAI/EmbeddingRequestResponseTypes.cs new file mode 100644 index 00000000..d55a69f6 --- /dev/null +++ b/sdk/cs/src/OpenAI/EmbeddingRequestResponseTypes.cs @@ -0,0 +1,75 @@ +// -------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. +// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local.OpenAI; + +using System.Text.Json; +using System.Text.Json.Serialization; + +using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels; + +using Microsoft.AI.Foundry.Local.Detail; +using Microsoft.Extensions.Logging; + +// https://platform.openai.com/docs/api-reference/embeddings/create +internal record EmbeddingCreateRequestExtended +{ + [JsonPropertyName("input")] + public string? Input { get; set; } + + [JsonPropertyName("model")] + public string? Model { get; set; } + + [JsonPropertyName("dimensions")] + public int? 
Dimensions { get; set; } + + [JsonPropertyName("encoding_format")] + public string? EncodingFormat { get; set; } + + internal static EmbeddingCreateRequestExtended FromUserInput(string modelId, + string input, + OpenAIEmbeddingClient.EmbeddingSettings settings) + { + return new EmbeddingCreateRequestExtended + { + Model = modelId, + Input = input, + Dimensions = settings.Dimensions, + EncodingFormat = settings.EncodingFormat + }; + } +} + +internal static class EmbeddingRequestResponseExtensions +{ + internal static string ToJson(this EmbeddingCreateRequestExtended request) + { + return JsonSerializer.Serialize(request, JsonSerializationContext.Default.EmbeddingCreateRequestExtended); + } + + internal static EmbeddingCreateResponse ToEmbeddingResponse(this ICoreInterop.Response response, ILogger logger) + { + if (response.Error != null) + { + logger.LogError("Error from embeddings: {Error}", response.Error); + throw new FoundryLocalException($"Error from embeddings command: {response.Error}"); + } + + return response.Data!.ToEmbeddingResponse(logger); + } + + internal static EmbeddingCreateResponse ToEmbeddingResponse(this string responseData, ILogger logger) + { + var output = JsonSerializer.Deserialize(responseData, JsonSerializationContext.Default.EmbeddingCreateResponse); + if (output == null) + { + logger.LogError("Failed to deserialize EmbeddingCreateResponse: {ResponseData}", responseData); + throw new JsonException("Failed to deserialize EmbeddingCreateResponse"); + } + + return output; + } +} From ce4ed01712ab4bf831ac14ecdfe98da88310da30 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Thu, 2 Apr 2026 22:56:09 -0700 Subject: [PATCH 2/9] Added embedding tests --- .../EmbeddingClientTests.cs | 181 ++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs diff --git a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs new 
file mode 100644 index 00000000..db1e812a --- /dev/null +++ b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs @@ -0,0 +1,181 @@ +// -------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. +// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local.Tests; + +using System.Threading.Tasks; + +internal sealed class EmbeddingClientTests +{ + private static IModel? model; + + [Before(Class)] + public static async Task Setup() + { + var manager = FoundryLocalManager.Instance; // initialized by Utils + var catalog = await manager.GetCatalogAsync(); + + // Load the specific cached model variant directly + var model = await catalog.GetModelVariantAsync("qwen3-0.6b-embedding-generic-cpu:1").ConfigureAwait(false); + await Assert.That(model).IsNotNull(); + + await model!.LoadAsync().ConfigureAwait(false); + await Assert.That(await model.IsLoadedAsync()).IsTrue(); + + EmbeddingClientTests.model = model; + } + + [Test] + public async Task Embedding_BasicRequest_Succeeds() + { + var embeddingClient = await model!.GetEmbeddingClientAsync(); + await Assert.That(embeddingClient).IsNotNull(); + + var response = await embeddingClient.GenerateEmbeddingAsync("The quick brown fox jumps over the lazy dog") + .ConfigureAwait(false); + + await Assert.That(response).IsNotNull(); + await Assert.That(response.Model).IsEqualTo("qwen3-0.6b-embedding-generic-cpu:1"); + await Assert.That(response.Data).IsNotNull().And.IsNotEmpty(); + await Assert.That(response.Data[0].Embedding).IsNotNull(); + await Assert.That(response.Data[0].Embedding.Count).IsEqualTo(1024); + await Assert.That(response.Data[0].Index).IsEqualTo(0); + + Console.WriteLine($"Embedding dimension: {response.Data[0].Embedding.Count}"); + Console.WriteLine($"First value: {response.Data[0].Embedding[0]}"); + 
Console.WriteLine($"Last value: {response.Data[0].Embedding[1023]}"); + } + + [Test] + public async Task Embedding_IsNormalized() + { + var embeddingClient = await model!.GetEmbeddingClientAsync(); + await Assert.That(embeddingClient).IsNotNull(); + + var inputs = new[] + { + "The quick brown fox jumps over the lazy dog", + "Machine learning is a subset of artificial intelligence", + "The capital of France is Paris" + }; + + foreach (var input in inputs) + { + var response = await embeddingClient.GenerateEmbeddingAsync(input).ConfigureAwait(false); + var embedding = response.Data[0].Embedding; + + await Assert.That(embedding.Count).IsEqualTo(1024); + + // Verify L2 norm is approximately 1.0 + double norm = 0; + foreach (var val in embedding) + { + norm += val * val; + } + + norm = Math.Sqrt(norm); + await Assert.That(norm).IsGreaterThanOrEqualTo(0.99); + await Assert.That(norm).IsLessThanOrEqualTo(1.01); + + // All values should be within [-1, 1] for a normalized vector + foreach (var val in embedding) + { + await Assert.That(val).IsGreaterThanOrEqualTo(-1.0); + await Assert.That(val).IsLessThanOrEqualTo(1.0); + } + } + } + + [Test] + public async Task Embedding_DifferentInputs_ProduceDifferentEmbeddings() + { + var embeddingClient = await model!.GetEmbeddingClientAsync(); + await Assert.That(embeddingClient).IsNotNull(); + + var response1 = await embeddingClient.GenerateEmbeddingAsync("The quick brown fox").ConfigureAwait(false); + var response2 = await embeddingClient.GenerateEmbeddingAsync("The capital of France is Paris").ConfigureAwait(false); + + // Same dimensionality + await Assert.That(response1.Data[0].Embedding.Count) + .IsEqualTo(response2.Data[0].Embedding.Count); + + // But different values (cosine similarity should not be 1.0) + double dot = 0; + for (int i = 0; i < response1.Data[0].Embedding.Count; i++) + { + dot += response1.Data[0].Embedding[i] * response2.Data[0].Embedding[i]; + } + + await Assert.That(dot).IsLessThan(0.99); + } + + [Test] + 
public async Task Embedding_SameInput_ProducesSameEmbedding() + { + var embeddingClient = await model!.GetEmbeddingClientAsync(); + await Assert.That(embeddingClient).IsNotNull(); + + var input = "Deterministic embedding test"; + + var response1 = await embeddingClient.GenerateEmbeddingAsync(input).ConfigureAwait(false); + var response2 = await embeddingClient.GenerateEmbeddingAsync(input).ConfigureAwait(false); + + await Assert.That(response1.Data[0].Embedding.Count) + .IsEqualTo(response2.Data[0].Embedding.Count); + + for (int i = 0; i < response1.Data[0].Embedding.Count; i++) + { + await Assert.That(response1.Data[0].Embedding[i]) + .IsEqualTo(response2.Data[0].Embedding[i]); + } + } + + [Test] + public async Task Embedding_KnownValues_CapitalOfFrance() + { + var embeddingClient = await model!.GetEmbeddingClientAsync(); + await Assert.That(embeddingClient).IsNotNull(); + + var response = await embeddingClient.GenerateEmbeddingAsync("The capital of France is Paris") + .ConfigureAwait(false); + var embedding = response.Data[0].Embedding; + + await Assert.That(embedding.Count).IsEqualTo(1024); + await Assert.That(embedding[0]).IsEqualTo(-0.023386012762784958); + await Assert.That(embedding[1023]).IsEqualTo(-0.011731955222785473); + } + + [Test] + public async Task Embedding_UnloadedModel_Throws() + { + var manager = FoundryLocalManager.Instance; + var catalog = await manager.GetCatalogAsync(); + + // Get a model but don't load it + var unloadedModel = await catalog.GetModelVariantAsync("qwen2.5-0.5b-instruct-generic-cpu:4") + .ConfigureAwait(false); + await Assert.That(unloadedModel).IsNotNull(); + + // Unload it if loaded + if (await unloadedModel!.IsLoadedAsync()) + { + await unloadedModel.UnloadAsync(); + } + + FoundryLocalException? 
caught = null; + try + { + await unloadedModel.GetEmbeddingClientAsync(); + } + catch (FoundryLocalException ex) + { + caught = ex; + } + + await Assert.That(caught).IsNotNull(); + await Assert.That(caught!.Message).Contains("not loaded"); + } +} From cb20fd81be66c9eb9d83eaa193889fcee92e9cb2 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Thu, 2 Apr 2026 23:30:59 -0700 Subject: [PATCH 3/9] tests fix --- .../EmbeddingClientTests.cs | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs index db1e812a..b2a2223d 100644 --- a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs +++ b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs @@ -148,34 +148,4 @@ public async Task Embedding_KnownValues_CapitalOfFrance() await Assert.That(embedding[1023]).IsEqualTo(-0.011731955222785473); } - [Test] - public async Task Embedding_UnloadedModel_Throws() - { - var manager = FoundryLocalManager.Instance; - var catalog = await manager.GetCatalogAsync(); - - // Get a model but don't load it - var unloadedModel = await catalog.GetModelVariantAsync("qwen2.5-0.5b-instruct-generic-cpu:4") - .ConfigureAwait(false); - await Assert.That(unloadedModel).IsNotNull(); - - // Unload it if loaded - if (await unloadedModel!.IsLoadedAsync()) - { - await unloadedModel.UnloadAsync(); - } - - FoundryLocalException? 
caught = null; - try - { - await unloadedModel.GetEmbeddingClientAsync(); - } - catch (FoundryLocalException ex) - { - caught = ex; - } - - await Assert.That(caught).IsNotNull(); - await Assert.That(caught!.Message).Contains("not loaded"); - } } From 99a70278916195a140c674e4a2005420717ca7b5 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Thu, 2 Apr 2026 23:56:42 -0700 Subject: [PATCH 4/9] Updated documentation --- sdk/cs/README.md | 19 +++++++ sdk/cs/docs/api/index.md | 2 + .../api/microsoft.ai.foundry.local.imodel.md | 18 +++++++ .../api/microsoft.ai.foundry.local.model.md | 14 ++++++ ...microsoft.ai.foundry.local.modelvariant.md | 14 ++++++ ....ai.foundry.local.openaiembeddingclient.md | 50 +++++++++++++++++++ 6 files changed, 117 insertions(+) create mode 100644 sdk/cs/docs/api/microsoft.ai.foundry.local.openaiembeddingclient.md diff --git a/sdk/cs/README.md b/sdk/cs/README.md index 3efdc242..51500664 100644 --- a/sdk/cs/README.md +++ b/sdk/cs/README.md @@ -7,6 +7,7 @@ The Foundry Local C# SDK provides a .NET interface for running AI models locally - **Model catalog** — browse and search all available models; filter by cached or loaded state - **Lifecycle management** — download, load, unload, and remove models programmatically - **Chat completions** — synchronous and `IAsyncEnumerable` streaming via OpenAI-compatible types +- **Embeddings** — generate text embeddings with last-token pooling and L2 normalization - **Audio transcription** — transcribe audio files with streaming support - **Download progress** — wire up an `Action` callback for real-time download percentage - **Model variants** — select specific hardware/quantization variants per model alias @@ -246,6 +247,24 @@ chatClient.Settings.TopP = 0.9f; chatClient.Settings.FrequencyPenalty = 0.5f; ``` +### Embeddings + +```csharp +var embeddingClient = await model.GetEmbeddingClientAsync(); + +// Generate an embedding +var response = await embeddingClient.GenerateEmbeddingAsync("The quick brown 
fox jumps over the lazy dog"); +var embedding = response.Data[0].Embedding; // List, L2-normalized +Console.WriteLine($"Dimensions: {embedding.Count}"); +``` + +#### Embedding Settings + +```csharp +embeddingClient.Settings.Dimensions = 512; // optional: reduce dimensionality +embeddingClient.Settings.EncodingFormat = "float"; // "float" or "base64" +``` + ### Audio Transcription ```csharp diff --git a/sdk/cs/docs/api/index.md b/sdk/cs/docs/api/index.md index 4d084f87..c83e0a43 100644 --- a/sdk/cs/docs/api/index.md +++ b/sdk/cs/docs/api/index.md @@ -30,6 +30,8 @@ [OpenAIChatClient](./microsoft.ai.foundry.local.openaichatclient.md) +[OpenAIEmbeddingClient](./microsoft.ai.foundry.local.openaiembeddingclient.md) + [Parameter](./microsoft.ai.foundry.local.parameter.md) [PromptTemplate](./microsoft.ai.foundry.local.prompttemplate.md) diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md index 861386a8..95185abe 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md @@ -208,6 +208,24 @@ Optional cancellation token. [Task<OpenAIAudioClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
OpenAI.AudioClient +### **GetEmbeddingClientAsync(Nullable<CancellationToken>)** + +Get an OpenAI API based EmbeddingClient + +```csharp +Task<OpenAIEmbeddingClient> GetEmbeddingClientAsync(Nullable<CancellationToken> ct) +``` + +#### Parameters + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional cancellation token. + +#### Returns + +[Task<OpenAIEmbeddingClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+OpenAI.EmbeddingClient + ### **SelectVariant(IModel)** Select a model variant from [IModel.Variants](./microsoft.ai.foundry.local.imodel.md#variants) to use for [IModel](./microsoft.ai.foundry.local.imodel.md) operations. diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md index 23cd67a3..c6eac5f2 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md @@ -176,6 +176,20 @@ public Task GetAudioClientAsync(Nullable c [Task<OpenAIAudioClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+### **GetEmbeddingClientAsync(Nullable<CancellationToken>)** + +```csharp +public Task<OpenAIEmbeddingClient> GetEmbeddingClientAsync(Nullable<CancellationToken> ct) +``` + +#### Parameters + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+ +#### Returns + +[Task<OpenAIEmbeddingClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+ ### **UnloadAsync(Nullable<CancellationToken>)** ```csharp diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.modelvariant.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.modelvariant.md index 1f674511..cc2b20a6 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.modelvariant.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.modelvariant.md @@ -181,3 +181,17 @@ public Task GetAudioClientAsync(Nullable c #### Returns [Task<OpenAIAudioClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+ +### **GetEmbeddingClientAsync(Nullable<CancellationToken>)** + +```csharp +public Task<OpenAIEmbeddingClient> GetEmbeddingClientAsync(Nullable<CancellationToken> ct) +``` + +#### Parameters + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+ +#### Returns + +[Task<OpenAIEmbeddingClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiembeddingclient.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiembeddingclient.md new file mode 100644 index 00000000..83025fff --- /dev/null +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiembeddingclient.md @@ -0,0 +1,50 @@ +# OpenAIEmbeddingClient + +Namespace: Microsoft.AI.Foundry.Local + +Embedding Client that uses the OpenAI API. + Implemented using Betalgo.Ranul.OpenAI SDK types. + +```csharp +public class OpenAIEmbeddingClient +``` + +Inheritance [Object](https://docs.microsoft.com/en-us/dotnet/api/system.object) → [OpenAIEmbeddingClient](./microsoft.ai.foundry.local.openaiembeddingclient.md)
+Attributes [NullableContextAttribute](https://docs.microsoft.com/en-us/dotnet/api/system.runtime.compilerservices.nullablecontextattribute), [NullableAttribute](https://docs.microsoft.com/en-us/dotnet/api/system.runtime.compilerservices.nullableattribute) + +## Properties + +### **Settings** + +Settings to use for embedding requests using this client. + +```csharp +public EmbeddingSettings Settings { get; } +``` + +#### Property Value + +EmbeddingSettings
+ +## Methods + +### **GenerateEmbeddingAsync(String, Nullable<CancellationToken>)** + +Generate embeddings for the given input text. + +```csharp +public Task<EmbeddingCreateResponse> GenerateEmbeddingAsync(string input, Nullable<CancellationToken> ct) +``` + +#### Parameters + +`input` [String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+The text to generate embeddings for. + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional cancellation token. + +#### Returns + +[Task<EmbeddingCreateResponse>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+Embedding response containing the embedding vector. From e0dc1c3ad28387ff65af642718a8f53054b9d697 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Fri, 3 Apr 2026 01:47:52 -0700 Subject: [PATCH 5/9] Update sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs Added null checks Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs index b2a2223d..1753cd36 100644 --- a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs +++ b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs @@ -98,6 +98,10 @@ public async Task Embedding_DifferentInputs_ProduceDifferentEmbeddings() var response1 = await embeddingClient.GenerateEmbeddingAsync("The quick brown fox").ConfigureAwait(false); var response2 = await embeddingClient.GenerateEmbeddingAsync("The capital of France is Paris").ConfigureAwait(false); + await Assert.That(response1).IsNotNull(); + await Assert.That(response2).IsNotNull(); + await Assert.That(response1.Data).IsNotNull().And.IsNotEmpty(); + await Assert.That(response2.Data).IsNotNull().And.IsNotEmpty(); // Same dimensionality await Assert.That(response1.Data[0].Embedding.Count) .IsEqualTo(response2.Data[0].Embedding.Count); From e9d887d06de05a06942f5053b14900214eae5745 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Fri, 3 Apr 2026 01:53:33 -0700 Subject: [PATCH 6/9] Added more null checks in tests --- sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs index 1753cd36..53b7cb88 100644 --- a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs +++ b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs @@ -65,6 +65,10 @@ public 
async Task Embedding_IsNormalized() foreach (var input in inputs) { var response = await embeddingClient.GenerateEmbeddingAsync(input).ConfigureAwait(false); + + await Assert.That(response).IsNotNull(); + await Assert.That(response.Data).IsNotNull().And.IsNotEmpty(); + var embedding = response.Data[0].Embedding; await Assert.That(embedding.Count).IsEqualTo(1024); @@ -102,6 +106,7 @@ public async Task Embedding_DifferentInputs_ProduceDifferentEmbeddings() await Assert.That(response2).IsNotNull(); await Assert.That(response1.Data).IsNotNull().And.IsNotEmpty(); await Assert.That(response2.Data).IsNotNull().And.IsNotEmpty(); + // Same dimensionality await Assert.That(response1.Data[0].Embedding.Count) .IsEqualTo(response2.Data[0].Embedding.Count); @@ -127,6 +132,11 @@ public async Task Embedding_SameInput_ProducesSameEmbedding() var response1 = await embeddingClient.GenerateEmbeddingAsync(input).ConfigureAwait(false); var response2 = await embeddingClient.GenerateEmbeddingAsync(input).ConfigureAwait(false); + await Assert.That(response1).IsNotNull(); + await Assert.That(response2).IsNotNull(); + await Assert.That(response1.Data).IsNotNull().And.IsNotEmpty(); + await Assert.That(response2.Data).IsNotNull().And.IsNotEmpty(); + await Assert.That(response1.Data[0].Embedding.Count) .IsEqualTo(response2.Data[0].Embedding.Count); From b29d66338098716601b61d82caaa0b2831843686 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Fri, 3 Apr 2026 02:38:23 -0700 Subject: [PATCH 7/9] fixed copilot comments --- sdk/cs/src/OpenAI/EmbeddingRequestResponseTypes.cs | 10 ++++++++-- sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs | 9 +++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/sdk/cs/src/OpenAI/EmbeddingRequestResponseTypes.cs b/sdk/cs/src/OpenAI/EmbeddingRequestResponseTypes.cs index d55a69f6..d03025b9 100644 --- a/sdk/cs/src/OpenAI/EmbeddingRequestResponseTypes.cs +++ b/sdk/cs/src/OpenAI/EmbeddingRequestResponseTypes.cs @@ -58,7 +58,13 @@ 
internal static EmbeddingCreateResponse ToEmbeddingResponse(this ICoreInterop.Re throw new FoundryLocalException($"Error from embeddings command: {response.Error}"); } - return response.Data!.ToEmbeddingResponse(logger); + if (string.IsNullOrWhiteSpace(response.Data)) + { + logger.LogError("Embeddings command returned no data"); + throw new FoundryLocalException("Embeddings command returned null or empty response data"); + } + + return response.Data.ToEmbeddingResponse(logger); } internal static EmbeddingCreateResponse ToEmbeddingResponse(this string responseData, ILogger logger) @@ -66,7 +72,7 @@ internal static EmbeddingCreateResponse ToEmbeddingResponse(this string response var output = JsonSerializer.Deserialize(responseData, JsonSerializationContext.Default.EmbeddingCreateResponse); if (output == null) { - logger.LogError("Failed to deserialize EmbeddingCreateResponse: {ResponseData}", responseData); + logger.LogError("Failed to deserialize EmbeddingCreateResponse (length={Length})", responseData.Length); throw new JsonException("Failed to deserialize EmbeddingCreateResponse"); } diff --git a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs index 53b7cb88..aa5c64ee 100644 --- a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs +++ b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs @@ -155,11 +155,16 @@ public async Task Embedding_KnownValues_CapitalOfFrance() var response = await embeddingClient.GenerateEmbeddingAsync("The capital of France is Paris") .ConfigureAwait(false); + await Assert.That(response).IsNotNull(); + await Assert.That(response.Data).IsNotNull().And.IsNotEmpty(); var embedding = response.Data[0].Embedding; await Assert.That(embedding.Count).IsEqualTo(1024); - await Assert.That(embedding[0]).IsEqualTo(-0.023386012762784958); - await Assert.That(embedding[1023]).IsEqualTo(-0.011731955222785473); + + // Use tolerance for float32 model outputs which may vary across 
platforms + const double tolerance = 1e-5; + await Assert.That(Math.Abs(embedding[0] - (-0.023386012762784958))).IsLessThanOrEqualTo(tolerance); + await Assert.That(Math.Abs(embedding[1023] - (-0.011731955222785473))).IsLessThanOrEqualTo(tolerance); } } From e2366abeb336b3ab2d8adb20f5aca76df15bd47c Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Fri, 3 Apr 2026 03:54:47 -0700 Subject: [PATCH 8/9] Updated tests with new int4 embedding model results --- sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs index aa5c64ee..5b308363 100644 --- a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs +++ b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs @@ -163,8 +163,8 @@ public async Task Embedding_KnownValues_CapitalOfFrance() // Use tolerance for float32 model outputs which may vary across platforms const double tolerance = 1e-5; - await Assert.That(Math.Abs(embedding[0] - (-0.023386012762784958))).IsLessThanOrEqualTo(tolerance); - await Assert.That(Math.Abs(embedding[1023] - (-0.011731955222785473))).IsLessThanOrEqualTo(tolerance); + await Assert.That(Math.Abs(embedding[0] - (-0.02815740555524826))).IsLessThanOrEqualTo(tolerance); + await Assert.That(Math.Abs(embedding[1023] - (-0.00887922290712595))).IsLessThanOrEqualTo(tolerance); } } From ed20218bb657f0a4a1d2b2b6b4c22db78d1f133f Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Fri, 3 Apr 2026 04:42:23 -0700 Subject: [PATCH 9/9] Updated documentation --- sdk/cs/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/cs/README.md b/sdk/cs/README.md index 51500664..49ec4deb 100644 --- a/sdk/cs/README.md +++ b/sdk/cs/README.md @@ -7,7 +7,7 @@ The Foundry Local C# SDK provides a .NET interface for running AI models locally - **Model catalog** — browse and search all available models; 
filter by cached or loaded state - **Lifecycle management** — download, load, unload, and remove models programmatically - **Chat completions** — synchronous and `IAsyncEnumerable` streaming via OpenAI-compatible types -- **Embeddings** — generate text embeddings with last-token pooling and L2 normalization +- **Embeddings** — generate text embeddings via OpenAI-compatible API - **Audio transcription** — transcribe audio files with streaming support - **Download progress** — wire up an `Action` callback for real-time download percentage - **Model variants** — select specific hardware/quantization variants per model alias @@ -254,7 +254,7 @@ var embeddingClient = await model.GetEmbeddingClientAsync(); // Generate an embedding var response = await embeddingClient.GenerateEmbeddingAsync("The quick brown fox jumps over the lazy dog"); -var embedding = response.Data[0].Embedding; // List, L2-normalized +var embedding = response.Data[0].Embedding; // List Console.WriteLine($"Dimensions: {embedding.Count}"); ```