From 0dc589e232d67ea4a3c5954580fe67db78524785 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 3 Feb 2026 21:06:26 +0000
Subject: [PATCH 01/16] Initial plan
From d3187bddb80809d61d3f18232842987d04d09ede Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 3 Feb 2026 21:11:16 +0000
Subject: [PATCH 02/16] Add EmbeddingsOptions and EmbeddingProviderType
configuration models
Co-authored-by: JerryNixon <1749983+JerryNixon@users.noreply.github.com>
---
.../ObjectModel/EmbeddingProviderType.cs | 27 +++
src/Config/ObjectModel/EmbeddingsOptions.cs | 163 +++++++++++++
src/Config/ObjectModel/RuntimeOptions.cs | 13 +-
src/Core/Services/EmbeddingService.cs | 229 ++++++++++++++++++
src/Core/Services/IEmbeddingService.cs | 27 +++
5 files changed, 458 insertions(+), 1 deletion(-)
create mode 100644 src/Config/ObjectModel/EmbeddingProviderType.cs
create mode 100644 src/Config/ObjectModel/EmbeddingsOptions.cs
create mode 100644 src/Core/Services/EmbeddingService.cs
create mode 100644 src/Core/Services/IEmbeddingService.cs
diff --git a/src/Config/ObjectModel/EmbeddingProviderType.cs b/src/Config/ObjectModel/EmbeddingProviderType.cs
new file mode 100644
index 0000000000..0a18d491bb
--- /dev/null
+++ b/src/Config/ObjectModel/EmbeddingProviderType.cs
@@ -0,0 +1,27 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+using System.Runtime.Serialization;
+using System.Text.Json.Serialization;
+using Azure.DataApiBuilder.Config.Converters;
+
+namespace Azure.DataApiBuilder.Config.ObjectModel;
+
+/// <summary>
+/// Represents the supported embedding provider types.
+/// </summary>
+[JsonConverter(typeof(EnumMemberJsonEnumConverterFactory))]
+public enum EmbeddingProviderType
+{
+ /// <summary>
+ /// Azure OpenAI embedding provider. Declared config value (EnumMember): "azure-openai".
+ /// </summary>
+ [EnumMember(Value = "azure-openai")]
+ AzureOpenAI,
+
+ /// <summary>
+ /// OpenAI embedding provider. Declared config value (EnumMember): "openai".
+ /// </summary>
+ [EnumMember(Value = "openai")]
+ OpenAI
+}
diff --git a/src/Config/ObjectModel/EmbeddingsOptions.cs b/src/Config/ObjectModel/EmbeddingsOptions.cs
new file mode 100644
index 0000000000..41147adc33
--- /dev/null
+++ b/src/Config/ObjectModel/EmbeddingsOptions.cs
@@ -0,0 +1,163 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+using System.Diagnostics.CodeAnalysis;
+using System.Text.Json.Serialization;
+
+namespace Azure.DataApiBuilder.Config.ObjectModel;
+
+/// <summary>
+/// Settings that drive the text embedding (vectorization) service.
+/// Two providers are supported: OpenAI and Azure OpenAI.
+/// </summary>
+public record EmbeddingsOptions
+{
+    /// <summary>
+    /// Timeout, in milliseconds, applied to embedding requests when none is configured.
+    /// </summary>
+    public const int DEFAULT_TIMEOUT_MS = 30000;
+
+    /// <summary>
+    /// Azure OpenAI API version used when none is configured.
+    /// </summary>
+    public const string DEFAULT_AZURE_API_VERSION = "2024-02-01";
+
+    /// <summary>
+    /// OpenAI embedding model used when none is configured.
+    /// </summary>
+    public const string DEFAULT_OPENAI_MODEL = "text-embedding-3-small";
+
+    /// <summary>
+    /// Which embedding provider to call (azure-openai or openai). Required.
+    /// </summary>
+    [JsonPropertyName("provider")]
+    public EmbeddingProviderType Provider { get; init; }
+
+    /// <summary>
+    /// Base URL of the provider endpoint. Required.
+    /// </summary>
+    [JsonPropertyName("endpoint")]
+    public string Endpoint { get; init; }
+
+    /// <summary>
+    /// API key used to authenticate against the provider. Required.
+    /// </summary>
+    [JsonPropertyName("api-key")]
+    public string ApiKey { get; init; }
+
+    /// <summary>
+    /// Model or deployment name.
+    /// Azure OpenAI: the deployment name.
+    /// OpenAI: the model name; <see cref="EffectiveModel"/> falls back to
+    /// <see cref="DEFAULT_OPENAI_MODEL"/> when this is null.
+    /// </summary>
+    [JsonPropertyName("model")]
+    public string? Model { get; init; }
+
+    /// <summary>
+    /// Azure API version; only relevant to the Azure OpenAI provider.
+    /// <see cref="EffectiveApiVersion"/> falls back to <see cref="DEFAULT_AZURE_API_VERSION"/>.
+    /// </summary>
+    [JsonPropertyName("api-version")]
+    public string? ApiVersion { get; init; }
+
+    /// <summary>
+    /// Requested size of the output vector. Optional; when null the model default applies.
+    /// </summary>
+    [JsonPropertyName("dimensions")]
+    public int? Dimensions { get; init; }
+
+    /// <summary>
+    /// Request timeout in milliseconds. Optional;
+    /// <see cref="EffectiveTimeoutMs"/> falls back to <see cref="DEFAULT_TIMEOUT_MS"/>.
+    /// </summary>
+    [JsonPropertyName("timeout-ms")]
+    public int? TimeoutMs { get; init; }
+
+    /// <summary>
+    /// True when a timeout value was passed to the constructor.
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.Always)]
+    [MemberNotNullWhen(true, nameof(TimeoutMs))]
+    public bool UserProvidedTimeoutMs { get; init; }
+
+    /// <summary>
+    /// True when an API version was passed to the constructor.
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.Always)]
+    [MemberNotNullWhen(true, nameof(ApiVersion))]
+    public bool UserProvidedApiVersion { get; init; }
+
+    /// <summary>
+    /// True when a dimensions value was passed to the constructor.
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.Always)]
+    [MemberNotNullWhen(true, nameof(Dimensions))]
+    public bool UserProvidedDimensions { get; init; }
+
+    /// <summary>
+    /// True when a model name was passed to the constructor.
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.Always)]
+    [MemberNotNullWhen(true, nameof(Model))]
+    public bool UserProvidedModel { get; init; }
+
+    /// <summary>
+    /// Timeout to apply: the configured value, or <see cref="DEFAULT_TIMEOUT_MS"/> when unset.
+    /// </summary>
+    [JsonIgnore]
+    public int EffectiveTimeoutMs => TimeoutMs.GetValueOrDefault(DEFAULT_TIMEOUT_MS);
+
+    /// <summary>
+    /// Azure API version to apply: the configured value, or
+    /// <see cref="DEFAULT_AZURE_API_VERSION"/> when unset.
+    /// </summary>
+    [JsonIgnore]
+    public string EffectiveApiVersion => ApiVersion is null ? DEFAULT_AZURE_API_VERSION : ApiVersion;
+
+    /// <summary>
+    /// Model name to apply. A configured model always wins; otherwise OpenAI falls back to
+    /// <see cref="DEFAULT_OPENAI_MODEL"/> and every other provider yields null (no default).
+    /// </summary>
+    [JsonIgnore]
+    public string? EffectiveModel
+    {
+        get
+        {
+            if (Model is not null)
+            {
+                return Model;
+            }
+
+            return Provider == EmbeddingProviderType.OpenAI ? DEFAULT_OPENAI_MODEL : null;
+        }
+    }
+
+    [JsonConstructor]
+    public EmbeddingsOptions(
+        EmbeddingProviderType Provider,
+        string Endpoint,
+        string ApiKey,
+        string? Model = null,
+        string? ApiVersion = null,
+        int? Dimensions = null,
+        int? TimeoutMs = null)
+    {
+        this.Provider = Provider;
+        this.Endpoint = Endpoint;
+        this.ApiKey = ApiKey;
+
+        // Optional values are stored as given (null stays null); each UserProvided*
+        // flag records whether the corresponding argument was supplied.
+        this.Model = Model;
+        UserProvidedModel = Model is not null;
+
+        this.ApiVersion = ApiVersion;
+        UserProvidedApiVersion = ApiVersion is not null;
+
+        this.Dimensions = Dimensions;
+        UserProvidedDimensions = Dimensions is not null;
+
+        this.TimeoutMs = TimeoutMs;
+        UserProvidedTimeoutMs = TimeoutMs is not null;
+    }
+}
diff --git a/src/Config/ObjectModel/RuntimeOptions.cs b/src/Config/ObjectModel/RuntimeOptions.cs
index 6f6c046651..991cb814c4 100644
--- a/src/Config/ObjectModel/RuntimeOptions.cs
+++ b/src/Config/ObjectModel/RuntimeOptions.cs
@@ -17,6 +17,7 @@ public record RuntimeOptions
public RuntimeCacheOptions? Cache { get; init; }
public PaginationOptions? Pagination { get; init; }
public RuntimeHealthCheckConfig? Health { get; init; }
+ public EmbeddingsOptions? Embeddings { get; init; }
[JsonConstructor]
public RuntimeOptions(
@@ -28,7 +29,8 @@ public RuntimeOptions(
TelemetryOptions? Telemetry = null,
RuntimeCacheOptions? Cache = null,
PaginationOptions? Pagination = null,
- RuntimeHealthCheckConfig? Health = null)
+ RuntimeHealthCheckConfig? Health = null,
+ EmbeddingsOptions? Embeddings = null)
{
this.Rest = Rest;
this.GraphQL = GraphQL;
@@ -39,6 +41,7 @@ public RuntimeOptions(
this.Cache = Cache;
this.Pagination = Pagination;
this.Health = Health;
+ this.Embeddings = Embeddings;
}
///
@@ -74,4 +77,12 @@ Mcp is null ||
Health is null ||
Health?.Enabled is null ||
Health?.Enabled is true;
+
+    /// <summary>
+    /// True when the runtime section carries an embeddings configuration,
+    /// i.e. when <see cref="Embeddings"/> is non-null.
+    /// </summary>
+    [JsonIgnore]
+    [MemberNotNullWhen(true, nameof(Embeddings))]
+    public bool IsEmbeddingsConfigured
+    {
+        get { return Embeddings is not null; }
+    }
}
diff --git a/src/Core/Services/EmbeddingService.cs b/src/Core/Services/EmbeddingService.cs
new file mode 100644
index 0000000000..6371ceeecc
--- /dev/null
+++ b/src/Core/Services/EmbeddingService.cs
@@ -0,0 +1,229 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+using System.Net.Http.Headers;
+using System.Text;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+using Azure.DataApiBuilder.Config.ObjectModel;
+using Microsoft.Extensions.Logging;
+
+namespace Azure.DataApiBuilder.Core.Services;
+
+/// <summary>
+/// Service implementation for text embedding/vectorization.
+/// Supports both OpenAI and Azure OpenAI providers.
+/// </summary>
+public class EmbeddingService : IEmbeddingService
+{
+    private readonly HttpClient _httpClient;
+    private readonly EmbeddingsOptions _options;
+    private readonly ILogger<EmbeddingService> _logger;
+
+    /// <summary>
+    /// JSON serializer options for request/response handling.
+    /// Null-valued properties are omitted on write; <see cref="BuildRequestBody"/> relies on this.
+    /// </summary>
+    private static readonly JsonSerializerOptions _jsonSerializerOptions = new()
+    {
+        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
+        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
+    };
+
+    /// <summary>
+    /// Initializes a new instance of the EmbeddingService and configures the
+    /// supplied HTTP client (timeout, authentication and Accept headers).
+    /// </summary>
+    /// <param name="httpClient">The HTTP client used to call the embedding provider.</param>
+    /// <param name="options">The embedding configuration options.</param>
+    /// <param name="logger">The logger instance.</param>
+    /// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
+    public EmbeddingService(
+        HttpClient httpClient,
+        EmbeddingsOptions options,
+        ILogger<EmbeddingService> logger)
+    {
+        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
+        _options = options ?? throw new ArgumentNullException(nameof(options));
+        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
+
+        ConfigureHttpClient();
+    }
+
+    /// <summary>
+    /// Configures the HTTP client with timeout and authentication headers.
+    /// Azure OpenAI authenticates with an "api-key" header; OpenAI uses a Bearer token.
+    /// </summary>
+    private void ConfigureHttpClient()
+    {
+        _httpClient.Timeout = TimeSpan.FromMilliseconds(_options.EffectiveTimeoutMs);
+
+        if (_options.Provider == EmbeddingProviderType.AzureOpenAI)
+        {
+            _httpClient.DefaultRequestHeaders.Add("api-key", _options.ApiKey);
+        }
+        else
+        {
+            _httpClient.DefaultRequestHeaders.Authorization =
+                new AuthenticationHeaderValue("Bearer", _options.ApiKey);
+        }
+
+        _httpClient.DefaultRequestHeaders.Accept.Clear();
+        _httpClient.DefaultRequestHeaders.Accept.Add(
+            new MediaTypeWithQualityHeaderValue("application/json"));
+    }
+
+    /// <inheritdoc/>
+    public async Task<float[]> EmbedAsync(string text, CancellationToken cancellationToken = default)
+    {
+        if (string.IsNullOrEmpty(text))
+        {
+            throw new ArgumentException("Text cannot be null or empty.", nameof(text));
+        }
+
+        // A single text is just a batch of one.
+        float[][] results = await EmbedBatchAsync(new[] { text }, cancellationToken);
+        return results[0];
+    }
+
+    /// <inheritdoc/>
+    /// <exception cref="ArgumentException">Thrown when <paramref name="texts"/> is null or empty.</exception>
+    /// <exception cref="HttpRequestException">Thrown when the provider returns a non-success status code.</exception>
+    /// <exception cref="InvalidOperationException">Thrown when the provider response contains no embedding data.</exception>
+    public async Task<float[][]> EmbedBatchAsync(string[] texts, CancellationToken cancellationToken = default)
+    {
+        if (texts is null || texts.Length == 0)
+        {
+            throw new ArgumentException("Texts cannot be null or empty.", nameof(texts));
+        }
+
+        string requestUrl = BuildRequestUrl();
+        object requestBody = BuildRequestBody(texts);
+
+        string requestJson = JsonSerializer.Serialize(requestBody, _jsonSerializerOptions);
+        using HttpContent content = new StringContent(requestJson, Encoding.UTF8, "application/json");
+
+        _logger.LogDebug("Sending embedding request to {Url} with {Count} text(s)", requestUrl, texts.Length);
+
+        // Dispose the response on every exit path (including the throws below).
+        using HttpResponseMessage response = await _httpClient.PostAsync(requestUrl, content, cancellationToken);
+
+        if (!response.IsSuccessStatusCode)
+        {
+            string errorContent = await response.Content.ReadAsStringAsync(cancellationToken);
+            _logger.LogError("Embedding request failed with status {StatusCode}: {ErrorContent}",
+                response.StatusCode, errorContent);
+
+            // Surface the status code on the exception so callers can inspect it
+            // without parsing the message.
+            throw new HttpRequestException(
+                $"Embedding request failed with status code {response.StatusCode}: {errorContent}",
+                inner: null,
+                statusCode: response.StatusCode);
+        }
+
+        string responseJson = await response.Content.ReadAsStringAsync(cancellationToken);
+        EmbeddingResponse? embeddingResponse =
+            JsonSerializer.Deserialize<EmbeddingResponse>(responseJson, _jsonSerializerOptions);
+
+        if (embeddingResponse?.Data is null || embeddingResponse.Data.Count == 0)
+        {
+            throw new InvalidOperationException("No embedding data received from the provider.");
+        }
+
+        // Order by the index reported in the response before extracting the vectors,
+        // so results line up with the input order.
+        return embeddingResponse.Data
+            .OrderBy(d => d.Index)
+            .Select(d => d.Embedding)
+            .ToArray();
+    }
+
+    /// <summary>
+    /// Builds the request URL based on the provider type.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">
+    /// Thrown for Azure OpenAI when no model/deployment name is configured.
+    /// </exception>
+    private string BuildRequestUrl()
+    {
+        string endpoint = _options.Endpoint.TrimEnd('/');
+
+        if (_options.Provider == EmbeddingProviderType.AzureOpenAI)
+        {
+            // Azure OpenAI: {endpoint}/openai/deployments/{deployment}/embeddings?api-version={version}
+            string model = _options.EffectiveModel
+                ?? throw new InvalidOperationException("Model/deployment name is required for Azure OpenAI.");
+
+            // Both values come from configuration; escape them so characters such as
+            // '/', '?', '#', '&' or spaces cannot alter the path or query string.
+            string deployment = Uri.EscapeDataString(model);
+            string apiVersion = Uri.EscapeDataString(_options.EffectiveApiVersion);
+
+            return $"{endpoint}/openai/deployments/{deployment}/embeddings?api-version={apiVersion}";
+        }
+
+        // OpenAI: {endpoint}/v1/embeddings
+        return $"{endpoint}/v1/embeddings";
+    }
+
+    /// <summary>
+    /// Builds the request body based on the provider type.
+    /// OpenAI carries the model name in the body; Azure OpenAI identifies the deployment
+    /// in the URL instead. "dimensions" is sent only when the user configured it.
+    /// </summary>
+    /// <param name="texts">The texts to embed; must contain at least one element.</param>
+    private object BuildRequestBody(string[] texts)
+    {
+        // Use single string for single text, array for batch
+        object input = texts.Length == 1 ? texts[0] : texts;
+
+        // Null members are dropped by _jsonSerializerOptions (WhenWritingNull), so a single
+        // shape covers every provider/dimensions combination with the same JSON output.
+        string? model = _options.Provider == EmbeddingProviderType.AzureOpenAI
+            ? null
+            : _options.EffectiveModel ?? EmbeddingsOptions.DEFAULT_OPENAI_MODEL;
+        int? dimensions = _options.UserProvidedDimensions ? _options.Dimensions : null;
+
+        return new
+        {
+            model,
+            input,
+            dimensions
+        };
+    }
+
+    /// <summary>
+    /// Response model for embedding API responses.
+    /// </summary>
+    private sealed class EmbeddingResponse
+    {
+        [JsonPropertyName("data")]
+        public List<EmbeddingData>? Data { get; set; }
+
+        [JsonPropertyName("model")]
+        public string? Model { get; set; }
+
+        [JsonPropertyName("usage")]
+        public EmbeddingUsage? Usage { get; set; }
+    }
+
+    /// <summary>
+    /// Individual embedding data in the response.
+    /// </summary>
+    private sealed class EmbeddingData
+    {
+        [JsonPropertyName("index")]
+        public int Index { get; set; }
+
+        [JsonPropertyName("embedding")]
+        public float[] Embedding { get; set; } = Array.Empty<float>();
+    }
+
+    /// <summary>
+    /// Token usage information in the response.
+    /// </summary>
+    private sealed class EmbeddingUsage
+    {
+        [JsonPropertyName("prompt_tokens")]
+        public int PromptTokens { get; set; }
+
+        [JsonPropertyName("total_tokens")]
+        public int TotalTokens { get; set; }
+    }
+}
diff --git a/src/Core/Services/IEmbeddingService.cs b/src/Core/Services/IEmbeddingService.cs
new file mode 100644
index 0000000000..6e7ffb8a19
--- /dev/null
+++ b/src/Core/Services/IEmbeddingService.cs
@@ -0,0 +1,27 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+namespace Azure.DataApiBuilder.Core.Services;
+
+/// <summary>
+/// Service interface for text embedding/vectorization.
+/// Supports both single text and batch embedding operations.
+/// </summary>
+public interface IEmbeddingService
+{
+    /// <summary>
+    /// Generates an embedding vector for a single text input.
+    /// </summary>
+    /// <param name="text">The text to embed.</param>
+    /// <param name="cancellationToken">Cancellation token for the operation.</param>
+    /// <returns>The embedding vector as an array of floats.</returns>
+    Task<float[]> EmbedAsync(string text, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Generates embedding vectors for multiple text inputs in a batch.
+    /// </summary>
+    /// <param name="texts">The texts to embed.</param>
+    /// <param name="cancellationToken">Cancellation token for the operation.</param>
+    /// <returns>The embedding vectors as an array of float arrays, matching input order.</returns>
+    Task<float[][]> EmbedBatchAsync(string[] texts, CancellationToken cancellationToken = default);
+}
From 60648263bd98ad3c87b5e288560b821a94bb25b1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 3 Feb 2026 21:14:52 +0000
Subject: [PATCH 03/16] Add CLI configure options for embeddings and register
embedding service
Co-authored-by: JerryNixon <1749983+JerryNixon@users.noreply.github.com>
---
src/Cli/Commands/ConfigureOptions.cs | 36 +++++++++
src/Cli/ConfigGenerator.cs | 107 +++++++++++++++++++++++++++
src/Service/Startup.cs | 13 ++++
3 files changed, 156 insertions(+)
diff --git a/src/Cli/Commands/ConfigureOptions.cs b/src/Cli/Commands/ConfigureOptions.cs
index c3e0352249..93810ddacf 100644
--- a/src/Cli/Commands/ConfigureOptions.cs
+++ b/src/Cli/Commands/ConfigureOptions.cs
@@ -71,6 +71,13 @@ public ConfigureOptions(
RollingInterval? fileSinkRollingInterval = null,
int? fileSinkRetainedFileCountLimit = null,
long? fileSinkFileSizeLimitBytes = null,
+ EmbeddingProviderType? runtimeEmbeddingsProvider = null,
+ string? runtimeEmbeddingsEndpoint = null,
+ string? runtimeEmbeddingsApiKey = null,
+ string? runtimeEmbeddingsModel = null,
+ string? runtimeEmbeddingsApiVersion = null,
+ int? runtimeEmbeddingsDimensions = null,
+ int? runtimeEmbeddingsTimeoutMs = null,
string? config = null)
: base(config)
{
@@ -132,6 +139,14 @@ public ConfigureOptions(
FileSinkRollingInterval = fileSinkRollingInterval;
FileSinkRetainedFileCountLimit = fileSinkRetainedFileCountLimit;
FileSinkFileSizeLimitBytes = fileSinkFileSizeLimitBytes;
+ // Embeddings
+ RuntimeEmbeddingsProvider = runtimeEmbeddingsProvider;
+ RuntimeEmbeddingsEndpoint = runtimeEmbeddingsEndpoint;
+ RuntimeEmbeddingsApiKey = runtimeEmbeddingsApiKey;
+ RuntimeEmbeddingsModel = runtimeEmbeddingsModel;
+ RuntimeEmbeddingsApiVersion = runtimeEmbeddingsApiVersion;
+ RuntimeEmbeddingsDimensions = runtimeEmbeddingsDimensions;
+ RuntimeEmbeddingsTimeoutMs = runtimeEmbeddingsTimeoutMs;
}
[Option("data-source.database-type", Required = false, HelpText = "Database type. Allowed values: MSSQL, PostgreSQL, CosmosDB_NoSQL, MySQL.")]
@@ -281,6 +296,27 @@ public ConfigureOptions(
[Option("runtime.telemetry.file.file-size-limit-bytes", Required = false, HelpText = "Configure maximum file size limit in bytes. Default: 1048576")]
public long? FileSinkFileSizeLimitBytes { get; }
+ [Option("runtime.embeddings.provider", Required = false, HelpText = "Configure embedding provider type. Allowed values: azure-openai, openai.")]
+ public EmbeddingProviderType? RuntimeEmbeddingsProvider { get; }
+
+ [Option("runtime.embeddings.endpoint", Required = false, HelpText = "Configure the embedding provider base URL endpoint.")]
+ public string? RuntimeEmbeddingsEndpoint { get; }
+
+ [Option("runtime.embeddings.api-key", Required = false, HelpText = "Configure the embedding API key for authentication.")]
+ public string? RuntimeEmbeddingsApiKey { get; }
+
+ [Option("runtime.embeddings.model", Required = false, HelpText = "Configure the model/deployment name. Required for Azure OpenAI, defaults to text-embedding-3-small for OpenAI.")]
+ public string? RuntimeEmbeddingsModel { get; }
+
+ [Option("runtime.embeddings.api-version", Required = false, HelpText = "Configure the Azure API version. Only used for Azure OpenAI provider. Default: 2024-02-01")]
+ public string? RuntimeEmbeddingsApiVersion { get; }
+
+ [Option("runtime.embeddings.dimensions", Required = false, HelpText = "Configure the output vector dimensions. Optional, uses model default if not specified.")]
+ public int? RuntimeEmbeddingsDimensions { get; }
+
+ [Option("runtime.embeddings.timeout-ms", Required = false, HelpText = "Configure the request timeout in milliseconds. Default: 30000")]
+ public int? RuntimeEmbeddingsTimeoutMs { get; }
+
public int Handler(ILogger logger, FileSystemRuntimeConfigLoader loader, IFileSystem fileSystem)
{
logger.LogInformation("{productName} {version}", PRODUCT_NAME, ProductInfo.GetProductVersion());
diff --git a/src/Cli/ConfigGenerator.cs b/src/Cli/ConfigGenerator.cs
index 78a5e63a7d..b9cb93207e 100644
--- a/src/Cli/ConfigGenerator.cs
+++ b/src/Cli/ConfigGenerator.cs
@@ -908,6 +908,26 @@ options.FileSinkRetainedFileCountLimit is not null ||
}
}
+ // Embeddings: Provider, Endpoint, ApiKey, Model, ApiVersion, Dimensions, TimeoutMs
+ if (options.RuntimeEmbeddingsProvider is not null ||
+ options.RuntimeEmbeddingsEndpoint is not null ||
+ options.RuntimeEmbeddingsApiKey is not null ||
+ options.RuntimeEmbeddingsModel is not null ||
+ options.RuntimeEmbeddingsApiVersion is not null ||
+ options.RuntimeEmbeddingsDimensions is not null ||
+ options.RuntimeEmbeddingsTimeoutMs is not null)
+ {
+ bool status = TryUpdateConfiguredEmbeddingsValues(options, runtimeConfig?.Runtime?.Embeddings, out EmbeddingsOptions? updatedEmbeddingsOptions);
+ if (status && updatedEmbeddingsOptions is not null)
+ {
+ runtimeConfig = runtimeConfig! with { Runtime = runtimeConfig.Runtime! with { Embeddings = updatedEmbeddingsOptions } };
+ }
+ else
+ {
+ return false;
+ }
+ }
+
return runtimeConfig != null;
}
@@ -1522,6 +1542,93 @@ private static bool TryUpdateConfiguredFileOptions(
}
}
+        /// <summary>
+        /// Attempts to update the embeddings configuration based on the provided options.
+        /// Values not supplied on the command line fall back to the existing configuration.
+        /// Creates a new EmbeddingsOptions object if the configuration is valid.
+        /// Provider, endpoint, and API key are required when configuring embeddings;
+        /// Azure OpenAI additionally requires a model/deployment name.
+        /// </summary>
+        /// <param name="options">The configuration options provided by the user.</param>
+        /// <param name="existingEmbeddingsOptions">The existing embeddings options from the runtime configuration.</param>
+        /// <param name="updatedEmbeddingsOptions">The resulting embeddings options if successful; otherwise null.</param>
+        /// <returns>True if the embeddings options were successfully configured; otherwise, false.</returns>
+        private static bool TryUpdateConfiguredEmbeddingsValues(
+            ConfigureOptions options,
+            EmbeddingsOptions? existingEmbeddingsOptions,
+            out EmbeddingsOptions? updatedEmbeddingsOptions)
+        {
+            updatedEmbeddingsOptions = null;
+
+            try
+            {
+                // Get values from options or fall back to existing configuration
+                EmbeddingProviderType? provider = options.RuntimeEmbeddingsProvider ?? existingEmbeddingsOptions?.Provider;
+                string? endpoint = options.RuntimeEmbeddingsEndpoint ?? existingEmbeddingsOptions?.Endpoint;
+                string? apiKey = options.RuntimeEmbeddingsApiKey ?? existingEmbeddingsOptions?.ApiKey;
+                string? model = options.RuntimeEmbeddingsModel ?? existingEmbeddingsOptions?.Model;
+                string? apiVersion = options.RuntimeEmbeddingsApiVersion ?? existingEmbeddingsOptions?.ApiVersion;
+                int? dimensions = options.RuntimeEmbeddingsDimensions ?? existingEmbeddingsOptions?.Dimensions;
+                int? timeoutMs = options.RuntimeEmbeddingsTimeoutMs ?? existingEmbeddingsOptions?.TimeoutMs;
+
+                // Validate required fields
+                if (provider is null)
+                {
+                    _logger.LogError("Failed to configure embeddings: provider is required. Use --runtime.embeddings.provider to specify the provider (azure-openai or openai).");
+                    return false;
+                }
+
+                if (string.IsNullOrEmpty(endpoint))
+                {
+                    _logger.LogError("Failed to configure embeddings: endpoint is required. Use --runtime.embeddings.endpoint to specify the provider base URL.");
+                    return false;
+                }
+
+                if (string.IsNullOrEmpty(apiKey))
+                {
+                    _logger.LogError("Failed to configure embeddings: api-key is required. Use --runtime.embeddings.api-key to specify the authentication key.");
+                    return false;
+                }
+
+                // Validate Azure OpenAI requires model/deployment name
+                if (provider == EmbeddingProviderType.AzureOpenAI && string.IsNullOrEmpty(model))
+                {
+                    _logger.LogError("Failed to configure embeddings: model/deployment name is required for Azure OpenAI provider. Use --runtime.embeddings.model to specify the deployment name.");
+                    return false;
+                }
+
+                // api-version is only used by Azure OpenAI, and the JSON schema disallows it
+                // for the openai provider. Drop it here (for example a value carried over from
+                // a previous azure-openai configuration) so the written config stays schema-valid.
+                if (provider == EmbeddingProviderType.OpenAI && apiVersion is not null)
+                {
+                    _logger.LogWarning("Ignoring api-version for embeddings: it is only used by the azure-openai provider.");
+                    apiVersion = null;
+                }
+
+                // Validate dimensions if provided
+                if (dimensions is not null && dimensions <= 0)
+                {
+                    _logger.LogError("Failed to configure embeddings: dimensions must be a positive integer.");
+                    return false;
+                }
+
+                // Validate timeout if provided
+                if (timeoutMs is not null && timeoutMs <= 0)
+                {
+                    _logger.LogError("Failed to configure embeddings: timeout-ms must be a positive integer.");
+                    return false;
+                }
+
+                // Create the embeddings options
+                updatedEmbeddingsOptions = new EmbeddingsOptions(
+                    Provider: provider.Value,
+                    Endpoint: endpoint,
+                    ApiKey: apiKey,
+                    Model: model,
+                    ApiVersion: apiVersion,
+                    Dimensions: dimensions,
+                    TimeoutMs: timeoutMs);
+
+                _logger.LogInformation("Updated RuntimeConfig with Runtime.Embeddings configuration.");
+                return true;
+            }
+            catch (Exception ex)
+            {
+                _logger.LogError("Failed to update RuntimeConfig.Embeddings with exception message: {exceptionMessage}.", ex.Message);
+                return false;
+            }
+        }
+
///
/// Parse permission string to create PermissionSetting array.
///
diff --git a/src/Service/Startup.cs b/src/Service/Startup.cs
index 333bf57234..563f42d440 100644
--- a/src/Service/Startup.cs
+++ b/src/Service/Startup.cs
@@ -387,6 +387,19 @@ public void ConfigureServices(IServiceCollection services)
services.AddSingleton();
services.AddSingleton();
+ // Register embedding service if configured
+ if (runtimeConfigAvailable
+ && runtimeConfig?.Runtime?.IsEmbeddingsConfigured == true)
+ {
+ EmbeddingsOptions embeddingsOptions = runtimeConfig.Runtime.Embeddings;
+ services.AddHttpClient(client =>
+ {
+ // Base configuration is done in the EmbeddingService constructor
+ }).ConfigurePrimaryHttpMessageHandler(() => new HttpClientHandler());
+
+ services.AddSingleton(embeddingsOptions);
+ }
+
AddGraphQLService(services, runtimeConfig?.Runtime?.GraphQL);
// Subscribe the GraphQL schema refresh method to the specific hot-reload event
From 0653f15e12c31b7cee6978f6d3b29836f7bb9fc4 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 3 Feb 2026 21:22:16 +0000
Subject: [PATCH 04/16] Add unit tests for embeddings and update JSON schema
with embeddings configuration
Co-authored-by: JerryNixon <1749983+JerryNixon@users.noreply.github.com>
---
schemas/dab.draft.schema.json | 81 ++++
.../ObjectModel/EmbeddingProviderType.cs | 2 +-
.../UnitTests/EmbeddingServiceTests.cs | 328 +++++++++++++++++
.../UnitTests/EmbeddingsOptionsTests.cs | 345 ++++++++++++++++++
4 files changed, 755 insertions(+), 1 deletion(-)
create mode 100644 src/Service.Tests/UnitTests/EmbeddingServiceTests.cs
create mode 100644 src/Service.Tests/UnitTests/EmbeddingsOptionsTests.cs
diff --git a/schemas/dab.draft.schema.json b/schemas/dab.draft.schema.json
index 920c0a4da6..cb7d309828 100644
--- a/schemas/dab.draft.schema.json
+++ b/schemas/dab.draft.schema.json
@@ -642,6 +642,87 @@
"default": 4
}
}
+ },
+ "embeddings": {
+ "type": "object",
+ "description": "Configuration for text embedding/vectorization service. Supports OpenAI and Azure OpenAI providers.",
+ "additionalProperties": false,
+ "properties": {
+ "provider": {
+ "type": "string",
+ "description": "The embedding provider type.",
+ "enum": ["azure-openai", "openai"]
+ },
+ "endpoint": {
+ "type": "string",
+ "description": "The provider base URL endpoint. For Azure OpenAI, use the Azure resource endpoint. For OpenAI, use https://api.openai.com."
+ },
+ "api-key": {
+ "type": "string",
+ "description": "The API key for authentication. Supports environment variable substitution with @env('VAR_NAME')."
+ },
+ "model": {
+ "type": "string",
+ "description": "The model or deployment name. Required for Azure OpenAI (deployment name). For OpenAI, defaults to 'text-embedding-3-small' if not specified."
+ },
+ "api-version": {
+ "type": "string",
+ "description": "Azure API version. Only used for Azure OpenAI provider.",
+ "default": "2024-02-01"
+ },
+ "dimensions": {
+ "type": "integer",
+ "description": "Output vector dimensions. Optional, uses model default if not specified. Useful for Redis schema alignment.",
+ "minimum": 1
+ },
+ "timeout-ms": {
+ "type": "integer",
+ "description": "Request timeout in milliseconds.",
+ "default": 30000,
+ "minimum": 1,
+ "maximum": 300000
+ }
+ },
+ "required": ["provider", "endpoint", "api-key"],
+ "allOf": [
+ {
+ "$comment": "Azure OpenAI requires the model (deployment name) to be specified.",
+ "if": {
+ "properties": {
+ "provider": {
+ "const": "azure-openai"
+ }
+ },
+ "required": ["provider"]
+ },
+ "then": {
+ "required": ["model"],
+ "properties": {
+ "api-version": {
+ "type": "string",
+ "description": "Azure API version. Optional for the Azure OpenAI provider; defaults to 2024-02-01 when omitted.",
+ "default": "2024-02-01"
+ }
+ }
+ }
+ },
+ {
+ "$comment": "OpenAI does not require model (defaults to text-embedding-3-small) and does not use api-version.",
+ "if": {
+ "properties": {
+ "provider": {
+ "const": "openai"
+ }
+ },
+ "required": ["provider"]
+ },
+ "then": {
+ "properties": {
+ "api-version": false
+ }
+ }
+ }
+ ]
}
}
},
diff --git a/src/Config/ObjectModel/EmbeddingProviderType.cs b/src/Config/ObjectModel/EmbeddingProviderType.cs
index 0a18d491bb..2ead4470dd 100644
--- a/src/Config/ObjectModel/EmbeddingProviderType.cs
+++ b/src/Config/ObjectModel/EmbeddingProviderType.cs
@@ -21,7 +21,7 @@ public enum EmbeddingProviderType
///
/// OpenAI embedding provider.
+ /// Lowercase "openai" is the serialized value.
///
- [EnumMember(Value = "openai")]
OpenAI
}
diff --git a/src/Service.Tests/UnitTests/EmbeddingServiceTests.cs b/src/Service.Tests/UnitTests/EmbeddingServiceTests.cs
new file mode 100644
index 0000000000..d5f00e494e
--- /dev/null
+++ b/src/Service.Tests/UnitTests/EmbeddingServiceTests.cs
@@ -0,0 +1,328 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+using System;
+using System.Net;
+using System.Net.Http;
+using System.Text;
+using System.Text.Json;
+using System.Threading;
+using System.Threading.Tasks;
+using Azure.DataApiBuilder.Config.ObjectModel;
+using Azure.DataApiBuilder.Core.Services;
+using Microsoft.Extensions.Logging;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using Moq;
+using Moq.Protected;
+
+namespace Azure.DataApiBuilder.Service.Tests.UnitTests;
+
+///
+/// Unit tests for EmbeddingService.
+///
+[TestClass]
+public class EmbeddingServiceTests
+{
+ private Mock> _mockLogger = null!;
+
+ [TestInitialize]
+ public void Setup()
+ {
+ _mockLogger = new Mock>();
+ }
+
+ ///
+ /// Tests that EmbedAsync returns embedding for a single text input.
+ ///
+ [TestMethod]
+ public async Task EmbedAsync_SingleText_ReturnsEmbedding()
+ {
+ // Arrange
+ EmbeddingsOptions options = CreateAzureOpenAIOptions();
+ float[] expectedEmbedding = new[] { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f };
+ HttpClient httpClient = CreateMockHttpClient(CreateSuccessResponse(expectedEmbedding));
+ EmbeddingService service = new(httpClient, options, _mockLogger.Object);
+
+ // Act
+ float[] result = await service.EmbedAsync("Hello world");
+
+ // Assert
+ Assert.IsNotNull(result);
+ Assert.AreEqual(expectedEmbedding.Length, result.Length);
+ for (int i = 0; i < expectedEmbedding.Length; i++)
+ {
+ Assert.AreEqual(expectedEmbedding[i], result[i]);
+ }
+ }
+
+ ///
+ /// Tests that EmbedBatchAsync returns embeddings for multiple text inputs.
+ ///
+ [TestMethod]
+ public async Task EmbedBatchAsync_MultipleTexts_ReturnsEmbeddings()
+ {
+ // Arrange
+ EmbeddingsOptions options = CreateAzureOpenAIOptions();
+ float[][] expectedEmbeddings = new[]
+ {
+ new[] { 0.1f, 0.2f, 0.3f },
+ new[] { 0.4f, 0.5f, 0.6f },
+ new[] { 0.7f, 0.8f, 0.9f }
+ };
+ HttpClient httpClient = CreateMockHttpClient(CreateBatchSuccessResponse(expectedEmbeddings));
+ EmbeddingService service = new(httpClient, options, _mockLogger.Object);
+
+ // Act
+ float[][] result = await service.EmbedBatchAsync(new[] { "Text 1", "Text 2", "Text 3" });
+
+ // Assert
+ Assert.IsNotNull(result);
+ Assert.AreEqual(expectedEmbeddings.Length, result.Length);
+ for (int i = 0; i < expectedEmbeddings.Length; i++)
+ {
+ Assert.AreEqual(expectedEmbeddings[i].Length, result[i].Length);
+ }
+ }
+
+ ///
+ /// Tests that EmbedAsync throws ArgumentException for null or empty text.
+ ///
+ [DataTestMethod]
+ [DataRow(null, DisplayName = "Null text throws ArgumentException")]
+ [DataRow("", DisplayName = "Empty text throws ArgumentException")]
+ public async Task EmbedAsync_NullOrEmptyText_ThrowsArgumentException(string text)
+ {
+ // Arrange
+ EmbeddingsOptions options = CreateAzureOpenAIOptions();
+ HttpClient httpClient = CreateMockHttpClient(CreateSuccessResponse(new[] { 0.1f }));
+ EmbeddingService service = new(httpClient, options, _mockLogger.Object);
+
+ // Act & Assert
+ await Assert.ThrowsExceptionAsync(() => service.EmbedAsync(text!));
+ }
+
+ ///
+ /// Tests that EmbedBatchAsync throws ArgumentException for null or empty texts array.
+ ///
+ [TestMethod]
+ public async Task EmbedBatchAsync_EmptyTexts_ThrowsArgumentException()
+ {
+ // Arrange
+ EmbeddingsOptions options = CreateAzureOpenAIOptions();
+ HttpClient httpClient = CreateMockHttpClient(CreateSuccessResponse(new[] { 0.1f }));
+ EmbeddingService service = new(httpClient, options, _mockLogger.Object);
+
+ // Act & Assert
+ await Assert.ThrowsExceptionAsync(() => service.EmbedBatchAsync(Array.Empty()));
+ }
+
+ ///
+ /// Tests that HttpRequestException is thrown when API returns an error.
+ ///
+ [TestMethod]
+ public async Task EmbedAsync_ApiError_ThrowsHttpRequestException()
+ {
+ // Arrange
+ EmbeddingsOptions options = CreateAzureOpenAIOptions();
+ HttpClient httpClient = CreateMockHttpClient(CreateErrorResponse(HttpStatusCode.Unauthorized, "Invalid API key"));
+ EmbeddingService service = new(httpClient, options, _mockLogger.Object);
+
+ // Act & Assert
+ await Assert.ThrowsExceptionAsync(() => service.EmbedAsync("Test text"));
+ }
+
+ ///
+ /// Tests that InvalidOperationException is thrown when API returns empty data.
+ ///
+ [TestMethod]
+ public async Task EmbedAsync_EmptyResponse_ThrowsInvalidOperationException()
+ {
+ // Arrange
+ EmbeddingsOptions options = CreateAzureOpenAIOptions();
+ string emptyResponse = JsonSerializer.Serialize(new { data = Array.Empty