From dbc3d0f670994de8c2a248a582795f49d4b37a3a Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Fri, 22 May 2026 07:26:16 -0700 Subject: [PATCH 01/10] fix: address code review issues in select-algorithm samples Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ai/select-algorithm-dotnet/Models/Configuration.cs | 2 +- ai/select-algorithm-go/src/utils.go | 2 +- .../com/azure/documentdb/selectalgorithm/CompareAll.java | 9 ++++----- .../java/com/azure/documentdb/selectalgorithm/Utils.java | 9 ++++----- ai/select-algorithm-python/src/utils.py | 2 +- ai/select-algorithm-typescript/src/utils.ts | 9 +++++++-- 6 files changed, 18 insertions(+), 15 deletions(-) diff --git a/ai/select-algorithm-dotnet/Models/Configuration.cs b/ai/select-algorithm-dotnet/Models/Configuration.cs index cbca25b..9f241a8 100644 --- a/ai/select-algorithm-dotnet/Models/Configuration.cs +++ b/ai/select-algorithm-dotnet/Models/Configuration.cs @@ -37,5 +37,5 @@ public class VectorSearchConfiguration public class DataFilesConfiguration { - public string WithVectors { get; set; } = "data/Hotels_Vector.json"; + public string WithVectors { get; set; } = "../data/Hotels_Vector.json"; } diff --git a/ai/select-algorithm-go/src/utils.go b/ai/select-algorithm-go/src/utils.go index c358892..c6042b6 100644 --- a/ai/select-algorithm-go/src/utils.go +++ b/ai/select-algorithm-go/src/utils.go @@ -60,7 +60,7 @@ func LoadConfig() (*Config, error) { return &Config{ ClusterName: getEnvOrDefault("DOCUMENTDB_CLUSTER_NAME", ""), DatabaseName: getEnvOrDefault("AZURE_DOCUMENTDB_DATABASENAME", "Hotels"), - DataFile: getEnvOrDefault("DATA_FILE_WITH_VECTORS", "data/Hotels_Vector.json"), + DataFile: getEnvOrDefault("DATA_FILE_WITH_VECTORS", "../data/Hotels_Vector.json"), VectorField: getEnvOrDefault("EMBEDDED_FIELD", "DescriptionVector"), ModelName: getEnvOrDefault("AZURE_OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"), Dimensions: dimensions, diff --git 
a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java index 66281ed..e41fd46 100644 --- a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java +++ b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java @@ -28,7 +28,7 @@ public static void run() { int topK = Integer.parseInt(Utils.getEnv("TOP_K", "5")); String databaseName = Utils.getEnv("AZURE_DOCUMENTDB_DATABASENAME", "Hotels"); - String dataFile = Utils.getEnv("DATA_FILE_WITH_VECTORS", "data/Hotels_Vector.json"); + String dataFile = Utils.getEnv("DATA_FILE_WITH_VECTORS", "../data/Hotels_Vector.json"); String vectorField = Utils.getEnv("EMBEDDED_FIELD", "DescriptionVector"); int dimensions = Integer.parseInt(Utils.getEnv("EMBEDDING_DIMENSIONS", "1536")); String model = Utils.getEnv("AZURE_OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"); @@ -113,12 +113,11 @@ public static void run() { int successCount = (int) results.stream().filter(r -> !r.top1Name().equals("(failed)")).count(); if (successCount == 0) { - System.out.println("\n❌ All 9 comparisons failed — no algorithm returned results."); - System.exit(1); - } else { - System.out.printf("%nSummary: %d succeeded, %d failed%n", successCount, 9 - successCount); + throw new IllegalStateException("All 9 comparisons failed — no algorithm returned results."); } + System.out.printf("%nSummary: %d succeeded, %d failed%n", successCount, 9 - successCount); + // Cleanup: drop the comparison collection System.out.println("\n Cleanup: dropping comparison collection..."); collection.drop(); diff --git a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/Utils.java b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/Utils.java index b8b761e..30dca39 100644 --- 
a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/Utils.java +++ b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/Utils.java @@ -7,7 +7,6 @@ import com.azure.core.credential.AccessToken; import com.azure.identity.DefaultAzureCredential; import com.azure.identity.DefaultAzureCredentialBuilder; -import com.mongodb.ConnectionString; import com.mongodb.MongoClientSettings; import com.mongodb.MongoCredential; import com.mongodb.client.MongoClient; @@ -39,8 +38,7 @@ public static MongoClient getMongoClient() { throw new IllegalStateException("DOCUMENTDB_CLUSTER_NAME environment variable is required"); } - String connectionUri = String.format( - "mongodb+srv://%s.global.mongocluster.cosmos.azure.com/", clusterName); + String clusterHost = String.format("%s.global.mongocluster.cosmos.azure.com", clusterName); // Use custom OIDC callback with DefaultAzureCredential // This chains through CLI, managed identity, etc. @@ -56,9 +54,10 @@ public static MongoClient getMongoClient() { }); MongoClientSettings settings = MongoClientSettings.builder() - .applyConnectionString(new ConnectionString(connectionUri)) + .applyToClusterSettings(builder -> builder.srvHost(clusterHost)) + .applyToSslSettings(builder -> builder.enabled(true)) .credential(mongoCredential) - .retryWrites(false) + .retryWrites(true) .build(); return MongoClients.create(settings); diff --git a/ai/select-algorithm-python/src/utils.py b/ai/select-algorithm-python/src/utils.py index de0f8b4..e97ea9a 100644 --- a/ai/select-algorithm-python/src/utils.py +++ b/ai/select-algorithm-python/src/utils.py @@ -64,7 +64,7 @@ def get_config() -> Dict[str, Any]: """Load configuration from environment variables.""" return { 'database_name': os.getenv('AZURE_DOCUMENTDB_DATABASENAME', 'Hotels'), - 'data_file': os.getenv('DATA_FILE_WITH_VECTORS', 'data/Hotels_Vector.json'), + 'data_file': os.getenv('DATA_FILE_WITH_VECTORS', '../data/Hotels_Vector.json'), 'vector_field': 
os.getenv('EMBEDDED_FIELD', 'DescriptionVector'), 'model_name': os.getenv('AZURE_OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small'), 'dimensions': int(os.getenv('EMBEDDING_DIMENSIONS', '1536')), diff --git a/ai/select-algorithm-typescript/src/utils.ts b/ai/select-algorithm-typescript/src/utils.ts index 09ec3dd..5f99337 100644 --- a/ai/select-algorithm-typescript/src/utils.ts +++ b/ai/select-algorithm-typescript/src/utils.ts @@ -20,9 +20,14 @@ export function getConfig() { export const AzureIdentityTokenCallback = async (params: OIDCCallbackParams, credential: TokenCredential): Promise => { const tokenResponse: AccessToken | null = await credential.getToken(['https://ossrdbms-aad.database.windows.net/.default']); + + if (!tokenResponse || !tokenResponse.token) { + throw new Error('Failed to acquire token'); + } + return { - accessToken: tokenResponse?.token || '', - expiresInSeconds: Math.floor(((tokenResponse?.expiresOnTimestamp || 0) - Date.now()) / 1000) + accessToken: tokenResponse.token, + expiresInSeconds: Math.floor((tokenResponse.expiresOnTimestamp - Date.now()) / 1000) }; }; From 8a2796cb5560e6979e595db4bd627d4826de6c06 Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Fri, 22 May 2026 07:49:28 -0700 Subject: [PATCH 02/10] Fix data paths (local data/) and Java retryWrites(false) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ai/select-algorithm-dotnet/Models/Configuration.cs | 2 +- ai/select-algorithm-dotnet/README.md | 6 +++--- ai/select-algorithm-dotnet/appsettings.json | 2 +- ai/select-algorithm-go/README.md | 8 ++++---- ai/select-algorithm-go/src/utils.go | 2 +- ai/select-algorithm-java/README.md | 4 ++-- .../com/azure/documentdb/selectalgorithm/CompareAll.java | 2 +- .../java/com/azure/documentdb/selectalgorithm/Utils.java | 2 +- ai/select-algorithm-python/README.md | 4 ++-- ai/select-algorithm-python/src/utils.py | 2 +- 10 files changed, 17 insertions(+), 17 deletions(-) diff --git 
a/ai/select-algorithm-dotnet/Models/Configuration.cs b/ai/select-algorithm-dotnet/Models/Configuration.cs index 9f241a8..cbca25b 100644 --- a/ai/select-algorithm-dotnet/Models/Configuration.cs +++ b/ai/select-algorithm-dotnet/Models/Configuration.cs @@ -37,5 +37,5 @@ public class VectorSearchConfiguration public class DataFilesConfiguration { - public string WithVectors { get; set; } = "../data/Hotels_Vector.json"; + public string WithVectors { get; set; } = "data/Hotels_Vector.json"; } diff --git a/ai/select-algorithm-dotnet/README.md b/ai/select-algorithm-dotnet/README.md index 2621f77..fd30957 100644 --- a/ai/select-algorithm-dotnet/README.md +++ b/ai/select-algorithm-dotnet/README.md @@ -46,7 +46,7 @@ Demonstrates three vector index algorithms available in Azure DocumentDB: "EmbeddingSizeBatch": 16 }, "DataFiles": { - "WithVectors": "../data/Hotels_Vector.json" + "WithVectors": "data/Hotels_Vector.json" } } ``` @@ -54,7 +54,7 @@ Demonstrates three vector index algorithms available in Azure DocumentDB: 3. Copy the shared data file: ```bash - cp ../data/Hotels_Vector.json . + cp data/Hotels_Vector.json . ``` 4. 
Restore packages: @@ -78,7 +78,7 @@ dotnet run | `MongoDB:ClusterName` | (required) | DocumentDB cluster name | | `AzureOpenAI:Endpoint` | (required) | Azure OpenAI endpoint | | `AzureOpenAI:EmbeddingModel` | (required) | Embedding model deployment name | -| `DataFiles:WithVectors` | `../data/Hotels_Vector.json` | Path to vectors JSON file | +| `DataFiles:WithVectors` | `data/Hotels_Vector.json` | Path to vectors JSON file | | `Embedding:EmbeddedField` | `DescriptionVector` | Field name containing embeddings | | `Embedding:Dimensions` | `1536` | Vector dimensions | | `MongoDB:DatabaseName` | `Hotels` | Target database name | diff --git a/ai/select-algorithm-dotnet/appsettings.json b/ai/select-algorithm-dotnet/appsettings.json index 68ee696..3b38afb 100644 --- a/ai/select-algorithm-dotnet/appsettings.json +++ b/ai/select-algorithm-dotnet/appsettings.json @@ -14,7 +14,7 @@ "EmbeddingModel": "text-embedding-3-small" }, "DataFiles": { - "WithVectors": "../data/Hotels_Vector.json" + "WithVectors": "data/Hotels_Vector.json" }, "Embedding": { "EmbeddedField": "DescriptionVector", diff --git a/ai/select-algorithm-go/README.md b/ai/select-algorithm-go/README.md index f03828e..de14f79 100644 --- a/ai/select-algorithm-go/README.md +++ b/ai/select-algorithm-go/README.md @@ -38,7 +38,7 @@ This sample demonstrates how to compare different vector search algorithms (IVF, AZURE_OPENAI_EMBEDDING_ENDPOINT=https://your-resource.openai.azure.com AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small AZURE_DOCUMENTDB_DATABASENAME=Hotels - DATA_FILE_WITH_VECTORS=../data/Hotels_Vector.json + DATA_FILE_WITH_VECTORS=data/Hotels_Vector.json EMBEDDED_FIELD=DescriptionVector EMBEDDING_DIMENSIONS=1536 ``` @@ -46,10 +46,10 @@ This sample demonstrates how to compare different vector search algorithms (IVF, 3. **Copy the shared data file** into this directory: ```bash - cp ../data/Hotels_Vector.json . + cp data/Hotels_Vector.json . 
``` - The `DATA_FILE_WITH_VECTORS` env var defaults to `../data/Hotels_Vector.json`. + The `DATA_FILE_WITH_VECTORS` env var defaults to `data/Hotels_Vector.json`. 4. **Install dependencies**: @@ -116,7 +116,7 @@ go run ./src/... | `AZURE_OPENAI_EMBEDDING_ENDPOINT` | *(required)* | Azure OpenAI endpoint | | `AZURE_OPENAI_EMBEDDING_MODEL` | `text-embedding-3-small` | Embedding model name | | `AZURE_DOCUMENTDB_DATABASENAME` | `Hotels` | Database name | -| `DATA_FILE_WITH_VECTORS` | `../data/Hotels_Vector.json` | Path to data file | +| `DATA_FILE_WITH_VECTORS` | `data/Hotels_Vector.json` | Path to data file | | `EMBEDDED_FIELD` | `DescriptionVector` | Field containing embeddings | | `EMBEDDING_DIMENSIONS` | `1536` | Embedding vector dimensions | | `LOAD_SIZE_BATCH` | `100` | Batch size for data insertion | diff --git a/ai/select-algorithm-go/src/utils.go b/ai/select-algorithm-go/src/utils.go index c6042b6..c358892 100644 --- a/ai/select-algorithm-go/src/utils.go +++ b/ai/select-algorithm-go/src/utils.go @@ -60,7 +60,7 @@ func LoadConfig() (*Config, error) { return &Config{ ClusterName: getEnvOrDefault("DOCUMENTDB_CLUSTER_NAME", ""), DatabaseName: getEnvOrDefault("AZURE_DOCUMENTDB_DATABASENAME", "Hotels"), - DataFile: getEnvOrDefault("DATA_FILE_WITH_VECTORS", "../data/Hotels_Vector.json"), + DataFile: getEnvOrDefault("DATA_FILE_WITH_VECTORS", "data/Hotels_Vector.json"), VectorField: getEnvOrDefault("EMBEDDED_FIELD", "DescriptionVector"), ModelName: getEnvOrDefault("AZURE_OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"), Dimensions: dimensions, diff --git a/ai/select-algorithm-java/README.md b/ai/select-algorithm-java/README.md index 2449f40..119efb0 100644 --- a/ai/select-algorithm-java/README.md +++ b/ai/select-algorithm-java/README.md @@ -37,7 +37,7 @@ This sample demonstrates how to compare all three vector search index algorithms 3. Copy the shared data file: ```bash - cp ../data/Hotels_Vector.json . + cp data/Hotels_Vector.json . 
``` ## Build @@ -81,7 +81,7 @@ $env:ALGORITHM="compare"; mvn exec:java | `DOCUMENTDB_CLUSTER_NAME` | (required) | DocumentDB cluster name | | `AZURE_OPENAI_EMBEDDING_ENDPOINT` | (required) | Azure OpenAI endpoint | | `AZURE_OPENAI_EMBEDDING_MODEL` | (required) | Embedding model deployment name | -| `DATA_FILE_WITH_VECTORS` | `../data/Hotels_Vector.json` | Path to vectors JSON file | +| `DATA_FILE_WITH_VECTORS` | `data/Hotels_Vector.json` | Path to vectors JSON file | | `EMBEDDED_FIELD` | `DescriptionVector` | Field name containing embeddings | | `EMBEDDING_DIMENSIONS` | `1536` | Vector dimensions | | `AZURE_DOCUMENTDB_DATABASENAME` | `Hotels` | Target database name | diff --git a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java index e41fd46..2d22e64 100644 --- a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java +++ b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java @@ -28,7 +28,7 @@ public static void run() { int topK = Integer.parseInt(Utils.getEnv("TOP_K", "5")); String databaseName = Utils.getEnv("AZURE_DOCUMENTDB_DATABASENAME", "Hotels"); - String dataFile = Utils.getEnv("DATA_FILE_WITH_VECTORS", "../data/Hotels_Vector.json"); + String dataFile = Utils.getEnv("DATA_FILE_WITH_VECTORS", "data/Hotels_Vector.json"); String vectorField = Utils.getEnv("EMBEDDED_FIELD", "DescriptionVector"); int dimensions = Integer.parseInt(Utils.getEnv("EMBEDDING_DIMENSIONS", "1536")); String model = Utils.getEnv("AZURE_OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"); diff --git a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/Utils.java b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/Utils.java index 30dca39..641dcb5 100644 --- 
a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/Utils.java +++ b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/Utils.java @@ -57,7 +57,7 @@ public static MongoClient getMongoClient() { .applyToClusterSettings(builder -> builder.srvHost(clusterHost)) .applyToSslSettings(builder -> builder.enabled(true)) .credential(mongoCredential) - .retryWrites(true) + .retryWrites(false) .build(); return MongoClients.create(settings); diff --git a/ai/select-algorithm-python/README.md b/ai/select-algorithm-python/README.md index 1fe7746..becef58 100644 --- a/ai/select-algorithm-python/README.md +++ b/ai/select-algorithm-python/README.md @@ -57,7 +57,7 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB. Each 3. Copy the shared data file: ```bash - cp ../data/Hotels_Vector.json . + cp data/Hotels_Vector.json . ``` 4. Ensure you're logged in to Azure: @@ -83,7 +83,7 @@ The script creates a single `hotels` collection, loads data once, then for each | `DOCUMENTDB_CLUSTER_NAME` | (required) | DocumentDB cluster name | | `AZURE_OPENAI_EMBEDDING_ENDPOINT` | (required) | Azure OpenAI endpoint | | `AZURE_OPENAI_EMBEDDING_MODEL` | (required) | Embedding model deployment name | -| `DATA_FILE_WITH_VECTORS` | `../data/Hotels_Vector.json` | Path to vectors JSON file | +| `DATA_FILE_WITH_VECTORS` | `data/Hotels_Vector.json` | Path to vectors JSON file | | `EMBEDDED_FIELD` | `DescriptionVector` | Field name containing embeddings | | `EMBEDDING_DIMENSIONS` | `1536` | Vector dimensions | | `AZURE_DOCUMENTDB_DATABASENAME` | `Hotels` | Target database name | diff --git a/ai/select-algorithm-python/src/utils.py b/ai/select-algorithm-python/src/utils.py index e97ea9a..de0f8b4 100644 --- a/ai/select-algorithm-python/src/utils.py +++ b/ai/select-algorithm-python/src/utils.py @@ -64,7 +64,7 @@ def get_config() -> Dict[str, Any]: """Load configuration from environment variables.""" return { 'database_name': 
os.getenv('AZURE_DOCUMENTDB_DATABASENAME', 'Hotels'), - 'data_file': os.getenv('DATA_FILE_WITH_VECTORS', '../data/Hotels_Vector.json'), + 'data_file': os.getenv('DATA_FILE_WITH_VECTORS', 'data/Hotels_Vector.json'), 'vector_field': os.getenv('EMBEDDED_FIELD', 'DescriptionVector'), 'model_name': os.getenv('AZURE_OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small'), 'dimensions': int(os.getenv('EMBEDDING_DIMENSIONS', '1536')), From 3167d632572e81294edea848fec4466c85cc7930 Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Fri, 22 May 2026 08:03:47 -0700 Subject: [PATCH 03/10] Fix TS README: data path is local data/ not ../data/ --- ai/select-algorithm-typescript/README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ai/select-algorithm-typescript/README.md b/ai/select-algorithm-typescript/README.md index 73414a8..254eee3 100644 --- a/ai/select-algorithm-typescript/README.md +++ b/ai/select-algorithm-typescript/README.md @@ -52,13 +52,14 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB using | `LOAD_SIZE_BATCH` | Batch size for data insertion | | `SIMILARITY` | Similarity metric: `COS`, `L2`, or `IP` | -5. **Copy the shared data file** into this directory: +5. **Create a `data/` directory and copy the data file:** ```bash - cp ../data/Hotels_Vector.json . + mkdir -p data + cp ../data/Hotels_Vector.json data/ ``` - The `DATA_FILE_WITH_VECTORS` env var defaults to `../data/Hotels_Vector.json`. + The `DATA_FILE_WITH_VECTORS` env var defaults to `data/Hotels_Vector.json`. 6. 
**Build the project:** From 63da6f3f1cbb73867397f6f5ef508d0aa9ff31fb Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Fri, 22 May 2026 08:05:10 -0700 Subject: [PATCH 04/10] Rename .NET config section MongoDB -> DocumentDB Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ai/select-algorithm-dotnet/CompareAll.cs | 4 ++-- ai/select-algorithm-dotnet/Models/Configuration.cs | 4 ++-- ai/select-algorithm-dotnet/README.md | 8 ++++---- ai/select-algorithm-dotnet/Utils.cs | 4 ++-- ai/select-algorithm-dotnet/appsettings.json | 2 +- ai/select-algorithm-dotnet/quickstart.md | 10 +++++----- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/ai/select-algorithm-dotnet/CompareAll.cs b/ai/select-algorithm-dotnet/CompareAll.cs index 9eb9c75..2cf3d10 100644 --- a/ai/select-algorithm-dotnet/CompareAll.cs +++ b/ai/select-algorithm-dotnet/CompareAll.cs @@ -30,11 +30,11 @@ public static void Run(AppConfiguration appConfig) Console.WriteLine(new string('=', 60)); // Use config values with env var overrides for compare-specific settings - var databaseName = appConfig.MongoDB.DatabaseName; + var databaseName = appConfig.DocumentDB.DatabaseName; var dataFile = appConfig.DataFiles.WithVectors; var vectorField = appConfig.Embedding.EmbeddedField; var dimensions = appConfig.Embedding.Dimensions; - var batchSize = appConfig.MongoDB.LoadBatchSize; + var batchSize = appConfig.DocumentDB.LoadBatchSize; var queryText = Environment.GetEnvironmentVariable("QUERY_TEXT") ?? "luxury hotel near the beach"; var topK = int.Parse(Environment.GetEnvironmentVariable("TOP_K") ?? 
"5"); diff --git a/ai/select-algorithm-dotnet/Models/Configuration.cs b/ai/select-algorithm-dotnet/Models/Configuration.cs index cbca25b..3b4cb09 100644 --- a/ai/select-algorithm-dotnet/Models/Configuration.cs +++ b/ai/select-algorithm-dotnet/Models/Configuration.cs @@ -3,7 +3,7 @@ namespace SelectAlgorithm.Models; public class AppConfiguration { public AzureOpenAIConfiguration AzureOpenAI { get; set; } = new(); - public MongoDBConfiguration MongoDB { get; set; } = new(); + public DocumentDBConfiguration DocumentDB { get; set; } = new(); public EmbeddingConfiguration Embedding { get; set; } = new(); public VectorSearchConfiguration VectorSearch { get; set; } = new(); public DataFilesConfiguration DataFiles { get; set; } = new(); @@ -15,7 +15,7 @@ public class AzureOpenAIConfiguration public string EmbeddingModel { get; set; } = "text-embedding-3-small"; } -public class MongoDBConfiguration +public class DocumentDBConfiguration { public string ClusterName { get; set; } = string.Empty; public string DatabaseName { get; set; } = "Hotels"; diff --git a/ai/select-algorithm-dotnet/README.md b/ai/select-algorithm-dotnet/README.md index fd30957..32e25cf 100644 --- a/ai/select-algorithm-dotnet/README.md +++ b/ai/select-algorithm-dotnet/README.md @@ -35,7 +35,7 @@ Demonstrates three vector index algorithms available in Azure DocumentDB: "EmbeddingModel": "text-embedding-3-small", "Endpoint": "https://.openai.azure.com" }, - "MongoDB": { + "DocumentDB": { "ClusterName": "", "DatabaseName": "Hotels", "LoadBatchSize": 100 @@ -75,14 +75,14 @@ dotnet run | Setting (appsettings.json) | Default | Description | |---------------------------|---------|-------------| -| `MongoDB:ClusterName` | (required) | DocumentDB cluster name | +| `DocumentDB:ClusterName` | (required) | DocumentDB cluster name | | `AzureOpenAI:Endpoint` | (required) | Azure OpenAI endpoint | | `AzureOpenAI:EmbeddingModel` | (required) | Embedding model deployment name | | `DataFiles:WithVectors` | 
`data/Hotels_Vector.json` | Path to vectors JSON file | | `Embedding:EmbeddedField` | `DescriptionVector` | Field name containing embeddings | | `Embedding:Dimensions` | `1536` | Vector dimensions | -| `MongoDB:DatabaseName` | `Hotels` | Target database name | -| `MongoDB:LoadBatchSize` | `100` | Batch size for data loading | +| `DocumentDB:DatabaseName` | `Hotels` | Target database name | +| `DocumentDB:LoadBatchSize` | `100` | Batch size for data loading | | `Embedding:EmbeddingSizeBatch` | `16` | Batch size for embedding requests | **Additional environment variables for compare mode:** diff --git a/ai/select-algorithm-dotnet/Utils.cs b/ai/select-algorithm-dotnet/Utils.cs index 62590ad..01d97a2 100644 --- a/ai/select-algorithm-dotnet/Utils.cs +++ b/ai/select-algorithm-dotnet/Utils.cs @@ -34,9 +34,9 @@ public static class Utils { public static IMongoClient GetMongoClientPasswordless(AppConfiguration config) { - var clusterName = config.MongoDB.ClusterName; + var clusterName = config.DocumentDB.ClusterName; if (string.IsNullOrEmpty(clusterName)) - throw new InvalidOperationException("MongoDB:ClusterName is required in appsettings.json"); + throw new InvalidOperationException("DocumentDB:ClusterName is required in appsettings.json"); var credential = new DefaultAzureCredential(); diff --git a/ai/select-algorithm-dotnet/appsettings.json b/ai/select-algorithm-dotnet/appsettings.json index 3b38afb..4e62a99 100644 --- a/ai/select-algorithm-dotnet/appsettings.json +++ b/ai/select-algorithm-dotnet/appsettings.json @@ -1,5 +1,5 @@ { - "MongoDB": { + "DocumentDB": { "DatabaseName": "Hotels", "ClusterName": "", "LoadBatchSize": 50 diff --git a/ai/select-algorithm-dotnet/quickstart.md b/ai/select-algorithm-dotnet/quickstart.md index e94c2da..140a83d 100644 --- a/ai/select-algorithm-dotnet/quickstart.md +++ b/ai/select-algorithm-dotnet/quickstart.md @@ -149,7 +149,7 @@ Find the [sample code](https://github.com/Azure-Samples/documentdb-samples/tree/ ```bash export 
AzureOpenAI__Endpoint="https://.openai.azure.com" export AzureOpenAI__EmbeddingModel="text-embedding-3-small" - export MongoDB__ClusterName="" + export DocumentDB__ClusterName="" export DataFiles__WithVectors="data/Hotels_Vector.json" export AZURE_TENANT_ID="" ``` @@ -159,7 +159,7 @@ Find the [sample code](https://github.com/Azure-Samples/documentdb-samples/tree/ ```powershell $env:AzureOpenAI__Endpoint="https://.openai.azure.com" $env:AzureOpenAI__EmbeddingModel="text-embedding-3-small" - $env:MongoDB__ClusterName="" + $env:DocumentDB__ClusterName="" $env:DataFiles__WithVectors="data/Hotels_Vector.json" $env:AZURE_TENANT_ID="" ``` @@ -171,7 +171,7 @@ Find the [sample code](https://github.com/Azure-Samples/documentdb-samples/tree/ - ``: Your Azure DocumentDB cluster name - ``: Your Microsoft Entra tenant ID - These environment variables override the matching values in `appsettings.json`. For example, `MongoDB__ClusterName` overrides `MongoDB:ClusterName` and `AzureOpenAI__Endpoint` overrides `AzureOpenAI:Endpoint`. + These environment variables override the matching values in `appsettings.json`. For example, `DocumentDB__ClusterName` overrides `DocumentDB:ClusterName` and `AzureOpenAI__Endpoint` overrides `AzureOpenAI:Endpoint`. You should always prefer passwordless authentication. For more information on setting up managed identity and the full range of your authentication options, see [Authenticate .NET apps to Azure services by using the Azure SDK for .NET](/dotnet/azure/sdk/authentication). 
@@ -205,7 +205,7 @@ Find the [sample code](https://github.com/Azure-Samples/documentdb-samples/tree/ "Endpoint": "https://.openai.azure.com", "EmbeddingModel": "text-embedding-3-small" }, - "MongoDB": { + "DocumentDB": { "ClusterName": "", "DatabaseName": "Hotels", "LoadBatchSize": 100 @@ -226,7 +226,7 @@ Find the [sample code](https://github.com/Azure-Samples/documentdb-samples/tree/ } ``` - You can keep placeholder values in `appsettings.json` and override them at runtime with environment variables such as `AzureOpenAI__Endpoint` and `MongoDB__ClusterName`. + You can keep placeholder values in `appsettings.json` and override them at runtime with environment variables such as `AzureOpenAI__Endpoint` and `DocumentDB__ClusterName`. ## Create code files From 87cbef42378367b19b77487b853fe9c3a375c18d Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Fri, 22 May 2026 08:13:42 -0700 Subject: [PATCH 05/10] Fix READMEs: data paths, connection string language, env var instructions - All 5 READMEs: fix data copy to 'mkdir -p data && cp ../data/Hotels_Vector.json data/' - Python, Java: replace 'connection strings' with 'configuration values' - TypeScript: replace .env/.env.example instructions with shell export pattern, annotate SIMILARITY as unused in compare-all mode, fix step numbering (1-5) - .NET: add note that azd is optional, users can edit appsettings.json directly - appsettings.json: LoadBatchSize 50 -> 100 to match all other languages Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ai/select-algorithm-dotnet/README.md | 8 +++---- ai/select-algorithm-dotnet/appsettings.json | 2 +- ai/select-algorithm-go/README.md | 4 ++-- ai/select-algorithm-java/README.md | 6 ++--- ai/select-algorithm-python/README.md | 6 ++--- ai/select-algorithm-typescript/README.md | 26 ++++++++++----------- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/ai/select-algorithm-dotnet/README.md b/ai/select-algorithm-dotnet/README.md index 
32e25cf..abde5f6 100644 --- a/ai/select-algorithm-dotnet/README.md +++ b/ai/select-algorithm-dotnet/README.md @@ -19,13 +19,13 @@ Demonstrates three vector index algorithms available in Azure DocumentDB: 1. **Configure environment:** - The .NET sample uses `appsettings.json` for configuration. After deploying with `azd up`, you can export values: + The .NET sample uses `appsettings.json` for configuration. You can set values directly in `appsettings.json`, or if you deployed with `azd up`, export your provisioned values first: ```bash azd env get-values ``` - Then update `appsettings.json` with your Azure resource values. + > **Note:** `azd` is optional. You can skip it and edit `appsettings.json` manually with your Azure resource values. 2. Edit `appsettings.json` with your configuration: @@ -51,10 +51,10 @@ Demonstrates three vector index algorithms available in Azure DocumentDB: } ``` -3. Copy the shared data file: +3. Copy the shared data file into the local `data/` directory: ```bash - cp data/Hotels_Vector.json . + mkdir -p data && cp ../data/Hotels_Vector.json data/ ``` 4. Restore packages: diff --git a/ai/select-algorithm-dotnet/appsettings.json b/ai/select-algorithm-dotnet/appsettings.json index 4e62a99..472d7ff 100644 --- a/ai/select-algorithm-dotnet/appsettings.json +++ b/ai/select-algorithm-dotnet/appsettings.json @@ -2,7 +2,7 @@ "DocumentDB": { "DatabaseName": "Hotels", "ClusterName": "", - "LoadBatchSize": 50 + "LoadBatchSize": 100 }, "VectorSearch": { "Similarity": "", diff --git a/ai/select-algorithm-go/README.md b/ai/select-algorithm-go/README.md index de14f79..452cde8 100644 --- a/ai/select-algorithm-go/README.md +++ b/ai/select-algorithm-go/README.md @@ -43,10 +43,10 @@ This sample demonstrates how to compare different vector search algorithms (IVF, EMBEDDING_DIMENSIONS=1536 ``` -3. **Copy the shared data file** into this directory: +3. **Copy the shared data file** into the local `data/` directory: ```bash - cp data/Hotels_Vector.json . 
+ mkdir -p data && cp ../data/Hotels_Vector.json data/ ``` The `DATA_FILE_WITH_VECTORS` env var defaults to `data/Hotels_Vector.json`. diff --git a/ai/select-algorithm-java/README.md b/ai/select-algorithm-java/README.md index 119efb0..b73f613 100644 --- a/ai/select-algorithm-java/README.md +++ b/ai/select-algorithm-java/README.md @@ -20,7 +20,7 @@ This sample demonstrates how to compare all three vector search index algorithms azd env get-values > .env ``` - This creates a `.env` file at the repository root with the connection strings and endpoints needed to run the sample. + This creates a `.env` file at the repository root with the configuration values and endpoints needed to run the sample. Alternatively, copy the example and fill in values manually: @@ -34,10 +34,10 @@ This sample demonstrates how to compare all three vector search index algorithms - `AZURE_OPENAI_EMBEDDING_MODEL` — deployment name (e.g., `text-embedding-3-small`) - `DATA_FILE_WITH_VECTORS` — path to the pre-computed vectors JSON file -3. Copy the shared data file: +3. Copy the shared data file into the local `data/` directory: ```bash - cp data/Hotels_Vector.json . + mkdir -p data && cp ../data/Hotels_Vector.json data/ ``` ## Build diff --git a/ai/select-algorithm-python/README.md b/ai/select-algorithm-python/README.md index becef58..7c9ba5c 100644 --- a/ai/select-algorithm-python/README.md +++ b/ai/select-algorithm-python/README.md @@ -40,7 +40,7 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB. Each azd env get-values > .env ``` - This creates a `.env` file at the repository root with the connection strings and endpoints needed to run the sample. + This creates a `.env` file at the repository root with the configuration values and endpoints needed to run the sample. Alternatively, copy the example and fill in values manually: @@ -54,10 +54,10 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB. 
Each pip install -r ../requirements.txt ``` -3. Copy the shared data file: +3. Copy the shared data file into the local `data/` directory: ```bash - cp data/Hotels_Vector.json . + mkdir -p data && cp ../data/Hotels_Vector.json data/ ``` 4. Ensure you're logged in to Azure: diff --git a/ai/select-algorithm-typescript/README.md b/ai/select-algorithm-typescript/README.md index 254eee3..094bd4d 100644 --- a/ai/select-algorithm-typescript/README.md +++ b/ai/select-algorithm-typescript/README.md @@ -25,18 +25,18 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB using 3. **Configure environment variables:** - After deploying with `azd up`, create a `.env` file with your provisioned resource values: + This sample reads configuration from environment variables. Export them in your shell before running: ```bash - azd env get-values > .env - ``` - - This creates a `.env` file in the project folder with the connection strings and endpoints needed to run the sample. - - Alternatively, copy the example and fill in values manually: - - ```bash - cp .env.example .env + export DOCUMENTDB_CLUSTER_NAME=your-cluster + export AZURE_OPENAI_EMBEDDING_ENDPOINT=https://your-resource.openai.azure.com + export AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small + export AZURE_OPENAI_EMBEDDING_API_VERSION=2023-05-15 + export AZURE_DOCUMENTDB_DATABASENAME=Hotels + export DATA_FILE_WITH_VECTORS=data/Hotels_Vector.json + export EMBEDDED_FIELD=DescriptionVector + export EMBEDDING_DIMENSIONS=1536 + export LOAD_SIZE_BATCH=100 ``` | Variable | Description | @@ -50,9 +50,9 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB using | `EMBEDDED_FIELD` | Field name containing the vector (default: `DescriptionVector`) | | `EMBEDDING_DIMENSIONS` | Vector dimensions (default: `1536`) | | `LOAD_SIZE_BATCH` | Batch size for data insertion | - | `SIMILARITY` | Similarity metric: `COS`, `L2`, or `IP` | + | `SIMILARITY` | Not used in compare-all mode — all 3 
similarity metrics (COS, L2, IP) are tested automatically | -5. **Create a `data/` directory and copy the data file:** +4. **Create a `data/` directory and copy the data file:** ```bash mkdir -p data @@ -61,7 +61,7 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB using The `DATA_FILE_WITH_VECTORS` env var defaults to `data/Hotels_Vector.json`. -6. **Build the project:** +5. **Build the project:** ```bash npm run build From feba4f3b5b4df91fb287a760341c283fb1f4dae2 Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Fri, 22 May 2026 08:31:48 -0700 Subject: [PATCH 06/10] Fix READMEs: replace .env with shell exports, remove env:init script Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ai/select-algorithm-go/README.md | 28 ++++++--------------- ai/select-algorithm-java/README.md | 26 +++++++------------ ai/select-algorithm-python/README.md | 18 ++++++------- ai/select-algorithm-typescript/package.json | 1 - 4 files changed, 25 insertions(+), 48 deletions(-) diff --git a/ai/select-algorithm-go/README.md b/ai/select-algorithm-go/README.md index 452cde8..18d6f86 100644 --- a/ai/select-algorithm-go/README.md +++ b/ai/select-algorithm-go/README.md @@ -20,27 +20,16 @@ This sample demonstrates how to compare different vector search algorithms (IVF, 2. 
**Configure environment variables:** - After deploying with `azd up`, create a `.env` file with your provisioned resource values: + After deploying with `azd up`, use the provisioned output values to set these required environment variables in your shell: ```bash - azd env get-values > .env - ``` - - Alternatively, copy the example and fill in values manually: - - ```bash - cp .env.example .env - ``` - - Required variables: - ```env - DOCUMENTDB_CLUSTER_NAME=your-cluster-name - AZURE_OPENAI_EMBEDDING_ENDPOINT=https://your-resource.openai.azure.com - AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small - AZURE_DOCUMENTDB_DATABASENAME=Hotels - DATA_FILE_WITH_VECTORS=data/Hotels_Vector.json - EMBEDDED_FIELD=DescriptionVector - EMBEDDING_DIMENSIONS=1536 + export DOCUMENTDB_CLUSTER_NAME=your-cluster-name + export AZURE_OPENAI_EMBEDDING_ENDPOINT=https://your-resource.openai.azure.com + export AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small + export AZURE_DOCUMENTDB_DATABASENAME=Hotels + export DATA_FILE_WITH_VECTORS=data/Hotels_Vector.json + export EMBEDDED_FIELD=DescriptionVector + export EMBEDDING_DIMENSIONS=1536 ``` 3. **Copy the shared data file** into the local `data/` directory: @@ -147,7 +136,6 @@ go run ./src/... ``` select-algorithm-go/ -├── .env.example # Environment variable template ├── go.mod # Go module dependencies ├── go.sum # Go module checksums ├── output/ # Sample output files diff --git a/ai/select-algorithm-java/README.md b/ai/select-algorithm-java/README.md index b73f613..e9daf45 100644 --- a/ai/select-algorithm-java/README.md +++ b/ai/select-algorithm-java/README.md @@ -14,27 +14,19 @@ This sample demonstrates how to compare all three vector search index algorithms 1. 
### Configure environment variables - After deploying with `azd up`, create a `.env` file with your provisioned resource values: + After deploying with `azd up`, use the provisioned output values to set the required environment variables in your terminal: ```bash - azd env get-values > .env + export DOCUMENTDB_CLUSTER_NAME=your-cluster-name + export AZURE_OPENAI_EMBEDDING_ENDPOINT=https://your-resource.openai.azure.com + export AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small + export AZURE_DOCUMENTDB_DATABASENAME=Hotels + export DATA_FILE_WITH_VECTORS=data/Hotels_Vector.json + export EMBEDDED_FIELD=DescriptionVector + export EMBEDDING_DIMENSIONS=1536 ``` - This creates a `.env` file at the repository root with the configuration values and endpoints needed to run the sample. - - Alternatively, copy the example and fill in values manually: - - ```bash - cp .env.example .env - ``` - -2. Update `.env` with your Azure resource details (if not using `azd`): - - `DOCUMENTDB_CLUSTER_NAME` — your DocumentDB cluster name - - `AZURE_OPENAI_EMBEDDING_ENDPOINT` — your Azure OpenAI endpoint - - `AZURE_OPENAI_EMBEDDING_MODEL` — deployment name (e.g., `text-embedding-3-small`) - - `DATA_FILE_WITH_VECTORS` — path to the pre-computed vectors JSON file - -3. Copy the shared data file into the local `data/` directory: +2. Copy the shared data file into the local `data/` directory: ```bash mkdir -p data && cp ../data/Hotels_Vector.json data/ diff --git a/ai/select-algorithm-python/README.md b/ai/select-algorithm-python/README.md index 7c9ba5c..fc01802 100644 --- a/ai/select-algorithm-python/README.md +++ b/ai/select-algorithm-python/README.md @@ -34,18 +34,16 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB. Each 1. 
### Configure environment variables - After deploying with `azd up`, create a `.env` file with your provisioned resource values: + After deploying with `azd up`, use the provisioned output values to set the required environment variables in your terminal: ```bash - azd env get-values > .env - ``` - - This creates a `.env` file at the repository root with the configuration values and endpoints needed to run the sample. - - Alternatively, copy the example and fill in values manually: - - ```bash - cp .env.example .env + export DOCUMENTDB_CLUSTER_NAME=your-cluster-name + export AZURE_OPENAI_EMBEDDING_ENDPOINT=https://your-resource.openai.azure.com + export AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small + export AZURE_DOCUMENTDB_DATABASENAME=Hotels + export DATA_FILE_WITH_VECTORS=data/Hotels_Vector.json + export EMBEDDED_FIELD=DescriptionVector + export EMBEDDING_DIMENSIONS=1536 ``` 2. Install dependencies: diff --git a/ai/select-algorithm-typescript/package.json b/ai/select-algorithm-typescript/package.json index 5c1f24a..8a9d155 100644 --- a/ai/select-algorithm-typescript/package.json +++ b/ai/select-algorithm-typescript/package.json @@ -4,7 +4,6 @@ "description": "Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB", "type": "module", "scripts": { - "env:init": "azd env get-values > .env", "build": "tsc", "start": "node dist/compare-all.js" }, From 3fbb310792f1ddc1ab83aaee5727d579e8a0b203 Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Fri, 22 May 2026 08:42:10 -0700 Subject: [PATCH 07/10] Fix .NET README env var docs, remove misleading algorithm env knobs Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ai/select-algorithm-dotnet/README.md | 17 ++++++++++++++++- ai/select-algorithm-java/README.md | 12 +++--------- ai/select-algorithm-python/README.md | 4 ++-- ai/select-algorithm-typescript/README.md | 5 +++-- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git 
a/ai/select-algorithm-dotnet/README.md b/ai/select-algorithm-dotnet/README.md index abde5f6..01d3345 100644 --- a/ai/select-algorithm-dotnet/README.md +++ b/ai/select-algorithm-dotnet/README.md @@ -51,13 +51,28 @@ Demonstrates three vector index algorithms available in Azure DocumentDB: } ``` + > **Note:** .NET configuration also supports environment variable overrides. Use double-underscore (`__`) as the hierarchy separator: + > ```bash + > export DocumentDB__ClusterName=your-cluster-name + > export DocumentDB__DatabaseName=Hotels + > export AzureOpenAI__Endpoint=https://your-resource.openai.azure.com + > export AzureOpenAI__EmbeddingModel=text-embedding-3-small + > ``` + > Environment variables take precedence over `appsettings.json` values. + 3. Copy the shared data file into the local `data/` directory: ```bash mkdir -p data && cp ../data/Hotels_Vector.json data/ ``` -4. Restore packages: +4. Sign in to Azure for passwordless authentication: + + ```bash + az login + ``` + +5. Restore packages: ```bash dotnet restore diff --git a/ai/select-algorithm-java/README.md b/ai/select-algorithm-java/README.md index e9daf45..85361f7 100644 --- a/ai/select-algorithm-java/README.md +++ b/ai/select-algorithm-java/README.md @@ -46,16 +46,10 @@ Compare all 9 algorithm × similarity combinations: mvn exec:java -Pcompare ``` -Or via the `ALGORITHM` environment variable: - -```bash -ALGORITHM=compare mvn exec:java -``` - On Windows (PowerShell): ```powershell -$env:ALGORITHM="compare"; mvn exec:java +mvn exec:java -Pcompare ``` ## Algorithms @@ -79,12 +73,12 @@ $env:ALGORITHM="compare"; mvn exec:java | `AZURE_DOCUMENTDB_DATABASENAME` | `Hotels` | Target database name | | `LOAD_SIZE_BATCH` | `100` | Batch size for data loading | | `EMBEDDING_SIZE_BATCH` | `16` | Batch size for embedding requests | -| `ALGORITHM` | (empty = all) | Which algorithm to run | -| `SIMILARITY` | (empty = all) | Similarity metric: `COS`, `L2`, `IP` | | `QUERY_TEXT` | `luxury hotel near the beach` | Search 
query text | | `TOP_K` | `5` | Number of results per search | | `VERBOSE` | `false` | Print detailed per-index results | +`CompareAll.java` always runs all 9 algorithm/metric combinations. It does not read `ALGORITHM` or `SIMILARITY` environment variables. + ## Authentication This sample uses **passwordless authentication** via `DefaultAzureCredential`: diff --git a/ai/select-algorithm-python/README.md b/ai/select-algorithm-python/README.md index fc01802..e867fbc 100644 --- a/ai/select-algorithm-python/README.md +++ b/ai/select-algorithm-python/README.md @@ -87,8 +87,8 @@ The script creates a single `hotels` collection, loads data once, then for each | `AZURE_DOCUMENTDB_DATABASENAME` | `Hotels` | Target database name | | `LOAD_SIZE_BATCH` | `100` | Batch size for data loading | | `EMBEDDING_SIZE_BATCH` | `16` | Batch size for embedding requests | -| `ALGORITHM` | (empty = all) | Which algorithm to run | -| `SIMILARITY` | (empty = all) | Similarity metric: `COS`, `L2`, `IP` | | `QUERY_TEXT` | `luxury hotel near the beach` | Search query text | | `TOP_K` | `5` | Number of results per search | | `VERBOSE` | `false` | Show all k results per combo | + +`compare_all.py` always runs all 9 algorithm/metric combinations. It does not read `ALGORITHM` or `SIMILARITY` environment variables. diff --git a/ai/select-algorithm-typescript/README.md b/ai/select-algorithm-typescript/README.md index 094bd4d..302df29 100644 --- a/ai/select-algorithm-typescript/README.md +++ b/ai/select-algorithm-typescript/README.md @@ -50,9 +50,10 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB using | `EMBEDDED_FIELD` | Field name containing the vector (default: `DescriptionVector`) | | `EMBEDDING_DIMENSIONS` | Vector dimensions (default: `1536`) | | `LOAD_SIZE_BATCH` | Batch size for data insertion | - | `SIMILARITY` | Not used in compare-all mode — all 3 similarity metrics (COS, L2, IP) are tested automatically | -4. 
**Create a `data/` directory and copy the data file:** + `compare-all.ts` always runs all 9 algorithm/metric combinations. It does not read `ALGORITHM` or `SIMILARITY` environment variables. + +4. **Create a `data/` directory and copy the data file:** ```bash mkdir -p data From 912b511afb77078f0c1d76754b9e8a0c46217b65 Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Fri, 22 May 2026 08:58:47 -0700 Subject: [PATCH 08/10] Fix README phantom config docs, align .NET dimensions with appsettings Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ai/select-algorithm-dotnet/CompareAll.cs | 10 +++++----- ai/select-algorithm-dotnet/README.md | 7 ++----- ai/select-algorithm-dotnet/appsettings.json | 3 +-- ai/select-algorithm-java/README.md | 5 +---- ai/select-algorithm-python/README.md | 5 ++--- 5 files changed, 11 insertions(+), 19 deletions(-) diff --git a/ai/select-algorithm-dotnet/CompareAll.cs b/ai/select-algorithm-dotnet/CompareAll.cs index 2cf3d10..562d8c1 100644 --- a/ai/select-algorithm-dotnet/CompareAll.cs +++ b/ai/select-algorithm-dotnet/CompareAll.cs @@ -65,7 +65,7 @@ public static void Run(AppConfiguration appConfig) Console.WriteLine("Embedding generated (reused for all searches)\n"); // Define 9 index configurations - var configs = BuildIndexConfigs(dimensions); + var configs = BuildIndexConfigs(); // Run each config sequentially: drop→create→wait→search // DocumentDB doesn't allow multiple vector indexes of the same kind on the same field @@ -77,7 +77,7 @@ public static void Run(AppConfiguration appConfig) DropVectorIndexes(collection, vectorField); // 2. Create this specific index - CreateIndex(collection, vectorField, config); + CreateIndex(collection, vectorField, dimensions, config); Console.WriteLine($" ✓ {config.Name} created"); // 3. 
Search with retries while the index becomes available @@ -139,7 +139,7 @@ public static void Run(AppConfiguration appConfig) } } - private static List BuildIndexConfigs(int dimensions) + private static List BuildIndexConfigs() { string[] metrics = ["COS", "L2", "IP"]; var configs = new List(); @@ -177,7 +177,7 @@ private static void DropVectorIndexes(IMongoCollection collection, catch { } } - private static void CreateIndex(IMongoCollection collection, string vectorField, IndexConfig config) + private static void CreateIndex(IMongoCollection collection, string vectorField, int dimensions, IndexConfig config) { // Drop existing index with same name if present try @@ -192,7 +192,7 @@ private static void CreateIndex(IMongoCollection collection, strin var cosmosSearchOptions = new BsonDocument { { "kind", config.Kind }, - { "dimensions", int.Parse(Environment.GetEnvironmentVariable("EMBEDDING_DIMENSIONS") ?? "1536") }, + { "dimensions", dimensions }, { "similarity", config.Similarity } }; diff --git a/ai/select-algorithm-dotnet/README.md b/ai/select-algorithm-dotnet/README.md index 01d3345..b750f20 100644 --- a/ai/select-algorithm-dotnet/README.md +++ b/ai/select-algorithm-dotnet/README.md @@ -42,8 +42,7 @@ Demonstrates three vector index algorithms available in Azure DocumentDB: }, "Embedding": { "EmbeddedField": "DescriptionVector", - "Dimensions": 1536, - "EmbeddingSizeBatch": 16 + "Dimensions": 1536 }, "DataFiles": { "WithVectors": "data/Hotels_Vector.json" @@ -92,13 +91,12 @@ dotnet run |---------------------------|---------|-------------| | `DocumentDB:ClusterName` | (required) | DocumentDB cluster name | | `AzureOpenAI:Endpoint` | (required) | Azure OpenAI endpoint | -| `AzureOpenAI:EmbeddingModel` | (required) | Embedding model deployment name | +| `AzureOpenAI:EmbeddingModel` | `text-embedding-3-small` | Embedding model deployment name | | `DataFiles:WithVectors` | `data/Hotels_Vector.json` | Path to vectors JSON file | | `Embedding:EmbeddedField` | 
`DescriptionVector` | Field name containing embeddings | | `Embedding:Dimensions` | `1536` | Vector dimensions | | `DocumentDB:DatabaseName` | `Hotels` | Target database name | | `DocumentDB:LoadBatchSize` | `100` | Batch size for data loading | -| `Embedding:EmbeddingSizeBatch` | `16` | Batch size for embedding requests | **Additional environment variables for compare mode:** @@ -106,7 +104,6 @@ dotnet run |----------|---------|-------------| | `QUERY_TEXT` | `luxury hotel near the beach` | Search query text | | `TOP_K` | `5` | Number of results per search | -| `VERBOSE` | `false` | Show detailed per-result output | ## How It Works diff --git a/ai/select-algorithm-dotnet/appsettings.json b/ai/select-algorithm-dotnet/appsettings.json index 472d7ff..b6634fb 100644 --- a/ai/select-algorithm-dotnet/appsettings.json +++ b/ai/select-algorithm-dotnet/appsettings.json @@ -18,7 +18,6 @@ }, "Embedding": { "EmbeddedField": "DescriptionVector", - "Dimensions": 1536, - "EmbeddingSizeBatch": 16 + "Dimensions": 1536 } } diff --git a/ai/select-algorithm-java/README.md b/ai/select-algorithm-java/README.md index 85361f7..ca2648c 100644 --- a/ai/select-algorithm-java/README.md +++ b/ai/select-algorithm-java/README.md @@ -66,16 +66,13 @@ mvn exec:java -Pcompare |----------|---------|-------------| | `DOCUMENTDB_CLUSTER_NAME` | (required) | DocumentDB cluster name | | `AZURE_OPENAI_EMBEDDING_ENDPOINT` | (required) | Azure OpenAI endpoint | -| `AZURE_OPENAI_EMBEDDING_MODEL` | (required) | Embedding model deployment name | +| `AZURE_OPENAI_EMBEDDING_MODEL` | `text-embedding-3-small` | Embedding model deployment name | | `DATA_FILE_WITH_VECTORS` | `data/Hotels_Vector.json` | Path to vectors JSON file | | `EMBEDDED_FIELD` | `DescriptionVector` | Field name containing embeddings | | `EMBEDDING_DIMENSIONS` | `1536` | Vector dimensions | | `AZURE_DOCUMENTDB_DATABASENAME` | `Hotels` | Target database name | -| `LOAD_SIZE_BATCH` | `100` | Batch size for data loading | -| `EMBEDDING_SIZE_BATCH` 
| `16` | Batch size for embedding requests | | `QUERY_TEXT` | `luxury hotel near the beach` | Search query text | | `TOP_K` | `5` | Number of results per search | -| `VERBOSE` | `false` | Print detailed per-index results | `CompareAll.java` always runs all 9 algorithm/metric combinations. It does not read `ALGORITHM` or `SIMILARITY` environment variables. diff --git a/ai/select-algorithm-python/README.md b/ai/select-algorithm-python/README.md index e867fbc..8d5da02 100644 --- a/ai/select-algorithm-python/README.md +++ b/ai/select-algorithm-python/README.md @@ -80,15 +80,14 @@ The script creates a single `hotels` collection, loads data once, then for each |----------|---------|-------------| | `DOCUMENTDB_CLUSTER_NAME` | (required) | DocumentDB cluster name | | `AZURE_OPENAI_EMBEDDING_ENDPOINT` | (required) | Azure OpenAI endpoint | -| `AZURE_OPENAI_EMBEDDING_MODEL` | (required) | Embedding model deployment name | +| `AZURE_OPENAI_EMBEDDING_MODEL` | `text-embedding-3-small` | Embedding model deployment name | +| `AZURE_OPENAI_EMBEDDING_API_VERSION` | `2023-05-15` | Azure OpenAI API version | | `DATA_FILE_WITH_VECTORS` | `data/Hotels_Vector.json` | Path to vectors JSON file | | `EMBEDDED_FIELD` | `DescriptionVector` | Field name containing embeddings | | `EMBEDDING_DIMENSIONS` | `1536` | Vector dimensions | | `AZURE_DOCUMENTDB_DATABASENAME` | `Hotels` | Target database name | | `LOAD_SIZE_BATCH` | `100` | Batch size for data loading | -| `EMBEDDING_SIZE_BATCH` | `16` | Batch size for embedding requests | | `QUERY_TEXT` | `luxury hotel near the beach` | Search query text | | `TOP_K` | `5` | Number of results per search | -| `VERBOSE` | `false` | Show all k results per combo | `compare_all.py` always runs all 9 algorithm/metric combinations. It does not read `ALGORITHM` or `SIMILARITY` environment variables. 
From 2cfa69d88a60e65a70e83d59210690f712959836 Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Fri, 22 May 2026 09:10:02 -0700 Subject: [PATCH 09/10] Fix Java batch size: read LOAD_SIZE_BATCH env var (cross-language consistency) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../java/com/azure/documentdb/selectalgorithm/CompareAll.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java index 2d22e64..ee8733b 100644 --- a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java +++ b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java @@ -55,7 +55,8 @@ public static void run() { collection.drop(); System.out.println(" Collection reset."); - Utils.insertData(collection, data, 100); + int batchSize = Integer.parseInt(System.getenv().getOrDefault("LOAD_SIZE_BATCH", "100")); + Utils.insertData(collection, data, batchSize); // Generate ONE embedding for the query (reused for all 9 searches) OpenAIClient aiClient = Utils.getOpenAIClient(); From 6394c978eb31bbb1e7a9303236537d8b013df548 Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Fri, 22 May 2026 09:18:39 -0700 Subject: [PATCH 10/10] Fix Python README: run from sample root (consistent with other languages) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ai/select-algorithm-python/README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ai/select-algorithm-python/README.md b/ai/select-algorithm-python/README.md index 8d5da02..b89a800 100644 --- a/ai/select-algorithm-python/README.md +++ b/ai/select-algorithm-python/README.md @@ -48,8 +48,7 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB. Each 2. 
Install dependencies: ```bash - cd src - pip install -r ../requirements.txt + pip install -r requirements.txt ``` 3. Copy the shared data file into the local `data/` directory: @@ -68,8 +67,7 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB. Each Compare all 9 combinations (3 algorithms × 3 similarity metrics) in a single invocation: ```bash -cd src -python compare_all.py +python src/compare_all.py ``` The script creates a single `hotels` collection, loads data once, then for each of the 9 algorithm/metric combinations: creates the index → searches → drops the index. DocumentDB only allows one vector index per kind per field, so indexes are created sequentially.