From 20e9431679d35e1ff25261b8fdeda41e14381ac2 Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Thu, 21 May 2026 13:51:30 -0700 Subject: [PATCH] Remove extraneous select-algorithm.ts from TypeScript sample Delete the unused single-algorithm runner and remove stale quickstart references. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ai/select-algorithm-typescript/quickstart.md | 5 - .../src/select-algorithm.ts | 317 ------------------ 2 files changed, 322 deletions(-) delete mode 100644 ai/select-algorithm-typescript/src/select-algorithm.ts diff --git a/ai/select-algorithm-typescript/quickstart.md b/ai/select-algorithm-typescript/quickstart.md index b04fc58..8955f25 100644 --- a/ai/select-algorithm-typescript/quickstart.md +++ b/ai/select-algorithm-typescript/quickstart.md @@ -202,8 +202,6 @@ Find the [sample code](https://github.com/Azure-Samples/documentdb-samples/tree/ - `AZURE_OPENAI_EMBEDDING_ENDPOINT`: Your Azure OpenAI resource endpoint URL - `DOCUMENTDB_CLUSTER_NAME`: Your Azure DocumentDB cluster name - The compare-all mode always runs all 9 combinations (3 algorithms × 3 metrics). The `ALGORITHM` and `SIMILARITY` environment variables are used only by the single-algorithm mode. - You should always prefer passwordless authentication, but it requires additional setup. For more information on setting up managed identity and the full range of your authentication options, see [Authenticate JavaScript apps to Azure services using the Azure SDK for JavaScript](/azure/developer/javascript/sdk/authentication/overview). ## Create code files @@ -218,7 +216,6 @@ select-algorithm-typescript/ │ └── compare_all.txt ├── src/ │ ├── compare-all.ts -│ ├── select-algorithm.ts │ └── utils.ts ├── .gitignore ├── package.json @@ -335,8 +332,6 @@ The **Diff** column shows the score gap between the top-1 and top-2 results. A s ### Run all combinations -The compare-all mode always runs all 9 combinations (3 algorithms × 3 metrics). The `ALGORITHM` and `SIMILARITY` environment variables are used only by the single-algorithm mode. - ```bash npm run build npm start diff --git a/ai/select-algorithm-typescript/src/select-algorithm.ts b/ai/select-algorithm-typescript/src/select-algorithm.ts deleted file mode 100644 index 38451af..0000000 --- a/ai/select-algorithm-typescript/src/select-algorithm.ts +++ /dev/null @@ -1,317 +0,0 @@ -import path from 'path'; -import { readFileReturnJson, getClientsPasswordless, insertData, printComparisonTable } from './utils.js'; - -// ESM specific features - create __dirname equivalent -import { fileURLToPath } from "node:url"; -import { dirname } from "node:path"; -const __filename = fileURLToPath(import.meta.url); -const __dirname = dirname(__filename); - -// Validate required environment variables at startup -const requiredEnvVars = [ - 'DOCUMENTDB_CLUSTER_NAME', - 'AZURE_OPENAI_EMBEDDING_ENDPOINT', - 'AZURE_OPENAI_EMBEDDING_MODEL', - 'DATA_FILE_WITH_VECTORS' -]; - -const missing = requiredEnvVars.filter(v => !process.env[v]); -if (missing.length > 0) { - console.error(`Missing required environment variables: ${missing.join(', ')}`); - console.error('See .env.example for required values.'); - process.exit(1); -} - -type Algorithm = 'diskann' | 'hnsw' | 'ivf'; -type Similarity = 'COS' | 'L2' | 'IP'; -type SimilarityEnv = 'cos' | 'l2' | 'ip'; - -const ALGORITHMS: Algorithm[] = ['diskann', 'hnsw', 'ivf']; -const SIMILARITIES: Similarity[] = ['COS', 'L2', 'IP']; -const SIMILARITY_ENV_VALUES: SimilarityEnv[] = ['cos', 'l2', 'ip']; -const SIMILARITY_BY_ENV: Record = { - cos: 'COS', - l2: 'L2', - ip: 'IP', -}; - -const ALGORITHM_LABELS: Record = { - diskann: 'DiskANN', - hnsw: 'HNSW', - ivf: 'IVF', -}; - -// Index creation configs per algorithm -function getIndexOptions( - collectionName: string, - indexName: string, - embeddedField: string, - dimensions: number, - algorithm: Algorithm, - similarity: Similarity -) { - const base = { - createIndexes: collectionName, - indexes: [ - { - name: indexName, - key: { [embeddedField]: 'cosmosSearch' }, - cosmosSearchOptions: {} as Record, - }, - ], - }; - - switch (algorithm) { - case 'diskann': - base.indexes[0].cosmosSearchOptions = { - kind: 'vector-diskann', - dimensions, - similarity, - maxDegree: 32, - lBuild: 50, - }; - break; - case 'hnsw': - base.indexes[0].cosmosSearchOptions = { - kind: 'vector-hnsw', - dimensions, - similarity, - m: 16, - efConstruction: 64, - }; - break; - case 'ivf': - base.indexes[0].cosmosSearchOptions = { - kind: 'vector-ivf', - dimensions, - similarity, - numLists: 1, - }; - break; - } - - return base; -} - -// Algorithm-specific query params -function getSearchPipeline( - queryEmbedding: number[], - embeddedField: string, - k: number, - algorithm: Algorithm -) { - const cosmosSearch: Record = { - vector: queryEmbedding, - path: embeddedField, - k, - }; - - // Add algorithm-specific search params - switch (algorithm) { - case 'diskann': - cosmosSearch.lSearch = 100; - break; - case 'hnsw': - cosmosSearch.efSearch = 80; - break; - case 'ivf': - cosmosSearch.nProbes = 1; - break; - } - - return [ - { $search: { cosmosSearch } }, - { $project: { score: { $meta: "searchScore" }, document: "$$ROOT" } }, - ]; -} - -/** - * Determine which collections to create/query based on ALGORITHM and SIMILARITY env vars. - * Leave either env var unset or empty to run all valid combinations. - * Valid ALGORITHM values: ivf, hnsw, diskann - * Valid SIMILARITY values: cos, l2, ip - * Collection naming: hotels_{algorithm}_{similarity} - */ -function getTargetCollections( - algorithmEnv: string, - similarityEnv: string -): Array<{ collectionName: string; algorithm: Algorithm; similarity: Similarity }> { - const algorithms: Algorithm[] = !algorithmEnv - ? ALGORITHMS - : (() => { - if (!ALGORITHMS.includes(algorithmEnv as Algorithm)) { - throw new Error(`Invalid ALGORITHM '${algorithmEnv}'. Must be one of: ${ALGORITHMS.join(', ')}`); - } - return [algorithmEnv as Algorithm]; - })(); - - const similarities: Similarity[] = !similarityEnv - ? SIMILARITIES - : (() => { - if (!SIMILARITY_ENV_VALUES.includes(similarityEnv as SimilarityEnv)) { - throw new Error(`Invalid SIMILARITY '${similarityEnv}'. Must be one of: ${SIMILARITY_ENV_VALUES.join(', ')}`); - } - return [SIMILARITY_BY_ENV[similarityEnv as SimilarityEnv]]; - })(); - - const targets: Array<{ collectionName: string; algorithm: Algorithm; similarity: Similarity }> = []; - - for (const alg of algorithms) { - for (const sim of similarities) { - targets.push({ - collectionName: `hotels_${alg}_${sim.toLowerCase()}`, - algorithm: alg, - similarity: sim, - }); - } - } - - return targets; -} - -async function main() { - const { aiClient, dbClient } = getClientsPasswordless(); - - try { - if (!aiClient) { - throw new Error('Azure OpenAI client is not configured. Please check your environment variables.'); - } - if (!dbClient) { - throw new Error('Database client is not configured. Please check your environment variables.'); - } - - const dbName = process.env.AZURE_DOCUMENTDB_DATABASENAME || 'Hotels'; - const embeddedField = process.env.EMBEDDED_FIELD || 'DescriptionVector'; - const embeddingDimensions = parseInt(process.env.EMBEDDING_DIMENSIONS || '1536', 10); - const dataFile = process.env.DATA_FILE_WITH_VECTORS || 'data/Hotels_Vector.json'; - const deployment = process.env.AZURE_OPENAI_EMBEDDING_MODEL!; - const batchSize = parseInt(process.env.LOAD_SIZE_BATCH || '100', 10); - const algorithmEnv = (process.env.ALGORITHM || '').trim().toLowerCase(); - const similarityEnv = (process.env.SIMILARITY || '').trim().toLowerCase(); - const searchQuery = 'quintessential lodging near running trails, eateries, retail'; - - const targets = getTargetCollections(algorithmEnv, similarityEnv); - - console.log(`\n🔬 Vector Algorithm Comparison`); - console.log(` Database: ${dbName}`); - console.log(` Algorithms: ${algorithmEnv || ALGORITHMS.join(', ')}`); - console.log(` Similarity: ${similarityEnv || SIMILARITY_ENV_VALUES.join(', ')}`); - console.log(` Collections to query: ${targets.map(t => t.collectionName).join(', ')}`); - console.log(` Search query: "${searchQuery}"\n`); - - await dbClient.connect(); - const db = dbClient.db(dbName); - - // Load data once (shared across collections) - const data = await readFileReturnJson(path.join(__dirname, '..', dataFile)); - - // Generate query embedding once (reuse across collections) - console.log('Generating query embedding...'); - const embeddingResponse = await aiClient.embeddings.create({ - model: deployment, - input: [searchQuery], - }); - const queryEmbedding = embeddingResponse.data[0].embedding; - if (queryEmbedding.length !== embeddingDimensions) { - throw new Error( - `Embedding dimension mismatch: expected ${embeddingDimensions}, got ${queryEmbedding.length}. ` + - `Verify AZURE_OPENAI_EMBEDDING_MODEL matches the configured EMBEDDING_DIMENSIONS.` - ); - } - console.log(`Query embedding: ${queryEmbedding.length} dimensions\n`); - - const config = { batchSize }; - - const comparisonResults: Array<{ - collectionName: string; - algorithm: string; - similarity: string; - searchResults: any[]; - latencyMs: number; - }> = []; - const failedTargets: Array<{ collectionName: string; error: string }> = []; - - for (const target of targets) { - console.log(`\n━━━ ${ALGORITHM_LABELS[target.algorithm]} / ${target.similarity} ━━━`); - console.log(`Collection: ${target.collectionName}`); - - try { - // Create collection (drops existing to ensure clean state) - try { - await db.dropCollection(target.collectionName); - } catch { - // Collection may not exist yet - } - const collection = await db.createCollection(target.collectionName); - console.log('Created collection:', target.collectionName); - - // Insert data - const insertSummary = await insertData(config, collection, data); - console.log(`Inserted: ${insertSummary.inserted}/${insertSummary.total}`); - - // Create vector index - const indexName = `vectorIndex_${target.algorithm}_${target.similarity.toLowerCase()}`; - const indexOptions = getIndexOptions( - target.collectionName, - indexName, - embeddedField, - embeddingDimensions, - target.algorithm, - target.similarity - ); - await db.command(indexOptions); - console.log('Created vector index:', indexName); - - // Run vector search - console.log('Executing vector search...'); - const startTime = Date.now(); - - const pipeline = getSearchPipeline(queryEmbedding, embeddedField, 5, target.algorithm); - const searchResults = await collection.aggregate(pipeline).toArray(); - - const latencyMs = Date.now() - startTime; - - comparisonResults.push({ - collectionName: target.collectionName, - algorithm: ALGORITHM_LABELS[target.algorithm], - similarity: target.similarity, - searchResults, - latencyMs, - }); - - console.log(`✓ ${searchResults.length} results, ${latencyMs}ms`); - } catch (error) { - const message = (error as Error).message; - failedTargets.push({ collectionName: target.collectionName, error: message }); - console.error(`✗ Error with ${target.collectionName}:`, message); - } - } - - if (failedTargets.length > 0) { - console.error(`\nFailure summary: ${failedTargets.length} of ${targets.length} target collection(s) failed.`); - for (const failure of failedTargets) { - console.error(` - ${failure.collectionName}: ${failure.error}`); - } - } - - // Print comparison table - if (comparisonResults.length > 0) { - printComparisonTable(comparisonResults); - } else { - console.error('\nNo comparison results were produced. All target collections failed.'); - process.exitCode = 1; - } - } catch (error) { - console.error('App failed:', error); - process.exitCode = 1; - } finally { - console.log('\nClosing database connection...'); - if (dbClient) await dbClient.close(); - console.log('Database connection closed'); - } -} - -main().catch(error => { - console.error('Unhandled error:', error); - process.exitCode = 1; -});