fix(knowledge): require explicit Azure deployment per OpenAI embedding model

waleedlatif1 · claude · waleedlatif1 · commit 542d2edc7f3d · 2026-04-29T19:09:30.000-07:00
Greptile P1: when AZURE_OPENAI_* was set, every OpenAI embedding model was
routed to the single KB_OPENAI_MODEL_NAME deployment. A KB created with
text-embedding-3-large would be embedded by whatever model that deployment
serves while billing tracked 3-large pricing — and chunks ingested via Azure
versus queried via real OpenAI would land in mismatched vector spaces.

Now require AZURE_OPENAI_DEPLOYMENT_TEXT_EMBEDDING_3_(SMALL|LARGE) per model.
Falls back to KB_OPENAI_MODEL_NAME only for text-embedding-3-small (legacy).
If no deployment is configured for the chosen model, route to direct OpenAI
instead of silently routing to the wrong deployment.

Also fix type predicate in search/route.ts to use KnowledgeBaseAccessResult
so the build passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
diff --git a/apps/sim/app/api/knowledge/search/route.ts b/apps/sim/app/api/knowledge/search/route.ts
@@ -21,7 +21,7 @@ import {
   handleVectorOnlySearch,
   type SearchResult,
 } from '@/app/api/knowledge/search/utils'
-import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
+import { checkKnowledgeBaseAccess, type KnowledgeBaseAccessResult } from '@/app/api/knowledge/utils'
 import { getRerankModelPricing } from '@/providers/models'
 import { calculateCost } from '@/providers/utils'
 
@@ -243,18 +243,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
       }
 
       const accessibleKbs = accessChecks
-        .filter(
-          (
-            ac
-          ): ac is {
-            hasAccess: true
-            knowledgeBase: {
-              id: string
-              embeddingModel: string
-              workspaceId?: string | null
-            }
-          } => Boolean(ac?.hasAccess)
-        )
+        .filter((ac): ac is KnowledgeBaseAccessResult => Boolean(ac?.hasAccess))
         .map((ac) => ac.knowledgeBase)
       const workspaceId = accessibleKbs[0]?.workspaceId
 
diff --git a/apps/sim/lib/core/config/env.ts b/apps/sim/lib/core/config/env.ts
@@ -122,7 +122,9 @@ export const env = createEnv({
     AZURE_ANTHROPIC_ENDPOINT:              z.string().url().optional(),            // Azure Anthropic service endpoint
     AZURE_ANTHROPIC_API_KEY:               z.string().min(1).optional(),           // Azure Anthropic API key
     AZURE_ANTHROPIC_API_VERSION:           z.string().min(1).optional(),           // Azure Anthropic API version (e.g. 2023-06-01)
-    KB_OPENAI_MODEL_NAME:                  z.string().optional(),                  // Knowledge base OpenAI model name (works with both regular OpenAI and Azure OpenAI)
+    KB_OPENAI_MODEL_NAME:                  z.string().optional(),                  // Knowledge base OpenAI model name (works with both regular OpenAI and Azure OpenAI). Used as the Azure deployment for text-embedding-3-small (legacy/default).
+    AZURE_OPENAI_DEPLOYMENT_TEXT_EMBEDDING_3_SMALL: z.string().optional(),          // Azure deployment name serving text-embedding-3-small. If unset, falls back to KB_OPENAI_MODEL_NAME.
+    AZURE_OPENAI_DEPLOYMENT_TEXT_EMBEDDING_3_LARGE: z.string().optional(),          // Azure deployment name serving text-embedding-3-large. Required to use 3-large via Azure.
     WAND_OPENAI_MODEL_NAME:                z.string().optional(),                  // Wand generation OpenAI model name (works with both regular OpenAI and Azure OpenAI)
     OCR_AZURE_ENDPOINT:                    z.string().url().optional(),            // Azure Mistral OCR service endpoint
     OCR_AZURE_MODEL_NAME:                  z.string().optional(),                  // Azure Mistral OCR model name for document processing
diff --git a/apps/sim/lib/knowledge/embeddings.ts b/apps/sim/lib/knowledge/embeddings.ts
@@ -173,25 +173,40 @@ function buildGeminiProvider(modelName: string, apiKey: string): ResolvedProvide
   })
 }
 
+/**
+ * Maps an OpenAI embedding model to its Azure deployment name. 3-small reads
+ * AZURE_OPENAI_DEPLOYMENT_TEXT_EMBEDDING_3_SMALL, then legacy KB_OPENAI_MODEL_NAME;
+ * 3-large reads only AZURE_OPENAI_DEPLOYMENT_TEXT_EMBEDDING_3_LARGE. Returns null when
+ * unset/empty or the model is unknown, so the caller skips Azure, not a wrong deployment.
+ */
+function resolveAzureDeployment(embeddingModel: string): string | null {
+  if (embeddingModel === 'text-embedding-3-small') {
+    return env.AZURE_OPENAI_DEPLOYMENT_TEXT_EMBEDDING_3_SMALL || env.KB_OPENAI_MODEL_NAME || null
+  }
+  if (embeddingModel === 'text-embedding-3-large') {
+    return env.AZURE_OPENAI_DEPLOYMENT_TEXT_EMBEDDING_3_LARGE || null
+  }
+  return null
+}
+
 async function resolveProvider(
   embeddingModel: string,
   workspaceId?: string | null
 ): Promise<ResolvedProvider> {
   const azureApiKey = env.AZURE_OPENAI_API_KEY
   const azureEndpoint = env.AZURE_OPENAI_ENDPOINT
   const azureApiVersion = env.AZURE_OPENAI_API_VERSION
-  const useAzure =
-    !!(azureApiKey && azureEndpoint) &&
-    SUPPORTED_EMBEDDING_MODELS[embeddingModel]?.provider === 'openai'
+  const isOpenAIModel = SUPPORTED_EMBEDDING_MODELS[embeddingModel]?.provider === 'openai'
+  const azureDeployment =
+    isOpenAIModel && azureApiKey && azureEndpoint ? resolveAzureDeployment(embeddingModel) : null
 
-  if (useAzure) {
-    const deployment = env.KB_OPENAI_MODEL_NAME || embeddingModel
+  if (azureDeployment) {
     return {
-      modelName: deployment,
+      modelName: azureDeployment,
       pricingId: getEmbeddingModelInfo(embeddingModel).pricingId,
       isBYOK: false,
       buildRequest: buildAzureOpenAIProvider(
-        deployment,
+        azureDeployment,
         azureApiKey!,
         azureEndpoint!,
         azureApiVersion!,