Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions lib/crewai/src/crewai/llms/providers/azure/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,11 @@
)

self.api_key = api_key or os.getenv("AZURE_API_KEY")
# Support both 'endpoint' and 'base_url' parameters for consistency with other providers
base_url = kwargs.get("base_url")
self.endpoint = (
endpoint
or base_url
or os.getenv("AZURE_ENDPOINT")
or os.getenv("AZURE_OPENAI_ENDPOINT")
or os.getenv("AZURE_API_BASE")
Expand Down Expand Up @@ -170,25 +173,63 @@
prefix in model.lower() for prefix in ["gpt-", "o1-", "text-"]
)

# Azure OpenAI endpoints use openai.azure.com domain and require deployment path
# Other Azure AI endpoints (cognitiveservices.azure.com, etc.) are also valid
self.is_azure_openai_endpoint = (
"openai.azure.com" in self.endpoint
and "/openai/deployments/" in self.endpoint
)

# Check if this is any Azure endpoint (for proper API handling)
self.is_azure_endpoint = self._is_azure_endpoint(self.endpoint)

@staticmethod
def _is_azure_endpoint(endpoint: str) -> bool:
    """Check whether the endpoint's host belongs to the ``azure.com`` domain.

    Azure endpoints can have various domain formats, all under ``azure.com``:
    - <resource>.openai.azure.com (Azure OpenAI Service)
    - <resource>.cognitiveservices.azure.com (Azure AI / Cognitive Services)
    - <resource>.services.ai.azure.com (Azure AI Services)
    - Other *.azure.com domains

    The decision is made on the parsed hostname only. Matching the raw URL
    string would also accept URLs that merely mention an Azure domain in
    their path, query string, or as a prefix of an unrelated host (for
    example ``https://openai.azure.com.attacker.com``).

    Args:
        endpoint: The endpoint URL to check. A bare host without a scheme
            (``my-resource.openai.azure.com``) is accepted as well.

    Returns:
        True if the endpoint's hostname is ``azure.com`` or one of its
        subdomains, False otherwise (including empty or unparseable input).
    """
    # Function-scope import keeps this method self-contained.
    from urllib.parse import urlparse

    if not endpoint:
        return False

    candidate = endpoint.strip()
    try:
        parsed = urlparse(candidate)
        if not parsed.netloc:
            # urlparse only recognises a host after "//"; retry so that
            # scheme-less values such as "host.azure.com/path" still work.
            parsed = urlparse("//" + candidate.lstrip("/"))
        hostname = parsed.hostname
    except ValueError:
        # Raised for malformed authority sections (e.g. unbalanced "[").
        return False

    if not hostname:
        return False

    # Hostnames are case-insensitive; a trailing dot denotes a rooted FQDN.
    hostname = hostname.lower().rstrip(".")
    return hostname == "azure.com" or hostname.endswith(".azure.com")

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization High

The string
.azure.com
may be at an arbitrary position in the sanitized URL.

Copilot Autofix

AI 3 months ago

In general, the problem should be fixed by parsing the endpoint as a URL, extracting its hostname, and performing checks against that hostname (and possibly its subdomains), instead of checking the raw endpoint string for substrings like "openai.azure.com" or ".azure.com". This ensures we classify and normalize only real Azure domains and not arbitrary strings that happen to contain those substrings.

Concretely for this file:

  1. Add an import for urlparse from Python’s standard urllib.parse module at the top of the file (we’re allowed to add well-known standard-library imports).
  2. Update _is_azure_endpoint to:
    • Parse the endpoint with urlparse.
    • Extract hostname (lowercased).
    • Compare this hostname against an allowlist of exact Azure hostnames (e.g., openai.azure.com, cognitiveservices.azure.com, services.ai.azure.com) and a generic *.azure.com check implemented as hostname == "azure.com" or hostname.endswith(".azure.com").
    • Remove all raw in endpoint substring checks.
  3. Update _validate_and_fix_endpoint to determine whether the host is an Azure OpenAI host by parsing the URL and checking hostname == "openai.azure.com" or ending with .openai.azure.com (to support resource-specific subdomains like <resource>.openai.azure.com). Use this boolean instead of "openai.azure.com" in endpoint. This avoids misclassifying URLs like https://evil-openai.azure.com.attacker.com.
  4. Keep existing functionality: the method should still auto-construct the deployment path only when the endpoint is an Azure OpenAI endpoint and doesn’t already contain /openai/deployments/. The only behavioral change is stricter and correct host recognition.

No new third-party dependencies are needed; we only use the standard library.

Suggested changeset 1
lib/crewai/src/crewai/llms/providers/azure/completion.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py
--- a/lib/crewai/src/crewai/llms/providers/azure/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py
@@ -4,6 +4,7 @@
 import logging
 import os
 from typing import TYPE_CHECKING, Any, TypedDict
+from urllib.parse import urlparse
 
 from pydantic import BaseModel
 from typing_extensions import Self
@@ -188,8 +189,8 @@
         """Check if the endpoint is an Azure endpoint.
 
         Azure endpoints can have various domain formats:
-        - openai.azure.com (Azure OpenAI Service)
-        - cognitiveservices.azure.com (Azure AI Services / Cognitive Services)
+        - <resource-name>.openai.azure.com (Azure OpenAI Service)
+        - <resource-name>.cognitiveservices.azure.com (Azure AI Services / Cognitive Services)
         - services.ai.azure.com (Azure AI Services)
         - Other *.azure.com domains
 
@@ -199,18 +200,24 @@
         Returns:
             True if the endpoint is an Azure endpoint, False otherwise
         """
-        azure_domains = [
+        parsed = urlparse(endpoint)
+        hostname = (parsed.hostname or "").lower()
+        if not hostname:
+            return False
+
+        azure_domains = {
             "openai.azure.com",
             "cognitiveservices.azure.com",
             "services.ai.azure.com",
-        ]
-        # Check for known Azure domains
-        for domain in azure_domains:
-            if domain in endpoint:
-                return True
-        # Also check for generic .azure.com pattern (e.g., cservices.azure.com)
-        return ".azure.com" in endpoint
+        }
 
+        # Check for known Azure hostnames
+        if hostname in azure_domains:
+            return True
+
+        # Also check for generic .azure.com pattern (e.g., <resource>.azure.com)
+        return hostname == "azure.com" or hostname.endswith(".azure.com")
+
     @staticmethod
     def _validate_and_fix_endpoint(endpoint: str, model: str) -> str:
         """Validate and fix Azure endpoint URL format.
@@ -228,9 +231,18 @@
         Returns:
             Validated and potentially corrected endpoint URL
         """
-        # Only auto-construct deployment path for Azure OpenAI endpoints (openai.azure.com)
+        parsed = urlparse(endpoint)
+        hostname = (parsed.hostname or "").lower()
+
+        # Only auto-construct deployment path for Azure OpenAI endpoints
+        # (e.g., <resource-name>.openai.azure.com)
+        is_openai_host = bool(
+            hostname
+            and (hostname == "openai.azure.com" or hostname.endswith(".openai.azure.com"))
+        )
+
         # Other Azure endpoints (cognitiveservices.azure.com, etc.) should be used as-is
-        if "openai.azure.com" in endpoint and "/openai/deployments/" not in endpoint:
+        if is_openai_host and "/openai/deployments/" not in endpoint:
             endpoint = endpoint.rstrip("/")
 
             if not endpoint.endswith("/openai/deployments"):
EOF
Copilot is powered by AI and may make mistakes. Always verify output.
Unable to commit as this autofix suggestion is now outdated

@staticmethod
def _validate_and_fix_endpoint(endpoint: str, model: str) -> str:
"""Validate and fix Azure endpoint URL format.

Azure OpenAI endpoints should be in the format:
https://<resource-name>.openai.azure.com/openai/deployments/<deployment-name>

Other Azure AI endpoints (cognitiveservices.azure.com, etc.) are used as-is
since they may have different URL structures.

Args:
endpoint: The endpoint URL
model: The model/deployment name

Returns:
Validated and potentially corrected endpoint URL
"""
# Only auto-construct deployment path for Azure OpenAI endpoints (openai.azure.com)
# Other Azure endpoints (cognitiveservices.azure.com, etc.) should be used as-is
if "openai.azure.com" in endpoint and "/openai/deployments/" not in endpoint:
endpoint = endpoint.rstrip("/")

Expand Down
174 changes: 174 additions & 0 deletions lib/crewai/tests/llms/azure/test_azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -1215,3 +1215,177 @@
assert result.token_usage.prompt_tokens > 0
assert result.token_usage.completion_tokens > 0
assert result.token_usage.successful_requests >= 1


def test_azure_cognitive_services_endpoint():
    """
    Test that Azure Cognitive Services endpoints (cognitiveservices.azure.com) are supported.
    This addresses GitHub issue #4260 where non-openai.azure.com endpoints were not working.
    """
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch.dict(os.environ, {
        "AZURE_API_KEY": "test-key",
        "AZURE_ENDPOINT": "https://my-resource.cognitiveservices.azure.com",
    }):
        llm = LLM(model="azure/gpt-4")

        assert isinstance(llm, AzureCompletion)
        # Cognitive Services endpoints should NOT have deployment path auto-constructed
        assert llm.endpoint == "https://my-resource.cognitiveservices.azure.com"
        # Should be recognized as an Azure endpoint (identity check on the bool,
        # not `== True`, so a merely truthy value does not pass)
        assert llm.is_azure_endpoint is True
        # But NOT as an Azure OpenAI endpoint (different URL structure)
        assert llm.is_azure_openai_endpoint is False


def test_azure_ai_services_endpoint():
    """
    Test that Azure AI Services endpoints (services.ai.azure.com) are supported.
    """
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch.dict(os.environ, {
        "AZURE_API_KEY": "test-key",
        "AZURE_ENDPOINT": "https://my-resource.services.ai.azure.com",
    }):
        llm = LLM(model="azure/gpt-4")

        assert isinstance(llm, AzureCompletion)
        # The endpoint must be kept as-is (no deployment path appended)
        assert llm.endpoint == "https://my-resource.services.ai.azure.com"
        # Identity checks on the bool flags instead of `== True` / `== False`
        assert llm.is_azure_endpoint is True
        assert llm.is_azure_openai_endpoint is False


def test_azure_generic_azure_com_endpoint():
    """
    Test that generic .azure.com endpoints are supported (e.g., cservices.azure.com).
    This addresses the specific case from GitHub issue #4260.
    """
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch.dict(os.environ, {
        "AZURE_API_KEY": "test-key",
        "AZURE_ENDPOINT": "https://my-resource.cservices.azure.com",
    }):
        llm = LLM(model="azure/gpt-4")

        assert isinstance(llm, AzureCompletion)
        # The endpoint must be kept as-is (no deployment path appended)
        assert llm.endpoint == "https://my-resource.cservices.azure.com"
        # Identity checks on the bool flags instead of `== True` / `== False`
        assert llm.is_azure_endpoint is True
        assert llm.is_azure_openai_endpoint is False


def test_azure_is_azure_endpoint_detection():
    """
    Test the _is_azure_endpoint static method for various endpoint formats.
    """
    from crewai.llms.providers.azure.completion import AzureCompletion

    is_azure = AzureCompletion._is_azure_endpoint

    # Results are compared with `is True` / `is False` so the test also pins
    # the return type to a real bool rather than any truthy/falsy value.

    # Azure OpenAI endpoints
    assert is_azure("https://my-resource.openai.azure.com") is True
    assert is_azure("https://my-resource.openai.azure.com/openai/deployments/gpt-4") is True

    # Azure Cognitive Services endpoints
    assert is_azure("https://my-resource.cognitiveservices.azure.com") is True

    # Azure AI Services endpoints
    assert is_azure("https://my-resource.services.ai.azure.com") is True

    # Generic .azure.com endpoints (like cservices.azure.com from issue #4260)
    assert is_azure("https://my-resource.cservices.azure.com") is True

    # Azure AI Inference endpoint
    assert is_azure("https://models.inference.ai.azure.com") is True

    # Non-Azure endpoints should return False
    assert is_azure("https://api.openai.com") is False
    assert is_azure("https://example.com") is False


def test_azure_base_url_parameter_support():
    """
    Test that the base_url parameter is supported as an alias for endpoint.
    This provides consistency with other LLM providers.
    """
    from crewai.llms.providers.azure.completion import AzureCompletion

    # Test with base_url parameter directly
    llm = AzureCompletion(
        model="gpt-4",
        api_key="test-key",
        base_url="https://my-resource.cognitiveservices.azure.com",
    )

    assert llm.endpoint == "https://my-resource.cognitiveservices.azure.com"
    # Identity check on the bool flag instead of `== True`
    assert llm.is_azure_endpoint is True


def test_azure_endpoint_takes_precedence_over_base_url():
    """
    Test that explicit endpoint parameter takes precedence over base_url.
    """
    from urllib.parse import urlparse

    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4",
        api_key="test-key",
        endpoint="https://explicit-endpoint.openai.azure.com",
        base_url="https://base-url-endpoint.cognitiveservices.azure.com",
    )

    # endpoint should take precedence.
    # Compare the parsed hostname rather than searching the raw URL string:
    # a substring match would also succeed if the host name only appeared in
    # the path or query, and the stored endpoint may carry an extra path.
    assert urlparse(llm.endpoint).hostname == "explicit-endpoint.openai.azure.com"

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization High test

The string
explicit-endpoint.openai.azure.com
may be at an arbitrary position in the sanitized URL.

Copilot Autofix

AI 3 months ago

In general, to avoid incomplete URL substring sanitization, URLs should be parsed and validated based on structured components (hostname, scheme, path), or compared exactly against expected values, rather than using substring checks like in or endswith on the raw URL string.

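For this specific case, the test test_azure_endpoint_takes_precedence_over_base_url currently asserts that the chosen endpoint contains "explicit-endpoint.openai.azure.com". Since the intention is to verify that the endpoint argument wins over base_url, the suggested fix is to assert equality against the exact endpoint string passed in: "https://explicit-endpoint.openai.azure.com". This keeps the behavior check intact while removing the substring pattern that CodeQL flags. No additional parsing is needed here, provided the constructor stores the endpoint unmodified. Caveat to verify before applying: if the stored endpoint is normalized for openai.azure.com hosts (for example, by appending an /openai/deployments/<model> path), the exact-equality assertion will fail; in that case compare the parsed hostname instead, e.g. urlparse(llm.endpoint).hostname == "explicit-endpoint.openai.azure.com".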

Concretely, in lib/crewai/tests/llms/azure/test_azure.py, around line 1338, replace:

# endpoint should take precedence
assert "explicit-endpoint.openai.azure.com" in llm.endpoint

with:

# endpoint should take precedence
assert llm.endpoint == "https://explicit-endpoint.openai.azure.com"

No new imports or helper methods are required.

Suggested changeset 1
lib/crewai/tests/llms/azure/test_azure.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/lib/crewai/tests/llms/azure/test_azure.py b/lib/crewai/tests/llms/azure/test_azure.py
--- a/lib/crewai/tests/llms/azure/test_azure.py
+++ b/lib/crewai/tests/llms/azure/test_azure.py
@@ -1335,7 +1335,7 @@
     )
 
     # endpoint should take precedence
-    assert "explicit-endpoint.openai.azure.com" in llm.endpoint
+    assert llm.endpoint == "https://explicit-endpoint.openai.azure.com"
 
 
 def test_azure_non_openai_endpoint_model_parameter_included():
EOF
@@ -1335,7 +1335,7 @@
)

# endpoint should take precedence
assert "explicit-endpoint.openai.azure.com" in llm.endpoint
assert llm.endpoint == "https://explicit-endpoint.openai.azure.com"


def test_azure_non_openai_endpoint_model_parameter_included():
Copilot is powered by AI and may make mistakes. Always verify output.
Unable to commit as this autofix suggestion is now outdated


def test_azure_non_openai_endpoint_model_parameter_included():
    """
    Verify that the prepared completion params carry the ``model`` key when
    the configured endpoint is not an Azure OpenAI one (here: a Cognitive
    Services host).
    """
    from crewai.llms.providers.azure.completion import AzureCompletion

    azure_env = {
        "AZURE_API_KEY": "test-key",
        "AZURE_ENDPOINT": "https://my-resource.cognitiveservices.azure.com"
    }
    with patch.dict(os.environ, azure_env):
        llm = LLM(model="azure/gpt-4")

        user_message = {"role": "user", "content": "test"}
        params = llm._prepare_completion_params(messages=[user_message])

        # The model name must be present and passed through unchanged
        assert "model" in params
        assert params["model"] == "gpt-4"


def test_azure_validate_and_fix_endpoint_only_modifies_openai_azure():
    """
    Verify that _validate_and_fix_endpoint adds the deployment path only for
    openai.azure.com endpoints and returns every other Azure endpoint untouched.
    """
    from crewai.llms.providers.azure.completion import AzureCompletion

    fix_endpoint = AzureCompletion._validate_and_fix_endpoint

    # Azure OpenAI host: the deployment path is expected to be added
    openai_result = fix_endpoint("https://my-resource.openai.azure.com", "gpt-4")
    assert "/openai/deployments/gpt-4" in openai_result

    # Cognitive Services host, then a generic *.azure.com host:
    # both must come back exactly as they went in
    passthrough_endpoints = (
        "https://my-resource.cognitiveservices.azure.com",
        "https://my-resource.cservices.azure.com",
    )
    for original in passthrough_endpoints:
        result = fix_endpoint(original, "gpt-4")
        assert result == original
        assert "/openai/deployments/" not in result
Loading