From f013176984aebedb93066df62f91c22c28763215 Mon Sep 17 00:00:00 2001 From: Waqas Javed <7674577+w-javed@users.noreply.github.com> Date: Thu, 12 Mar 2026 17:07:34 -0700 Subject: [PATCH 1/7] Adding-Upload-Evaluator --- .../aio/operations/_patch_evaluators_async.py | 187 ++++++++++++++++ .../azure/ai/projects/operations/_patch.py | 4 +- .../projects/operations/_patch_evaluators.py | 211 ++++++++++++++++++ .../answer_length_evaluator.py | 14 ++ .../custom_evaluator/common_util/__init__.py | 0 .../custom_evaluator/common_util/util.py | 72 ++++++ .../custom_evaluator/friendly_evaluator.py | 62 +++++ .../sample_eval_upload_custom_evaluator.py | 127 +++++++++++ .../sample_eval_upload_friendly_evaluator.py | 145 ++++++++++++ 9 files changed, 821 insertions(+), 1 deletion(-) create mode 100644 sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py create mode 100644 sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py create mode 100644 sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/answer_length_evaluator.py create mode 100644 sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/common_util/__init__.py create mode 100644 sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/common_util/util.py create mode 100644 sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator.py create mode 100644 sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py create mode 100644 sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py diff --git a/sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py b/sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py new file mode 100644 index 000000000000..aa37adaa48ca --- /dev/null +++ b/sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py @@ -0,0 +1,187 @@ +# pylint: 
disable=line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +"""Customize generated code here. + +Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize +""" +import os +import logging +from typing import Any, IO, Tuple, Optional, Union +from pathlib import Path +from azure.storage.blob.aio import ContainerClient +from azure.core.tracing.decorator_async import distributed_trace_async +from azure.core.exceptions import ResourceNotFoundError +from ._operations import BetaEvaluatorsOperations as EvaluatorsOperationsGenerated, JSON +from ...models._models import ( + EvaluatorVersion, +) + +logger = logging.getLogger(__name__) + + +class EvaluatorsOperations(EvaluatorsOperationsGenerated): + """ + .. warning:: + **DO NOT** instantiate this class directly. + + Instead, you should access the following operations through + :class:`~azure.ai.projects.aio.AIProjectClient`'s + :attr:`beta.evaluators` attribute. 
+ """ + + async def _start_pending_upload_and_get_container_client( + self, + name: str, + version: str, + connection_name: Optional[str] = None, + ) -> Tuple[ContainerClient, str, str]: + """Call startPendingUpload to get a SAS URI and return a ContainerClient and blob URI.""" + + request_body: dict = {} + if connection_name: + request_body["connectionName"] = connection_name + + pending_upload_response = await self.pending_upload( + name=name, + version=version, + pending_upload_request=request_body, + ) + + # The service returns blobReferenceForConsumption + blob_ref = pending_upload_response.get("blobReferenceForConsumption") + if not blob_ref: + raise ValueError("Blob reference is not present in the pending upload response") + + credential = blob_ref.get("credential") if isinstance(blob_ref, dict) else None + if not credential: + raise ValueError("SAS credential is not present in the pending upload response") + + sas_uri = credential.get("sasUri") if isinstance(credential, dict) else None + if not sas_uri: + raise ValueError("SAS URI is missing or empty in the pending upload response") + + blob_uri = blob_ref.get("blobUri") if isinstance(blob_ref, dict) else None + if not blob_uri: + raise ValueError("Blob URI is missing or empty in the pending upload response") + + return ( + ContainerClient.from_container_url(container_url=sas_uri), + version, + blob_uri, + ) + + async def _get_next_version(self, name: str) -> str: + """Get the next version number for an evaluator by fetching existing versions.""" + try: + versions = [] + async for v in self.list_versions(name=name): + versions.append(v) + if versions: + numeric_versions = [] + for v in versions: + ver = v.get("version") if isinstance(v, dict) else getattr(v, "version", None) + if ver and ver.isdigit(): + numeric_versions.append(int(ver)) + if numeric_versions: + return str(max(numeric_versions) + 1) + return "1" + except ResourceNotFoundError: + return "1" + + @distributed_trace_async + async def upload( + 
self, + name: str, + evaluator_version: Union[EvaluatorVersion, JSON, IO[bytes]], + *, + folder: str, + version: Optional[str] = None, + connection_name: Optional[str] = None, + **kwargs: Any, + ) -> EvaluatorVersion: + """Upload all files in a folder to blob storage and create a code-based evaluator version + that references the uploaded code. + + This method calls startPendingUpload to get a SAS URI, uploads files from the folder + to blob storage, then creates an evaluator version referencing the uploaded blob. + + If no version is provided, the method will auto-increment based on existing versions. + + :param name: The name of the evaluator. Required. + :type name: str + :param evaluator_version: The evaluator version definition. This is the same object accepted + by ``create_version``. Is one of the following types: EvaluatorVersion, JSON, + IO[bytes]. Required. + :type evaluator_version: ~azure.ai.projects.models.EvaluatorVersion or JSON or IO[bytes] + :keyword folder: Path to the folder containing the evaluator Python code. Required. + :paramtype folder: str + :keyword version: The version identifier for the evaluator. If not provided, will + auto-increment from the latest existing version. Optional. + :paramtype version: str + :keyword connection_name: The name of an Azure Storage Account connection where the files + should be uploaded. If not specified, the default Azure Storage Account connection will be + used. Optional. + :paramtype connection_name: str + :return: The created evaluator version. + :rtype: ~azure.ai.projects.models.EvaluatorVersion + :raises ~azure.core.exceptions.HttpResponseError: If an error occurs during the HTTP request. 
+ """ + path_folder = Path(folder) + if not path_folder.exists(): + raise ValueError(f"The provided folder `{folder}` does not exist.") + if path_folder.is_file(): + raise ValueError("The provided path is a file, not a folder.") + + # Determine version + if not version: + version = await self._get_next_version(name) + logger.info("[upload] Auto-resolved version to '%s'.", version) + + # Get SAS URI via startPendingUpload + container_client, output_version, blob_uri = await self._start_pending_upload_and_get_container_client( + name=name, + version=version, + connection_name=connection_name, + ) + + async with container_client: + # Upload all files from the folder + files_uploaded: bool = False + for root, _, files in os.walk(folder): + for file in files: + file_path = os.path.join(root, file) + blob_name = os.path.relpath(file_path, folder).replace("\\", "/") + logger.debug( + "[upload] Start uploading file `%s` as blob `%s`.", + file_path, + blob_name, + ) + with open(file=file_path, mode="rb") as data: + await container_client.upload_blob(name=str(blob_name), data=data, **kwargs) + logger.debug("[upload] Done uploading file") + files_uploaded = True + logger.debug("[upload] Done uploading all files.") + + if not files_uploaded: + raise ValueError("The provided folder is empty.") + + # Set the blob_uri in the evaluator version definition + if isinstance(evaluator_version, dict): + definition = evaluator_version.get("definition", {}) + if isinstance(definition, dict): + definition["blob_uri"] = blob_uri + else: + definition.blob_uri = blob_uri + else: + if hasattr(evaluator_version, "definition") and evaluator_version.definition: + evaluator_version.definition.blob_uri = blob_uri + + result = await self.create_version( + name=name, + evaluator_version=evaluator_version, + ) + + return result diff --git a/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch.py b/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch.py index bc78f4d6baf8..f628dcfce670 
100644 --- a/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch.py +++ b/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch.py @@ -12,12 +12,12 @@ from ._patch_agents import AgentsOperations from ._patch_datasets import DatasetsOperations from ._patch_evaluation_rules import EvaluationRulesOperations +from ._patch_evaluators import EvaluatorsOperations as BetaEvaluatorsOperations from ._patch_telemetry import TelemetryOperations from ._patch_connections import ConnectionsOperations from ._patch_memories import BetaMemoryStoresOperations from ._operations import ( BetaEvaluationTaxonomiesOperations, - BetaEvaluatorsOperations, BetaInsightsOperations, BetaRedTeamsOperations, BetaSchedulesOperations, @@ -50,6 +50,8 @@ class BetaOperations(GeneratedBetaOperations): def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) + # Replace with patched class that includes upload() + self.evaluators = BetaEvaluatorsOperations(self._client, self._config, self._serialize, self._deserialize) # Replace with patched class that includes begin_update_memories self.memory_stores = BetaMemoryStoresOperations(self._client, self._config, self._serialize, self._deserialize) diff --git a/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py b/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py new file mode 100644 index 000000000000..0f5dbfd2c474 --- /dev/null +++ b/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py @@ -0,0 +1,211 @@ +# pylint: disable=line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +"""Customize generated code here. 
+ +Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize +""" +import os +import logging +from typing import Any, IO, Tuple, Optional, Union +from pathlib import Path +from urllib.parse import urlsplit +from azure.storage.blob import ContainerClient +from azure.core.tracing.decorator import distributed_trace +from azure.core.exceptions import HttpResponseError, ResourceNotFoundError +from ._operations import BetaEvaluatorsOperations as EvaluatorsOperationsGenerated, JSON +from ..models._models import ( + EvaluatorVersion, +) + +logger = logging.getLogger(__name__) + + +class EvaluatorsOperations(EvaluatorsOperationsGenerated): + """ + .. warning:: + **DO NOT** instantiate this class directly. + + Instead, you should access the following operations through + :class:`~azure.ai.projects.AIProjectClient`'s + :attr:`beta.evaluators` attribute. + """ + + def _start_pending_upload_and_get_container_client( + self, + name: str, + version: str, + connection_name: Optional[str] = None, + ) -> Tuple[ContainerClient, str, str]: + """Call startPendingUpload to get a SAS URI and return a ContainerClient and blob URI.""" + + request_body: dict = {} + if connection_name: + request_body["connectionName"] = connection_name + + try: + pending_upload_response = self.pending_upload( + name=name, + version=version, + pending_upload_request=request_body, + ) + except HttpResponseError as e: + if e.message and "409" in e.message: + raise ValueError( + f"Evaluator '{name}' version '{version}' already exists. " + f"Use a different version, delete the existing version with " + f"'client.beta.evaluators.delete_version(name=\"{name}\", version=\"{version}\")' " + f"before uploading, or set 'overwrite=True' in the upload call." 
+ ) from e + raise + + # The service returns blobReferenceForConsumption + blob_ref = pending_upload_response.get("blobReferenceForConsumption") + if not blob_ref: + raise ValueError("Blob reference is not present in the pending upload response") + + credential = blob_ref.get("credential") if isinstance(blob_ref, dict) else None + if not credential: + raise ValueError("SAS credential is not present in the pending upload response") + + sas_uri = credential.get("sasUri") if isinstance(credential, dict) else None + if not sas_uri: + raise ValueError("SAS URI is missing or empty in the pending upload response") + + blob_uri = blob_ref.get("blobUri") if isinstance(blob_ref, dict) else None + if not blob_uri: + raise ValueError("Blob URI is missing or empty in the pending upload response") + + return ( + ContainerClient.from_container_url(container_url=sas_uri), + version, + blob_uri, + ) + + def _get_next_version(self, name: str) -> str: + """Get the next version number for an evaluator by fetching existing versions.""" + try: + versions = list(self.list_versions(name=name)) + if versions: + numeric_versions = [] + for v in versions: + ver = v.get("version") if isinstance(v, dict) else getattr(v, "version", None) + if ver and ver.isdigit(): + numeric_versions.append(int(ver)) + if numeric_versions: + return str(max(numeric_versions) + 1) + return "1" + except ResourceNotFoundError: + return "1" + + @distributed_trace + def upload( + self, + name: str, + evaluator_version: Union[EvaluatorVersion, JSON, IO[bytes]], + *, + folder: str, + connection_name: Optional[str] = None, + **kwargs: Any, + ) -> EvaluatorVersion: + """Upload all files in a folder to blob storage and create a code-based evaluator version + that references the uploaded code. + + This method calls startPendingUpload to get a SAS URI, uploads files from the folder + to blob storage, then creates an evaluator version referencing the uploaded blob. 
+ + The version is automatically determined by incrementing the latest existing version. + + :param name: The name of the evaluator. Required. + :type name: str + :param evaluator_version: The evaluator version definition. This is the same object accepted + by ``create_version``. Is one of the following types: EvaluatorVersion, JSON, + IO[bytes]. Required. + :type evaluator_version: ~azure.ai.projects.models.EvaluatorVersion or JSON or IO[bytes] + :keyword folder: Path to the folder containing the evaluator Python code. Required. + :paramtype folder: str + :keyword connection_name: The name of an Azure Storage Account connection where the files + should be uploaded. If not specified, the default Azure Storage Account connection will be + used. Optional. + :paramtype connection_name: str + :return: The created evaluator version. + :rtype: ~azure.ai.projects.models.EvaluatorVersion + :raises ~azure.core.exceptions.HttpResponseError: If an error occurs during the HTTP request. + """ + path_folder = Path(folder) + if not path_folder.exists(): + raise ValueError(f"The provided folder `{folder}` does not exist.") + if path_folder.is_file(): + raise ValueError("The provided path is a file, not a folder.") + + version = self._get_next_version(name) + logger.info("[upload] Auto-resolved version to '%s'.", version) + + # Get SAS URI via startPendingUpload + container_client, output_version, blob_uri = self._start_pending_upload_and_get_container_client( + name=name, + version=version, + connection_name=connection_name, + ) + + with container_client: + # Upload all files from the folder (including nested subdirectories) + skip_dirs = {"__pycache__", ".git", ".venv", "venv", "node_modules"} + skip_extensions = {".pyc", ".pyo"} + files_uploaded: bool = False + for root, dirs, files in os.walk(folder): + # Prune directories we don't want to traverse + dirs[:] = [d for d in dirs if d not in skip_dirs] + for file in files: + if any(file.endswith(ext) for ext in skip_extensions): + 
continue + file_path = os.path.join(root, file) + blob_name = os.path.relpath(file_path, folder).replace("\\", "/") + logger.debug( + "[upload] Start uploading file `%s` as blob `%s`.", + file_path, + blob_name, + ) + with open(file=file_path, mode="rb") as data: + try: + container_client.upload_blob(name=str(blob_name), data=data, **kwargs) + except HttpResponseError as e: + if e.error_code == "AuthorizationPermissionMismatch": + storage_account = urlsplit(container_client.url).hostname + raise HttpResponseError( + message=( + f"Failed to upload file '{blob_name}' to blob storage: " + f"permission denied. Ensure the identity that signed the SAS token " + f"has the 'Storage Blob Data Contributor' role on the storage account " + f"'{storage_account}'. " + f"Original error: {e.message}" + ), + response=e.response, + ) from e + raise + logger.debug("[upload] Done uploading file") + files_uploaded = True + logger.debug("[upload] Done uploading all files.") + + if not files_uploaded: + raise ValueError("The provided folder is empty.") + + # Set the blob_uri in the evaluator version definition + if isinstance(evaluator_version, dict): + definition = evaluator_version.get("definition", {}) + if isinstance(definition, dict): + definition["blob_uri"] = blob_uri + else: + definition.blob_uri = blob_uri + else: + if hasattr(evaluator_version, "definition") and evaluator_version.definition: + evaluator_version.definition.blob_uri = blob_uri + + result = self.create_version( + name=name, + evaluator_version=evaluator_version, + ) + + return result diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/answer_length_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/answer_length_evaluator.py new file mode 100644 index 000000000000..1fa95ab19b1d --- /dev/null +++ b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/answer_length_evaluator.py @@ -0,0 +1,14 @@ +"""Custom evaluator that measures the length of a response.""" + 
+ +class AnswerLengthEvaluator: + def __init__(self, *, model_config): + self.model_config = model_config + + def __call__(self, *args, **kwargs): + return {"result": evaluate_answer_length(kwargs.get("response"))} + + +def evaluate_answer_length(answer: str): + return len(answer) + diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/common_util/__init__.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/common_util/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/common_util/util.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/common_util/util.py new file mode 100644 index 000000000000..7499261ba7c6 --- /dev/null +++ b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/common_util/util.py @@ -0,0 +1,72 @@ +"""Utility functions for custom evaluators.""" + +FRIENDLINESS_SYSTEM_PROMPT = """You are an expert evaluator that assesses how friendly, warm, and approachable +a response is. You evaluate responses on a scale of 1 to 5 based on the following criteria: + +Score 1 (Very Unfriendly): The response is cold, dismissive, rude, or hostile. +Score 2 (Unfriendly): The response is curt, impersonal, or lacks warmth. +Score 3 (Neutral): The response is acceptable but neither particularly friendly nor unfriendly. +Score 4 (Friendly): The response is warm, polite, and shows genuine interest in helping. +Score 5 (Very Friendly): The response is exceptionally warm, encouraging, empathetic, and makes the user feel valued. + +You MUST respond in the following JSON format only: +{ + "score": , + "label": "", + "reason": "", + "explanation": "" +} + +A score of 3 or above is considered "Pass", below 3 is "Fail". +""" + + +def build_evaluation_messages(query: str, response: str) -> list: + """Build the messages list for the LLM evaluation call. + + :param query: The original user query. 
+ :param response: The response to evaluate for friendliness. + :return: A list of message dicts for the chat completion API. + """ + return [ + {"role": "system", "content": FRIENDLINESS_SYSTEM_PROMPT}, + { + "role": "user", + "content": ( + f"Please evaluate the friendliness of the following response.\n\n" + f"Original query: {query}\n\n" + f"Response to evaluate: {response}" + ), + }, + ] + + +def parse_evaluation_result(raw_result: str) -> dict: + """Parse the LLM's JSON response into a structured evaluation result. + + :param raw_result: The raw string output from the LLM. + :return: A dict with score, label, reason, and explanation. + """ + import json + + try: + # Try to extract JSON from the response (handle markdown code blocks) + text = raw_result.strip() + if text.startswith("```"): + text = text.split("\n", 1)[1] if "\n" in text else text[3:] + text = text.rsplit("```", 1)[0] + result = json.loads(text.strip()) + score = int(result.get("score", 3)) + return { + "score": max(1, min(5, score)), + "label": result.get("label", "Pass" if score >= 3 else "Fail"), + "reason": result.get("reason", "No reason provided"), + "explanation": result.get("explanation", "No explanation provided"), + } + except (json.JSONDecodeError, ValueError, KeyError): + return { + "score": 3, + "label": "Pass", + "reason": "Could not parse LLM response", + "explanation": f"Raw LLM output: {raw_result}", + } diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator.py new file mode 100644 index 000000000000..c58ff350ba25 --- /dev/null +++ b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator.py @@ -0,0 +1,62 @@ +"""Custom evaluator that uses an LLM to assess the friendliness of a response.""" + +from openai import AzureOpenAI +from common_util.util import build_evaluation_messages, parse_evaluation_result + + +class FriendlyEvaluator: 
+ """Evaluates how friendly and approachable a response is using an LLM judge. + + This evaluator sends the query and response to an LLM, which returns a + friendliness score (1-5), a pass/fail label, a reason, and a detailed explanation. + + :param model_config: A dict containing Azure OpenAI connection info. Expected keys: + - azure_endpoint: The Azure OpenAI endpoint URL. + - azure_deployment: The deployment/model name. + - api_version: The API version (default: "2024-06-01"). + - api_key: (Optional) The API key. If not provided, DefaultAzureCredential is used. + """ + + def __init__(self, *, model_config: dict): + self.model_config = model_config + api_key = model_config.get("api_key") + + if api_key: + self.client = AzureOpenAI( + azure_endpoint=model_config["azure_endpoint"], + api_key=api_key, + api_version=model_config.get("api_version", "2024-06-01"), + ) + else: + from azure.identity import DefaultAzureCredential, get_bearer_token_provider + + token_provider = get_bearer_token_provider( + DefaultAzureCredential(), + "https://cognitiveservices.azure.com/.default", + ) + self.client = AzureOpenAI( + azure_endpoint=model_config["azure_endpoint"], + azure_ad_token_provider=token_provider, + api_version=model_config.get("api_version", "2024-06-01"), + ) + + self.deployment = model_config["azure_deployment"] + + def __call__(self, *, query: str, response: str, **kwargs) -> dict: + """Evaluate the friendliness of a response. + + :param query: The original user query. + :param response: The response to evaluate. + :return: A dict with score, label, reason, and explanation. 
+ """ + messages = build_evaluation_messages(query, response) + + completion = self.client.chat.completions.create( + model=self.deployment, + messages=messages, + temperature=0.0, + max_tokens=500, + ) + + raw_result = completion.choices[0].message.content + return parse_evaluation_result(raw_result) diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py new file mode 100644 index 000000000000..96a551c8b087 --- /dev/null +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py @@ -0,0 +1,127 @@ +# pylint: disable=line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ + +""" +DESCRIPTION: + Given an AIProjectClient, this sample demonstrates how to upload a local + folder containing custom evaluator Python code and register it as a + code-based evaluator version using the `evaluators.upload()` method. + It then calls getCredentials to verify access to the uploaded blob storage. + +USAGE: + python sample_eval_upload_custom_evaluator.py + + Before running the sample: + + pip install "azure-ai-projects>=2.0.0b4" azure-storage-blob python-dotenv azure-identity + + Set these environment variables with your own values: + 1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your + Microsoft Foundry project. It has the form: https://.services.ai.azure.com/api/projects/. 
+""" + +import os +from pathlib import Path +from pprint import pprint +from azure.identity import DefaultAzureCredential +from azure.ai.projects import AIProjectClient +from azure.ai.projects.models import ( + CodeBasedEvaluatorDefinition, + EvaluatorCategory, + EvaluatorCredentialRequest, + EvaluatorMetric, + EvaluatorMetricType, + EvaluatorMetricDirection, + EvaluatorType, + EvaluatorVersion, +) + +from dotenv import load_dotenv + +load_dotenv() + +endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"] + +# The folder containing the custom evaluator code, relative to this sample file. +local_upload_folder = str(Path(__file__).parent / "custom_evaluator") + +with ( + DefaultAzureCredential() as credential, + AIProjectClient(endpoint=endpoint, credential=credential) as project_client, +): + # --------------------------------------------------------------- + # 1. Upload evaluator code and create evaluator version + # upload() internally calls startPendingUpload to get a SAS URI, + # uploads the folder contents to blob storage, then creates the + # evaluator version with the blob URI. 
+ # --------------------------------------------------------------- + evaluator_version = EvaluatorVersion( + evaluator_type=EvaluatorType.CUSTOM, + categories=[EvaluatorCategory.QUALITY], + display_name="Answer Length Evaluator 2", + description="Custom evaluator to calculate length of content", + definition=CodeBasedEvaluatorDefinition( + entry_point="answer_length_evaluator:AnswerLengthEvaluator", + init_parameters={ + "type": "object", + "properties": {"model_config": {"type": "string"}}, + "required": ["model_config"], + }, + data_schema={ + "type": "object", + "properties": { + "query": {"type": "string"}, + "response": {"type": "string"}, + }, + "required": ["query", "response"], + }, + metrics={ + "score": EvaluatorMetric( + type=EvaluatorMetricType.ORDINAL, + desirable_direction=EvaluatorMetricDirection.INCREASE, + min_value=1, + max_value=5, + ) + }, + ), + ) + + print("Uploading custom evaluator code and creating evaluator version...") + code_evaluator = project_client.beta.evaluators.upload( + name="answer_length_evaluator_2", + evaluator_version=evaluator_version, + folder=local_upload_folder, + overwrite=True, + ) + + print(f"Evaluator created: name={code_evaluator.name}, version={code_evaluator.version}") + print(f"Evaluator ID: {code_evaluator.id}") + pprint(code_evaluator) + + # --------------------------------------------------------------- + # 2. Call getCredentials to verify access to the uploaded blob + # --------------------------------------------------------------- + blob_uri = code_evaluator["definition"]["blob_uri"] + print(f"\nCalling getCredentials with blob_uri: {blob_uri}") + + credential_response = project_client.beta.evaluators.get_credentials( + name=code_evaluator.name, + version=code_evaluator.version, + credential_request=EvaluatorCredentialRequest(blob_uri=blob_uri), + ) + + print("GetCredentials response:") + pprint(credential_response) + + # --------------------------------------------------------------- + # 3. 
Cleanup: delete the evaluator version + # --------------------------------------------------------------- + print("\nCleaning up - deleting the created evaluator version...") + # project_client.beta.evaluators.delete_version( + # name=code_evaluator.name, + # version=code_evaluator.version, + # ) + print("Done - upload and getCredentials verified successfully.") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py new file mode 100644 index 000000000000..63387a018fca --- /dev/null +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py @@ -0,0 +1,145 @@ +# pylint: disable=line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ + +""" +DESCRIPTION: + Given an AIProjectClient, this sample demonstrates how to upload a custom + LLM-based evaluator (FriendlyEvaluator) that uses the common_util helper + module. The evaluator calls Azure OpenAI to judge the friendliness of a + response and returns score, label, reason, and explanation. + + This proves that the upload() API can handle nested folder structures + (common_util/util.py is uploaded alongside friendly_evaluator.py). + +USAGE: + python sample_eval_upload_friendly_evaluator.py + + Before running the sample: + + pip install "azure-ai-projects>=2.0.0b4" azure-storage-blob python-dotenv azure-identity openai + + Set these environment variables with your own values: + 1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint. 
+""" + +import os +from pathlib import Path +from pprint import pprint +from azure.identity import DefaultAzureCredential +from azure.ai.projects import AIProjectClient +from azure.ai.projects.models import ( + CodeBasedEvaluatorDefinition, + EvaluatorCategory, + EvaluatorCredentialRequest, + EvaluatorMetric, + EvaluatorMetricType, + EvaluatorMetricDirection, + EvaluatorType, + EvaluatorVersion, +) + +from dotenv import load_dotenv + +load_dotenv() + +endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"] + +# The folder containing the evaluator code, including common_util/ subfolder +local_upload_folder = str(Path(__file__).parent / "custom_evaluator") + +with ( + DefaultAzureCredential() as credential, + AIProjectClient(endpoint=endpoint, credential=credential) as project_client, +): + # --------------------------------------------------------------- + # 1. Upload evaluator code and create evaluator version + # The folder structure uploaded is: + # custom_evaluator/ + # friendly_evaluator.py <- entry point + # common_util/ + # __init__.py + # util.py <- helper functions + # answer_length_evaluator.py <- (also uploaded, ignored by this evaluator) + # --------------------------------------------------------------- + evaluator_version = EvaluatorVersion( + evaluator_type=EvaluatorType.CUSTOM, + categories=[EvaluatorCategory.QUALITY], + display_name="Friendliness Evaluator 3", + description="LLM-based evaluator that scores how friendly a response is (1-5)", + definition=CodeBasedEvaluatorDefinition( + entry_point="friendly_evaluator:FriendlyEvaluator", + init_parameters={ + "type": "object", + "properties": { + "model_config": { + "type": "object", + "description": "Azure OpenAI configuration for the LLM judge", + "properties": { + "azure_endpoint": {"type": "string"}, + "azure_deployment": {"type": "string"}, + "api_version": {"type": "string"}, + "api_key": {"type": "string"}, + }, + "required": ["azure_endpoint", "azure_deployment"], + } + }, + "required": 
["model_config"], + }, + data_schema={ + "type": "object", + "properties": { + "query": {"type": "string", "description": "The original user query"}, + "response": {"type": "string", "description": "The response to evaluate for friendliness"}, + }, + "required": ["query", "response"], + }, + metrics={ + "score": EvaluatorMetric( + type=EvaluatorMetricType.ORDINAL, + desirable_direction=EvaluatorMetricDirection.INCREASE, + min_value=1, + max_value=5, + ) + }, + ), + ) + + print("Uploading FriendlyEvaluator (with nested common_util folder)...") + friendly_evaluator = project_client.beta.evaluators.upload( + name="friendly_evaluator_3", + evaluator_version=evaluator_version, + folder=local_upload_folder, + overwrite=True, + ) + + print(f"\nEvaluator created: name={friendly_evaluator.name}, version={friendly_evaluator.version}") + print(f"Evaluator ID: {friendly_evaluator.id}") + pprint(friendly_evaluator) + + # --------------------------------------------------------------- + # 2. Call getCredentials to verify blob storage access + # --------------------------------------------------------------- + blob_uri = friendly_evaluator["definition"]["blob_uri"] + print(f"\nCalling getCredentials with blob_uri: {blob_uri}") + + credential_response = project_client.beta.evaluators.get_credentials( + name=friendly_evaluator.name, + version=friendly_evaluator.version, + credential_request=EvaluatorCredentialRequest(blob_uri=blob_uri), + ) + + print("GetCredentials response:") + pprint(credential_response) + + # --------------------------------------------------------------- + # 3. 
Cleanup: delete the evaluator version + # --------------------------------------------------------------- + print("\nCleaning up - deleting the created evaluator version...") + # project_client.beta.evaluators.delete_version( + # name=friendly_evaluator.name, + # version=friendly_evaluator.version, + # ) + print("Done - FriendlyEvaluator upload with nested folders verified successfully.") From 3c542f59d283b8594cc6f04523ac8e1bf276ddb3 Mon Sep 17 00:00:00 2001 From: Waqas Javed <7674577+w-javed@users.noreply.github.com> Date: Thu, 12 Mar 2026 17:07:46 -0700 Subject: [PATCH 2/7] Adding-Upload-Evaluator --- .../evaluations/sample_eval_upload_custom_evaluator.py | 4 ++-- .../evaluations/sample_eval_upload_friendly_evaluator.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py index 96a551c8b087..b58e22543946 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py @@ -61,7 +61,7 @@ evaluator_version = EvaluatorVersion( evaluator_type=EvaluatorType.CUSTOM, categories=[EvaluatorCategory.QUALITY], - display_name="Answer Length Evaluator 2", + display_name="Answer Length Evaluator", description="Custom evaluator to calculate length of content", definition=CodeBasedEvaluatorDefinition( entry_point="answer_length_evaluator:AnswerLengthEvaluator", @@ -91,7 +91,7 @@ print("Uploading custom evaluator code and creating evaluator version...") code_evaluator = project_client.beta.evaluators.upload( - name="answer_length_evaluator_2", + name="answer_length_evaluator", evaluator_version=evaluator_version, folder=local_upload_folder, overwrite=True, diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py 
b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py index 63387a018fca..939b6f40f323 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py @@ -67,7 +67,7 @@ evaluator_version = EvaluatorVersion( evaluator_type=EvaluatorType.CUSTOM, categories=[EvaluatorCategory.QUALITY], - display_name="Friendliness Evaluator 3", + display_name="Friendliness Evaluator", description="LLM-based evaluator that scores how friendly a response is (1-5)", definition=CodeBasedEvaluatorDefinition( entry_point="friendly_evaluator:FriendlyEvaluator", @@ -109,7 +109,7 @@ print("Uploading FriendlyEvaluator (with nested common_util folder)...") friendly_evaluator = project_client.beta.evaluators.upload( - name="friendly_evaluator_3", + name="friendly_evaluator", evaluator_version=evaluator_version, folder=local_upload_folder, overwrite=True, From 42c327524c1ba9be08ec2254dbce6aa87125f66d Mon Sep 17 00:00:00 2001 From: Waqas Javed <7674577+w-javed@users.noreply.github.com> Date: Thu, 12 Mar 2026 17:19:03 -0700 Subject: [PATCH 3/7] Adding-Upload-Evaluator --- .../projects/operations/_patch_evaluators.py | 20 +++++-------------- .../answer_length_evaluator.py | 0 .../common_util/__init__.py | 0 .../common_util/util.py | 0 .../friendly_evaluator.py | 0 .../sample_eval_upload_custom_evaluator.py | 4 ++-- .../sample_eval_upload_friendly_evaluator.py | 7 +++---- 7 files changed, 10 insertions(+), 21 deletions(-) rename sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/{ => answer_length_evaluator}/answer_length_evaluator.py (100%) rename sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/{ => friendly_evaluator}/common_util/__init__.py (100%) rename sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/{ => friendly_evaluator}/common_util/util.py (100%) rename 
sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/{ => friendly_evaluator}/friendly_evaluator.py (100%) diff --git a/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py b/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py index 0f5dbfd2c474..b6ebb813992d 100644 --- a/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py +++ b/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py @@ -45,21 +45,11 @@ def _start_pending_upload_and_get_container_client( if connection_name: request_body["connectionName"] = connection_name - try: - pending_upload_response = self.pending_upload( - name=name, - version=version, - pending_upload_request=request_body, - ) - except HttpResponseError as e: - if e.message and "409" in e.message: - raise ValueError( - f"Evaluator '{name}' version '{version}' already exists. " - f"Use a different version, delete the existing version with " - f"'client.beta.evaluators.delete_version(name=\"{name}\", version=\"{version}\")' " - f"before uploading, or set 'overwrite=True' in the upload call." 
- ) from e - raise + pending_upload_response = self.pending_upload( + name=name, + version=version, + pending_upload_request=request_body, + ) # The service returns blobReferenceForConsumption blob_ref = pending_upload_response.get("blobReferenceForConsumption") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/answer_length_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/answer_length_evaluator/answer_length_evaluator.py similarity index 100% rename from sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/answer_length_evaluator.py rename to sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/answer_length_evaluator/answer_length_evaluator.py diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/common_util/__init__.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator/common_util/__init__.py similarity index 100% rename from sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/common_util/__init__.py rename to sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator/common_util/__init__.py diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/common_util/util.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator/common_util/util.py similarity index 100% rename from sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/common_util/util.py rename to sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator/common_util/util.py diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator/friendly_evaluator.py similarity index 100% rename from sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator.py rename to 
sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator/friendly_evaluator.py diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py index b58e22543946..a87bb1b7c037 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py @@ -45,8 +45,8 @@ endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"] -# The folder containing the custom evaluator code, relative to this sample file. -local_upload_folder = str(Path(__file__).parent / "custom_evaluator") +# The folder containing the AnswerLength evaluator code, relative to this sample file. +local_upload_folder = str(Path(__file__).parent / "custom_evaluator" / "answer_length_evaluator") with ( DefaultAzureCredential() as credential, diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py index 939b6f40f323..b49e3038f027 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py @@ -47,8 +47,8 @@ endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"] -# The folder containing the evaluator code, including common_util/ subfolder -local_upload_folder = str(Path(__file__).parent / "custom_evaluator") +# The folder containing the FriendlyEvaluator code, including common_util/ subfolder +local_upload_folder = str(Path(__file__).parent / "custom_evaluator" / "friendly_evaluator") with ( DefaultAzureCredential() as credential, @@ -57,12 +57,11 @@ # --------------------------------------------------------------- # 1. 
Upload evaluator code and create evaluator version # The folder structure uploaded is: - # custom_evaluator/ + # friendly_evaluator/ # friendly_evaluator.py <- entry point # common_util/ # __init__.py # util.py <- helper functions - # answer_length_evaluator.py <- (also uploaded, ignored by this evaluator) # --------------------------------------------------------------- evaluator_version = EvaluatorVersion( evaluator_type=EvaluatorType.CUSTOM, From a6285fde7b6a59dbc758ec3f07e39e6a22efb43d Mon Sep 17 00:00:00 2001 From: Waqas Javed <7674577+w-javed@users.noreply.github.com> Date: Thu, 12 Mar 2026 17:35:01 -0700 Subject: [PATCH 4/7] Adding-Upload-Evaluator-aio --- .../aio/operations/_patch_evaluators_async.py | 42 +++++++++++++------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py b/sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py index aa37adaa48ca..966e377d68a1 100644 --- a/sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py +++ b/sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py @@ -11,9 +11,10 @@ import logging from typing import Any, IO, Tuple, Optional, Union from pathlib import Path +from urllib.parse import urlsplit from azure.storage.blob.aio import ContainerClient from azure.core.tracing.decorator_async import distributed_trace_async -from azure.core.exceptions import ResourceNotFoundError +from azure.core.exceptions import HttpResponseError, ResourceNotFoundError from ._operations import BetaEvaluatorsOperations as EvaluatorsOperationsGenerated, JSON from ...models._models import ( EvaluatorVersion, @@ -98,7 +99,6 @@ async def upload( evaluator_version: Union[EvaluatorVersion, JSON, IO[bytes]], *, folder: str, - version: Optional[str] = None, connection_name: Optional[str] = None, **kwargs: Any, ) -> EvaluatorVersion: @@ -108,7 +108,7 @@ async def upload( 
This method calls startPendingUpload to get a SAS URI, uploads files from the folder to blob storage, then creates an evaluator version referencing the uploaded blob. - If no version is provided, the method will auto-increment based on existing versions. + The version is automatically determined by incrementing the latest existing version. :param name: The name of the evaluator. Required. :type name: str @@ -118,9 +118,6 @@ async def upload( :type evaluator_version: ~azure.ai.projects.models.EvaluatorVersion or JSON or IO[bytes] :keyword folder: Path to the folder containing the evaluator Python code. Required. :paramtype folder: str - :keyword version: The version identifier for the evaluator. If not provided, will - auto-increment from the latest existing version. Optional. - :paramtype version: str :keyword connection_name: The name of an Azure Storage Account connection where the files should be uploaded. If not specified, the default Azure Storage Account connection will be used. Optional. 
@@ -135,10 +132,8 @@ async def upload( if path_folder.is_file(): raise ValueError("The provided path is a file, not a folder.") - # Determine version - if not version: - version = await self._get_next_version(name) - logger.info("[upload] Auto-resolved version to '%s'.", version) + version = await self._get_next_version(name) + logger.info("[upload] Auto-resolved version to '%s'.", version) # Get SAS URI via startPendingUpload container_client, output_version, blob_uri = await self._start_pending_upload_and_get_container_client( @@ -148,10 +143,16 @@ async def upload( ) async with container_client: - # Upload all files from the folder + # Upload all files from the folder (including nested subdirectories) + skip_dirs = {"__pycache__", ".git", ".venv", "venv", "node_modules"} + skip_extensions = {".pyc", ".pyo"} files_uploaded: bool = False - for root, _, files in os.walk(folder): + for root, dirs, files in os.walk(folder): + # Prune directories we don't want to traverse + dirs[:] = [d for d in dirs if d not in skip_dirs] for file in files: + if any(file.endswith(ext) for ext in skip_extensions): + continue file_path = os.path.join(root, file) blob_name = os.path.relpath(file_path, folder).replace("\\", "/") logger.debug( @@ -160,7 +161,22 @@ async def upload( blob_name, ) with open(file=file_path, mode="rb") as data: - await container_client.upload_blob(name=str(blob_name), data=data, **kwargs) + try: + await container_client.upload_blob(name=str(blob_name), data=data, **kwargs) + except HttpResponseError as e: + if e.error_code == "AuthorizationPermissionMismatch": + storage_account = urlsplit(container_client.url).hostname + raise HttpResponseError( + message=( + f"Failed to upload file '{blob_name}' to blob storage: " + f"permission denied. Ensure the identity that signed the SAS token " + f"has the 'Storage Blob Data Contributor' role on the storage account " + f"'{storage_account}'. 
" + f"Original error: {e.message}" + ), + response=e.response, + ) from e + raise logger.debug("[upload] Done uploading file") files_uploaded = True logger.debug("[upload] Done uploading all files.") From d311a7a03eaaa871c58c203706612effe4d8b481 Mon Sep 17 00:00:00 2001 From: Waqas Javed <7674577+w-javed@users.noreply.github.com> Date: Thu, 12 Mar 2026 23:13:53 -0700 Subject: [PATCH 5/7] rename --- .../answer_length_evaluator/answer_length_evaluator.py | 0 .../friendly_evaluator/common_util/__init__.py | 0 .../friendly_evaluator/common_util/util.py | 0 .../friendly_evaluator/friendly_evaluator.py | 0 .../samples/evaluations/sample_eval_upload_custom_evaluator.py | 2 +- .../evaluations/sample_eval_upload_friendly_evaluator.py | 2 +- 6 files changed, 2 insertions(+), 2 deletions(-) rename sdk/ai/azure-ai-projects/samples/evaluations/{custom_evaluator => custom_evaluators}/answer_length_evaluator/answer_length_evaluator.py (100%) rename sdk/ai/azure-ai-projects/samples/evaluations/{custom_evaluator => custom_evaluators}/friendly_evaluator/common_util/__init__.py (100%) rename sdk/ai/azure-ai-projects/samples/evaluations/{custom_evaluator => custom_evaluators}/friendly_evaluator/common_util/util.py (100%) rename sdk/ai/azure-ai-projects/samples/evaluations/{custom_evaluator => custom_evaluators}/friendly_evaluator/friendly_evaluator.py (100%) diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/answer_length_evaluator/answer_length_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/answer_length_evaluator/answer_length_evaluator.py similarity index 100% rename from sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/answer_length_evaluator/answer_length_evaluator.py rename to sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/answer_length_evaluator/answer_length_evaluator.py diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator/common_util/__init__.py 
b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/common_util/__init__.py similarity index 100% rename from sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator/common_util/__init__.py rename to sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/common_util/__init__.py diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator/common_util/util.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/common_util/util.py similarity index 100% rename from sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator/common_util/util.py rename to sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/common_util/util.py diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator/friendly_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/friendly_evaluator.py similarity index 100% rename from sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluator/friendly_evaluator/friendly_evaluator.py rename to sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/friendly_evaluator.py diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py index a87bb1b7c037..fb1592d0d8ff 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py @@ -46,7 +46,7 @@ endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"] # The folder containing the AnswerLength evaluator code, relative to this sample file. 
-local_upload_folder = str(Path(__file__).parent / "custom_evaluator" / "answer_length_evaluator") +local_upload_folder = str(Path(__file__).parent / "custom_evaluators" / "answer_length_evaluator") with ( DefaultAzureCredential() as credential, diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py index b49e3038f027..6d9b6910160c 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py @@ -48,7 +48,7 @@ endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"] # The folder containing the FriendlyEvaluator code, including common_util/ subfolder -local_upload_folder = str(Path(__file__).parent / "custom_evaluator" / "friendly_evaluator") +local_upload_folder = str(Path(__file__).parent / "custom_evaluators" / "friendly_evaluator") with ( DefaultAzureCredential() as credential, From 53a946d725db1ac0764b6f350e0f32eaed037232 Mon Sep 17 00:00:00 2001 From: Waqas Javed <7674577+w-javed@users.noreply.github.com> Date: Thu, 12 Mar 2026 23:53:13 -0700 Subject: [PATCH 6/7] added - eval and eval run --- .../sample_eval_upload_custom_evaluator.py | 143 ++++++++++++++--- .../sample_eval_upload_friendly_evaluator.py | 148 +++++++++++++++--- 2 files changed, 244 insertions(+), 47 deletions(-) diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py index fb1592d0d8ff..6a555ae5a3cd 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py @@ -6,32 +6,44 @@ """ DESCRIPTION: - Given an AIProjectClient, this sample demonstrates how to upload a local - folder containing custom 
evaluator Python code and register it as a - code-based evaluator version using the `evaluators.upload()` method. - It then calls getCredentials to verify access to the uploaded blob storage. + Given an AIProjectClient, this sample demonstrates how to: + 1. Upload a local folder containing custom evaluator Python code and + register it as a code-based evaluator version using `evaluators.upload()`. + 2. Create an evaluation (eval) that references the uploaded evaluator. + 3. Run the evaluation with inline data and poll for results. USAGE: python sample_eval_upload_custom_evaluator.py Before running the sample: - pip install "azure-ai-projects>=2.0.0b4" azure-storage-blob python-dotenv azure-identity + pip install "azure-ai-projects>=2.0.0b4" azure-storage-blob python-dotenv azure-identity openai Set these environment variables with your own values: - 1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your + 1) FOUNDRY_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your Microsoft Foundry project. It has the form: https://<account_name>.services.ai.azure.com/api/projects/<project_name>. + 2) FOUNDRY_MODEL_NAME - Optional. The name of the model deployment to use for evaluation.
""" import os +import time +import random +import string from pathlib import Path from pprint import pprint + +from dotenv import load_dotenv +from openai.types.evals.create_eval_jsonl_run_data_source_param import ( + CreateEvalJSONLRunDataSourceParam, + SourceFileContent, + SourceFileContentContent, +) +from openai.types.eval_create_params import DataSourceConfigCustom from azure.identity import DefaultAzureCredential from azure.ai.projects import AIProjectClient from azure.ai.projects.models import ( CodeBasedEvaluatorDefinition, EvaluatorCategory, - EvaluatorCredentialRequest, EvaluatorMetric, EvaluatorMetricType, EvaluatorMetricDirection, @@ -39,11 +51,12 @@ EvaluatorVersion, ) -from dotenv import load_dotenv - load_dotenv() -endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"] +endpoint = os.environ["FOUNDRY_PROJECT_ENDPOINT"] +model_deployment_name = os.environ.get("FOUNDRY_MODEL_NAME") +suffix = "".join(random.choices(string.ascii_lowercase, k=5)) +evaluator_name = f"answer_length_evaluator_{suffix}" # The folder containing the AnswerLength evaluator code, relative to this sample file. local_upload_folder = str(Path(__file__).parent / "custom_evaluators" / "answer_length_evaluator") @@ -51,6 +64,7 @@ with ( DefaultAzureCredential() as credential, AIProjectClient(endpoint=endpoint, credential=credential) as project_client, + project_client.get_openai_client() as client, ): # --------------------------------------------------------------- # 1. Upload evaluator code and create evaluator version @@ -91,7 +105,7 @@ print("Uploading custom evaluator code and creating evaluator version...") code_evaluator = project_client.beta.evaluators.upload( - name="answer_length_evaluator", + name=evaluator_name, evaluator_version=evaluator_version, folder=local_upload_folder, overwrite=True, @@ -102,26 +116,109 @@ pprint(code_evaluator) # --------------------------------------------------------------- - # 2. Call getCredentials to verify access to the uploaded blob + # 2. 
Create an evaluation referencing the uploaded evaluator # --------------------------------------------------------------- - blob_uri = code_evaluator["definition"]["blob_uri"] - print(f"\nCalling getCredentials with blob_uri: {blob_uri}") + data_source_config = DataSourceConfigCustom( + { + "type": "custom", + "item_schema": { + "type": "object", + "properties": { + "query": {"type": "string"}, + "response": {"type": "string"}, + }, + "required": ["query", "response"], + }, + "include_sample_schema": True, + } + ) - credential_response = project_client.beta.evaluators.get_credentials( - name=code_evaluator.name, - version=code_evaluator.version, - credential_request=EvaluatorCredentialRequest(blob_uri=blob_uri), + testing_criteria = [ + { + "type": "azure_ai_evaluator", + "name": evaluator_name, + "evaluator_name": evaluator_name, + "initialization_parameters": { + "model_config": f"{model_deployment_name}", + }, + } + ] + + print("\nCreating evaluation...") + eval_object = client.evals.create( + name=f"Answer Length Evaluation - {suffix}", + data_source_config=data_source_config, + testing_criteria=testing_criteria, # type: ignore ) + print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})") - print("GetCredentials response:") - pprint(credential_response) + # --------------------------------------------------------------- + # 3. 
Run the evaluation with inline data + # --------------------------------------------------------------- + print("\nCreating evaluation run with inline data...") + eval_run_object = client.evals.runs.create( + eval_id=eval_object.id, + name=f"Answer Length Eval Run - {suffix}", + metadata={"team": "eval-exp", "scenario": "answer-length-v1"}, + data_source=CreateEvalJSONLRunDataSourceParam( + type="jsonl", + source=SourceFileContent( + type="file_content", + content=[ + SourceFileContentContent( + item={ + "query": "What is the capital of France?", + "response": "Paris", + } + ), + SourceFileContentContent( + item={ + "query": "Explain quantum computing", + "response": "Quantum computing leverages quantum mechanical phenomena like superposition and entanglement to process information in fundamentally different ways than classical computers.", + } + ), + SourceFileContentContent( + item={ + "query": "What is AI?", + "response": "AI stands for Artificial Intelligence. It is a branch of computer science that aims to create intelligent machines that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation.", + } + ), + SourceFileContentContent( + item={ + "query": "Say hello", + "response": "Hi!", + } + ), + ], + ), + ), + ) + + print(f"Evaluation run created (id: {eval_run_object.id})") + pprint(eval_run_object) + + # --------------------------------------------------------------- + # 4. 
Poll for evaluation run completion + # --------------------------------------------------------------- + while True: + run = client.evals.runs.retrieve(run_id=eval_run_object.id, eval_id=eval_object.id) + if run.status in ("completed", "failed"): + print(f"\nEvaluation run finished with status: {run.status}") + output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) + pprint(output_items) + print(f"\nEvaluation run Report URL: {run.report_url}") + break + time.sleep(5) + print("Waiting for evaluation run to complete...") # --------------------------------------------------------------- - # 3. Cleanup: delete the evaluator version + # 5. Cleanup (uncomment to delete) # --------------------------------------------------------------- - print("\nCleaning up - deleting the created evaluator version...") + # print("\nCleaning up...") # project_client.beta.evaluators.delete_version( # name=code_evaluator.name, # version=code_evaluator.version, # ) - print("Done - upload and getCredentials verified successfully.") + # client.evals.delete(eval_id=eval_object.id) + # print("Cleanup done.") + print("\nDone - upload, eval creation, and eval run verified successfully.") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py index 6d9b6910160c..b8e00ffdeb7d 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py @@ -6,13 +6,14 @@ """ DESCRIPTION: - Given an AIProjectClient, this sample demonstrates how to upload a custom - LLM-based evaluator (FriendlyEvaluator) that uses the common_util helper - module. The evaluator calls Azure OpenAI to judge the friendliness of a - response and returns score, label, reason, and explanation. 
+ Given an AIProjectClient, this sample demonstrates how to: + 1. Upload a custom LLM-based evaluator (FriendlyEvaluator) with nested + folder structure (common_util/) using `evaluators.upload()`. + 2. Create an evaluation (eval) that references the uploaded evaluator. + 3. Run the evaluation with inline data and poll for results. - This proves that the upload() API can handle nested folder structures - (common_util/util.py is uploaded alongside friendly_evaluator.py). + The FriendlyEvaluator calls Azure OpenAI to judge the friendliness of a + response and returns score, label, reason, and explanation. USAGE: python sample_eval_upload_friendly_evaluator.py @@ -22,18 +23,29 @@ pip install "azure-ai-projects>=2.0.0b4" azure-storage-blob python-dotenv azure-identity openai Set these environment variables with your own values: - 1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint. + 1) FOUNDRY_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint. + 2) FOUNDRY_MODEL_NAME - Optional. The name of the model deployment to use for evaluation. 
""" import os +import time +import random +import string from pathlib import Path from pprint import pprint + +from dotenv import load_dotenv +from openai.types.evals.create_eval_jsonl_run_data_source_param import ( + CreateEvalJSONLRunDataSourceParam, + SourceFileContent, + SourceFileContentContent, +) +from openai.types.eval_create_params import DataSourceConfigCustom from azure.identity import DefaultAzureCredential from azure.ai.projects import AIProjectClient from azure.ai.projects.models import ( CodeBasedEvaluatorDefinition, EvaluatorCategory, - EvaluatorCredentialRequest, EvaluatorMetric, EvaluatorMetricType, EvaluatorMetricDirection, @@ -41,11 +53,12 @@ EvaluatorVersion, ) -from dotenv import load_dotenv - load_dotenv() -endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"] +endpoint = os.environ["FOUNDRY_PROJECT_ENDPOINT"] +model_deployment_name = os.environ.get("FOUNDRY_MODEL_NAME") +suffix = "".join(random.choices(string.ascii_lowercase, k=5)) +evaluator_name = f"friendly_evaluator_{suffix}" # The folder containing the FriendlyEvaluator code, including common_util/ subfolder local_upload_folder = str(Path(__file__).parent / "custom_evaluators" / "friendly_evaluator") @@ -53,6 +66,7 @@ with ( DefaultAzureCredential() as credential, AIProjectClient(endpoint=endpoint, credential=credential) as project_client, + project_client.get_openai_client() as client, ): # --------------------------------------------------------------- # 1. Upload evaluator code and create evaluator version @@ -108,7 +122,7 @@ print("Uploading FriendlyEvaluator (with nested common_util folder)...") friendly_evaluator = project_client.beta.evaluators.upload( - name="friendly_evaluator", + name=evaluator_name, evaluator_version=evaluator_version, folder=local_upload_folder, overwrite=True, @@ -119,26 +133,112 @@ pprint(friendly_evaluator) # --------------------------------------------------------------- - # 2. Call getCredentials to verify blob storage access + # 2. 
Create an evaluation referencing the uploaded evaluator # --------------------------------------------------------------- - blob_uri = friendly_evaluator["definition"]["blob_uri"] - print(f"\nCalling getCredentials with blob_uri: {blob_uri}") + data_source_config = DataSourceConfigCustom( + { + "type": "custom", + "item_schema": { + "type": "object", + "properties": { + "query": {"type": "string"}, + "response": {"type": "string"}, + }, + "required": ["query", "response"], + }, + "include_sample_schema": True, + } + ) - credential_response = project_client.beta.evaluators.get_credentials( - name=friendly_evaluator.name, - version=friendly_evaluator.version, - credential_request=EvaluatorCredentialRequest(blob_uri=blob_uri), + testing_criteria = [ + { + "type": "azure_ai_evaluator", + "name": evaluator_name, + "evaluator_name": evaluator_name, + "initialization_parameters": { + "model_config": { + "azure_endpoint": endpoint, + "azure_deployment": f"{model_deployment_name}", + }, + }, + } + ] + + print("\nCreating evaluation...") + eval_object = client.evals.create( + name=f"Friendliness Evaluation - {suffix}", + data_source_config=data_source_config, + testing_criteria=testing_criteria, # type: ignore ) + print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})") - print("GetCredentials response:") - pprint(credential_response) + # --------------------------------------------------------------- + # 3. 
Run the evaluation with inline data + # --------------------------------------------------------------- + print("\nCreating evaluation run with inline data...") + eval_run_object = client.evals.runs.create( + eval_id=eval_object.id, + name=f"Friendliness Eval Run - {suffix}", + metadata={"team": "eval-exp", "scenario": "friendliness-v1"}, + data_source=CreateEvalJSONLRunDataSourceParam( + type="jsonl", + source=SourceFileContent( + type="file_content", + content=[ + SourceFileContentContent( + item={ + "query": "How do I reset my password?", + "response": "Go to settings and click reset. That's it.", + } + ), + SourceFileContentContent( + item={ + "query": "I'm having trouble with my account", + "response": "I'm really sorry to hear you're having trouble! I'd love to help you get this sorted out. Could you tell me a bit more about what's happening so I can assist you better?", + } + ), + SourceFileContentContent( + item={ + "query": "Can you help me?", + "response": "Read the docs.", + } + ), + SourceFileContentContent( + item={ + "query": "What's the weather like today?", + "response": "Great question! While I'm not a weather service, I'd be happy to suggest some wonderful weather apps that can give you accurate forecasts. Would you like some recommendations? 😊", + } + ), + ], + ), + ), + ) + + print(f"Evaluation run created (id: {eval_run_object.id})") + pprint(eval_run_object) + + # --------------------------------------------------------------- + # 4. 
Poll for evaluation run completion + # --------------------------------------------------------------- + while True: + run = client.evals.runs.retrieve(run_id=eval_run_object.id, eval_id=eval_object.id) + if run.status in ("completed", "failed"): + print(f"\nEvaluation run finished with status: {run.status}") + output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) + pprint(output_items) + print(f"\nEvaluation run Report URL: {run.report_url}") + break + time.sleep(5) + print("Waiting for evaluation run to complete...") # --------------------------------------------------------------- - # 3. Cleanup: delete the evaluator version + # 5. Cleanup (uncomment to delete) # --------------------------------------------------------------- - print("\nCleaning up - deleting the created evaluator version...") + # print("\nCleaning up...") # project_client.beta.evaluators.delete_version( # name=friendly_evaluator.name, # version=friendly_evaluator.version, # ) - print("Done - FriendlyEvaluator upload with nested folders verified successfully.") + # client.evals.delete(eval_id=eval_object.id) + # print("Cleanup done.") + print("\nDone - FriendlyEvaluator upload, eval creation, and eval run verified successfully.") From e92e234198cb341936585437c81714bec117a4de Mon Sep 17 00:00:00 2001 From: Waqas Javed <7674577+w-javed@users.noreply.github.com> Date: Fri, 13 Mar 2026 00:38:11 -0700 Subject: [PATCH 7/7] fix --- .../sample_eval_upload_custom_evaluator.py | 4 ++-- .../sample_eval_upload_friendly_evaluator.py | 15 +++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py index 6a555ae5a3cd..d8326f8d2c33 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py +++ 
b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py @@ -55,8 +55,6 @@ endpoint = os.environ["FOUNDRY_PROJECT_ENDPOINT"] model_deployment_name = os.environ.get("FOUNDRY_MODEL_NAME") -suffix = "".join(random.choices(string.ascii_lowercase, k=5)) -evaluator_name = f"answer_length_evaluator_{suffix}" # The folder containing the AnswerLength evaluator code, relative to this sample file. local_upload_folder = str(Path(__file__).parent / "custom_evaluators" / "answer_length_evaluator") @@ -72,6 +70,8 @@ # uploads the folder contents to blob storage, then creates the # evaluator version with the blob URI. # --------------------------------------------------------------- + suffix = "".join(random.choices(string.ascii_lowercase, k=5)) + evaluator_name = f"answer_length_evaluator_{suffix}" evaluator_version = EvaluatorVersion( evaluator_type=EvaluatorType.CUSTOM, categories=[EvaluatorCategory.QUALITY], diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py index b8e00ffdeb7d..bcea7e6aff42 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py @@ -57,8 +57,8 @@ endpoint = os.environ["FOUNDRY_PROJECT_ENDPOINT"] model_deployment_name = os.environ.get("FOUNDRY_MODEL_NAME") -suffix = "".join(random.choices(string.ascii_lowercase, k=5)) -evaluator_name = f"friendly_evaluator_{suffix}" +azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"] +azure_openai_api_key = os.environ["AZURE_OPENAI_API_KEY"] # The folder containing the FriendlyEvaluator code, including common_util/ subfolder local_upload_folder = str(Path(__file__).parent / "custom_evaluators" / "friendly_evaluator") @@ -77,6 +77,9 @@ # __init__.py # util.py <- helper functions # 
--------------------------------------------------------------- + suffix = "".join(random.choices(string.ascii_lowercase, k=5)) + evaluator_name = f"friendly_evaluator_{suffix}" + evaluator_version = EvaluatorVersion( evaluator_type=EvaluatorType.CUSTOM, categories=[EvaluatorCategory.QUALITY], @@ -92,11 +95,10 @@ "description": "Azure OpenAI configuration for the LLM judge", "properties": { "azure_endpoint": {"type": "string"}, - "azure_deployment": {"type": "string"}, "api_version": {"type": "string"}, "api_key": {"type": "string"}, }, - "required": ["azure_endpoint", "azure_deployment"], + "required": ["azure_endpoint", "api_key"], } }, "required": ["model_config"], @@ -157,8 +159,9 @@ "evaluator_name": evaluator_name, "initialization_parameters": { "model_config": { - "azure_endpoint": endpoint, - "azure_deployment": f"{model_deployment_name}", + "azure_endpoint": azure_openai_endpoint, + "api_key": f"{azure_openai_api_key}", + "api_version": "2024-06-01", }, }, }