diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 1e40ff4b..c82eca85 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -4,5 +4,5 @@ FROM mcr.microsoft.com/devcontainers/${IMAGE} ENV PYTHONUNBUFFERED 1 RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ - && apt-get -y install --no-install-recommends postgresql-client \ - && apt-get clean -y && rm -rf /var/lib/apt/lists/* \ No newline at end of file + && apt-get -y install --no-install-recommends postgresql-client zstd \ + && apt-get clean -y && rm -rf /var/lib/apt/lists/* diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4c9a9b6e..006422d3 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -29,16 +29,13 @@ "extensions": [ "ms-python.python", "ms-python.vscode-pylance", - "ms-python.vscode-python-envs", "charliermarsh.ruff", "mtxr.sqltools", "mtxr.sqltools-driver-pg", "esbenp.prettier-vscode", "mechatroner.rainbow-csv", "ms-vscode.vscode-node-azure-pack", - "esbenp.prettier-vscode", "twixes.pypi-assistant", - "ms-python.vscode-python-envs", "teamsdevapp.vscode-ai-foundry", "ms-windows-ai-studio.windows-ai-studio" ], diff --git a/.env.sample b/.env.sample index 44517fd1..6fee2d2f 100644 --- a/.env.sample +++ b/.env.sample @@ -5,16 +5,15 @@ POSTGRES_PASSWORD=postgres POSTGRES_DATABASE=postgres POSTGRES_SSL=disable -# OPENAI_CHAT_HOST can be either azure, openai, ollama, or github: +# OPENAI_CHAT_HOST can be either azure, openai, or ollama: OPENAI_CHAT_HOST=azure -# OPENAI_EMBED_HOST can be either azure, openai, ollama, or github: +# OPENAI_EMBED_HOST can be either azure, openai, or ollama: OPENAI_EMBED_HOST=azure # Needed for Azure: # You also need to `azd auth login` if running this locally AZURE_OPENAI_ENDPOINT=https://YOUR-AZURE-OPENAI-SERVICE-NAME.openai.azure.com -AZURE_OPENAI_VERSION=2024-03-01-preview -AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-4o-mini -AZURE_OPENAI_CHAT_MODEL=gpt-4o-mini 
+AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-5.4 +AZURE_OPENAI_CHAT_MODEL=gpt-5.4 AZURE_OPENAI_EMBED_DEPLOYMENT=text-embedding-3-large AZURE_OPENAI_EMBED_MODEL=text-embedding-3-large AZURE_OPENAI_EMBED_DIMENSIONS=1024 @@ -35,9 +34,3 @@ OLLAMA_ENDPOINT=http://host.docker.internal:11434/v1 OLLAMA_CHAT_MODEL=llama3.1 OLLAMA_EMBED_MODEL=nomic-embed-text OLLAMA_EMBEDDING_COLUMN=embedding_nomic -# Needed for GitHub Models: -GITHUB_TOKEN=YOUR-GITHUB-TOKEN -GITHUB_MODEL=openai/gpt-4o -GITHUB_EMBED_MODEL=openai/text-embedding-3-large -GITHUB_EMBED_DIMENSIONS=1024 -GITHUB_EMBEDDING_COLUMN=embedding_3l diff --git a/.github/workflows/evaluate.yaml b/.github/workflows/evaluate.yaml index b5c77c4b..0a35af5f 100644 --- a/.github/workflows/evaluate.yaml +++ b/.github/workflows/evaluate.yaml @@ -34,7 +34,6 @@ jobs: OPENAI_CHAT_HOST: ${{ vars.OPENAI_CHAT_HOST }} OPENAI_EMBED_HOST: ${{ vars.OPENAI_EMBED_HOST }} AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_OPENAI_ENDPOINT }} - AZURE_OPENAI_VERSION: ${{ vars.AZURE_OPENAI_VERSION }} AZURE_OPENAI_CHAT_DEPLOYMENT: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT }} AZURE_OPENAI_CHAT_MODEL: ${{ vars.AZURE_OPENAI_CHAT_MODEL }} AZURE_OPENAI_EMBED_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EMBED_DEPLOYMENT }} diff --git a/.vscode/settings.json b/.vscode/settings.json index c9eb00cc..4d91d2fb 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -36,5 +36,6 @@ "htmlcov": true, ".mypy_cache": true, ".coverage": true - } + }, + "python-envs.defaultEnvManager": "ms-python.python:system" } diff --git a/AGENTS.md b/AGENTS.md index ae38cbf5..b41d2779 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,3 +8,19 @@ When adding new azd environment variables, update: 1. infra/main.parameters.json : Add the new parameter with a Bicep-friendly variable name and map to the new environment variable 1. infra/main.bicep: Add the new Bicep parameter at the top, and add it to the `webAppEnv` object 1. .github/workflows/azure-dev.yml: Add the new environment variable under `env` section. 
If it's a @secure variable in main.bicep, it should come from `secrets`, otherwise from `vars`. + +## Upgrading Python packages + +1. Update the version constraint in src/backend/pyproject.toml + +2. Re-compile src/backend/requirements.txt from the src/backend folder: + + ```shell + uv pip compile pyproject.toml -o requirements.txt --python-version 3.10 + ``` + +3. Reinstall with: + + ```shell + python -m pip install -r src/backend/requirements.txt + ``` diff --git a/azure.yaml b/azure.yaml index 38c99b96..8a91e712 100644 --- a/azure.yaml +++ b/azure.yaml @@ -42,7 +42,6 @@ pipeline: - OPENAI_CHAT_HOST - OPENAI_EMBED_HOST - AZURE_OPENAI_ENDPOINT - - AZURE_OPENAI_VERSION - AZURE_OPENAI_CHAT_DEPLOYMENT - AZURE_OPENAI_CHAT_MODEL - AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION diff --git a/evals/generate_ground_truth.py b/evals/generate_ground_truth.py index 44410506..9f2b0fef 100644 --- a/evals/generate_ground_truth.py +++ b/evals/generate_ground_truth.py @@ -3,12 +3,10 @@ import os from collections.abc import Generator from pathlib import Path -from typing import Union from azure.identity import AzureDeveloperCliCredential, get_bearer_token_provider from dotenv_azd import load_azd_env -from openai import AzureOpenAI, OpenAI -from openai.types.chat import ChatCompletionToolParam +from openai import OpenAI from sqlalchemy import create_engine, select from sqlalchemy.orm import Session @@ -17,32 +15,30 @@ logger = logging.getLogger("ragapp") -def qa_pairs_tool(num_questions: int = 1) -> ChatCompletionToolParam: +def qa_pairs_tool(num_questions: int = 1) -> dict: return { "type": "function", - "function": { - "name": "qa_pairs", - "description": "Send in question and answer pairs for a customer-facing chat app", - "parameters": { - "type": "object", - "properties": { - "qa_list": { - "type": "array", - "description": f"List of {num_questions} question and answer pairs", - "items": { - "type": "object", - "properties": { - "question": {"type": "string", "description": "The question text"},
- "answer": {"type": "string", "description": "The answer text"}, - }, - "required": ["question", "answer"], + "name": "qa_pairs", + "description": "Send in question and answer pairs for a customer-facing chat app", + "parameters": { + "type": "object", + "properties": { + "qa_list": { + "type": "array", + "description": f"List of {num_questions} question and answer pairs", + "items": { + "type": "object", + "properties": { + "question": {"type": "string", "description": "The question text"}, + "answer": {"type": "string", "description": "The answer text"}, }, - "minItems": num_questions, - "maxItems": num_questions, - } - }, - "required": ["qa_list"], + "required": ["question", "answer"], + }, + "minItems": num_questions, + "maxItems": num_questions, + } }, + "required": ["qa_list"], }, } @@ -67,7 +63,6 @@ def source_retriever() -> Generator[str, None, None]: # for record in records: # logger.info(f"Processing database record: {record.name}") # yield f"## Product ID: [{record.id}]\n" + record.to_str_for_rag() - # await self.openai_chat_client.chat.completions.create( def source_to_text(source) -> str: @@ -78,32 +73,29 @@ def answer_formatter(answer, source) -> str: return f"{answer} [{source['id']}]" -def get_openai_client() -> tuple[Union[AzureOpenAI, OpenAI], str]: +def get_openai_client() -> tuple[OpenAI, str]: """Return an OpenAI client based on the environment variables""" - openai_client: Union[AzureOpenAI, OpenAI] + openai_client: OpenAI OPENAI_CHAT_HOST = os.getenv("OPENAI_CHAT_HOST") if OPENAI_CHAT_HOST == "azure": + azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"] if api_key := os.getenv("AZURE_OPENAI_KEY"): logger.info("Using Azure OpenAI Service with API Key from AZURE_OPENAI_KEY") - openai_client = AzureOpenAI( - api_version=os.environ["AZURE_OPENAI_VERSION"], - azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], + openai_client = OpenAI( + base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/", api_key=api_key, ) else: logger.info("Using Azure 
OpenAI Service with Azure Developer CLI Credential") azure_credential = AzureDeveloperCliCredential(process_timeout=60, tenant_id=os.environ["AZURE_TENANT_ID"]) token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default") - openai_client = AzureOpenAI( - api_version=os.environ["AZURE_OPENAI_VERSION"], - azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], - azure_ad_token_provider=token_provider, + openai_client = OpenAI( + base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/", + api_key=token_provider, ) model = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"] elif OPENAI_CHAT_HOST == "ollama": raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com") - elif OPENAI_CHAT_HOST == "github": - raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com") else: logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY") openai_client = OpenAI(api_key=os.environ["OPENAICOM_KEY"]) @@ -123,18 +115,21 @@ def generate_ground_truth_data(num_questions_total: int, num_questions_per_sourc if len(qa) > num_questions_total: logger.info("Generated enough questions already, stopping") break - result = openai_client.chat.completions.create( + result = openai_client.responses.create( model=model, - messages=[ + input=[ {"role": "system", "content": generate_prompt}, {"role": "user", "content": json.dumps(source)}, ], tools=[qa_pairs_tool(num_questions=2)], + max_output_tokens=1000, + store=False, ) - if not result.choices[0].message.tool_calls: + tool_calls = [item for item in result.output if item.type == "function_call"] + if not tool_calls: logger.warning("No tool calls found in response, skipping") continue - qa_pairs = json.loads(result.choices[0].message.tool_calls[0].function.arguments)["qa_list"] + qa_pairs = json.loads(tool_calls[0].arguments)["qa_list"] qa_pairs = [{"question": qa_pair["question"], "truth": qa_pair["answer"]} for qa_pair in qa_pairs] qa.extend(qa_pairs) 
diff --git a/infra/main.bicep b/infra/main.bicep index b6e5d9a2..ea800759 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -131,7 +131,6 @@ param openAIEmbedHost string = 'azure' @secure() param openAIComKey string = '' -param azureOpenAIAPIVersion string = '2024-03-01-preview' @secure() param azureOpenAIKey string = '' @@ -385,10 +384,6 @@ var webAppEnv = union(azureOpenAIKeyEnv, openAIComKeyEnv, [ name: 'AZURE_OPENAI_ENDPOINT' value: !empty(azureOpenAIEndpoint) ? azureOpenAIEndpoint : (deployAzureOpenAI ? openAI.outputs.endpoint : '') } - { - name: 'AZURE_OPENAI_VERSION' - value: openAIChatHost == 'azure' ? azureOpenAIAPIVersion : '' - } ]) module web 'web.bicep' = { @@ -613,7 +608,6 @@ output AZURE_OPENAI_RESOURCE_GROUP string = deployAzureOpenAI ? openAIResourceGr output AZURE_OPENAI_ENDPOINT string = !empty(azureOpenAIEndpoint) ? azureOpenAIEndpoint : (deployAzureOpenAI ? openAI.outputs.endpoint : '') -output AZURE_OPENAI_VERSION string = azureOpenAIAPIVersion output AZURE_OPENAI_CHAT_DEPLOYMENT string = deployAzureOpenAI ? chatDeploymentName : '' output AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION string = deployAzureOpenAI ? chatDeploymentVersion : '' output AZURE_OPENAI_CHAT_DEPLOYMENT_CAPACITY int = deployAzureOpenAI ? 
chatDeploymentCapacity : 0 diff --git a/infra/main.parameters.json b/infra/main.parameters.json index 3e16a351..85ac1817 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -33,13 +33,13 @@ "value": "${OPENAI_CHAT_HOST=azure}" }, "chatModelName": { - "value": "${AZURE_OPENAI_CHAT_MODEL=gpt-4o-mini}" + "value": "${AZURE_OPENAI_CHAT_MODEL=gpt-5.4}" }, "chatDeploymentName": { - "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-4o-mini}" + "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-5.4}" }, "chatDeploymentVersion":{ - "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION=2024-07-18}" + "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION=2026-03-05}" }, "chatDeploymentSku": { "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT_SKU=GlobalStandard}" diff --git a/src/backend/fastapi_app/__init__.py b/src/backend/fastapi_app/__init__.py index b760fdb2..cf04614e 100644 --- a/src/backend/fastapi_app/__init__.py +++ b/src/backend/fastapi_app/__init__.py @@ -2,12 +2,12 @@ import os from collections.abc import AsyncIterator from contextlib import asynccontextmanager -from typing import TypedDict, Union +from typing import TypedDict import fastapi from azure.monitor.opentelemetry import configure_azure_monitor from dotenv import load_dotenv -from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai import AsyncOpenAI from opentelemetry.instrumentation.openai import OpenAIInstrumentor from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker @@ -27,8 +27,8 @@ class State(TypedDict): sessionmaker: async_sessionmaker[AsyncSession] context: FastAPIAppContext - chat_client: Union[AsyncOpenAI, AsyncAzureOpenAI] - embed_client: Union[AsyncOpenAI, AsyncAzureOpenAI] + chat_client: AsyncOpenAI + embed_client: AsyncOpenAI @asynccontextmanager diff --git a/src/backend/fastapi_app/api_models.py b/src/backend/fastapi_app/api_models.py index 06d14a6b..f75db35c 100644 --- 
a/src/backend/fastapi_app/api_models.py +++ b/src/backend/fastapi_app/api_models.py @@ -28,7 +28,6 @@ class ChatRequestOverrides(BaseModel): retrieval_mode: RetrievalMode = RetrievalMode.HYBRID use_advanced_flow: bool = True prompt_template: Optional[str] = None - seed: Optional[int] = None class ChatRequestContext(BaseModel): diff --git a/src/backend/fastapi_app/dependencies.py b/src/backend/fastapi_app/dependencies.py index 2715819e..7b60bef6 100644 --- a/src/backend/fastapi_app/dependencies.py +++ b/src/backend/fastapi_app/dependencies.py @@ -1,11 +1,11 @@ import logging import os from collections.abc import AsyncGenerator -from typing import Annotated, Optional, Union +from typing import Annotated, Optional import azure.identity from fastapi import Depends, Request -from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai import AsyncOpenAI from pydantic import BaseModel from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker @@ -17,7 +17,7 @@ class OpenAIClient(BaseModel): OpenAI client """ - client: Union[AsyncOpenAI, AsyncAzureOpenAI] + client: AsyncOpenAI model_config = {"arbitrary_types_allowed": True} @@ -51,26 +51,18 @@ async def common_parameters(): openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text" openai_embed_dimensions = None embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN") or "embedding_nomic" - elif OPENAI_EMBED_HOST == "github": - openai_embed_deployment = None - openai_embed_model = os.getenv("GITHUB_EMBED_MODEL") or "openai/text-embedding-3-large" - openai_embed_dimensions = int(os.getenv("GITHUB_EMBED_DIMENSIONS", 1024)) - embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN") or "embedding_3l" else: openai_embed_deployment = None openai_embed_model = os.getenv("OPENAICOM_EMBED_MODEL") or "text-embedding-3-large" openai_embed_dimensions = int(os.getenv("OPENAICOM_EMBED_DIMENSIONS", 1024)) embedding_column = os.getenv("OPENAICOM_EMBEDDING_COLUMN") or "embedding_3l" if 
OPENAI_CHAT_HOST == "azure": - openai_chat_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT") or "gpt-4o-mini" - openai_chat_model = os.getenv("AZURE_OPENAI_CHAT_MODEL") or "gpt-4o-mini" + openai_chat_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT") or "gpt-5.4" + openai_chat_model = os.getenv("AZURE_OPENAI_CHAT_MODEL") or "gpt-5.4" elif OPENAI_CHAT_HOST == "ollama": openai_chat_deployment = None openai_chat_model = os.getenv("OLLAMA_CHAT_MODEL") or "phi3:3.8b" openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text" - elif OPENAI_CHAT_HOST == "github": - openai_chat_deployment = None - openai_chat_model = os.getenv("GITHUB_MODEL") or "openai/gpt-4o" else: openai_chat_deployment = None openai_chat_model = os.getenv("OPENAICOM_CHAT_MODEL") or "gpt-3.5-turbo" @@ -84,10 +76,10 @@ async def common_parameters(): ) -async def get_azure_credential() -> Union[ - azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential -]: - azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential] +async def get_azure_credential() -> ( + azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential +): + azure_credential: azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential try: if client_id := os.getenv("APP_IDENTITY_ID"): # Authenticate using a user-assigned managed identity on Azure diff --git a/src/backend/fastapi_app/embeddings.py b/src/backend/fastapi_app/embeddings.py index 0dccec3e..1a0581b7 100644 --- a/src/backend/fastapi_app/embeddings.py +++ b/src/backend/fastapi_app/embeddings.py @@ -1,11 +1,11 @@ -from typing import Optional, TypedDict, Union +from typing import Optional, TypedDict -from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai import AsyncOpenAI async def compute_text_embedding( q: str, - openai_client: Union[AsyncOpenAI, AsyncAzureOpenAI], + openai_client: AsyncOpenAI, embed_model: str, 
embed_deployment: Optional[str] = None, embedding_dimensions: Optional[int] = None, diff --git a/src/backend/fastapi_app/openai_clients.py b/src/backend/fastapi_app/openai_clients.py index b704dc9d..e5c0a703 100644 --- a/src/backend/fastapi_app/openai_clients.py +++ b/src/backend/fastapi_app/openai_clients.py @@ -1,79 +1,65 @@ import logging import os -from typing import Union import azure.identity +import azure.identity.aio import openai logger = logging.getLogger("ragapp") async def create_openai_chat_client( - azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential, None], -) -> Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]: - openai_chat_client: Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI] + azure_credential: azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential | None, +) -> openai.AsyncOpenAI: + openai_chat_client: openai.AsyncOpenAI OPENAI_CHAT_HOST = os.getenv("OPENAI_CHAT_HOST") if OPENAI_CHAT_HOST == "azure": - api_version = os.environ["AZURE_OPENAI_VERSION"] or "2024-10-21" azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"] azure_deployment = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"] if api_key := os.getenv("AZURE_OPENAI_KEY"): logger.info( - "Setting up Azure OpenAI client for chat completions using API key, endpoint %s, deployment %s", + "Setting up Azure OpenAI client for chat using API key, endpoint %s, deployment %s", azure_endpoint, azure_deployment, ) - openai_chat_client = openai.AsyncAzureOpenAI( - api_version=api_version, - azure_endpoint=azure_endpoint, - azure_deployment=azure_deployment, + openai_chat_client = openai.AsyncOpenAI( + base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/", api_key=api_key, ) elif azure_credential: logger.info( - "Setting up Azure OpenAI client for chat completions using Azure Identity, endpoint %s, deployment %s", + "Setting up Azure OpenAI client for chat using Azure Identity, endpoint %s, deployment %s", 
azure_endpoint, azure_deployment, ) - token_provider = azure.identity.get_bearer_token_provider( + token_provider = azure.identity.aio.get_bearer_token_provider( azure_credential, "https://cognitiveservices.azure.com/.default" ) - openai_chat_client = openai.AsyncAzureOpenAI( - api_version=api_version, - azure_endpoint=azure_endpoint, - azure_deployment=azure_deployment, - azure_ad_token_provider=token_provider, + openai_chat_client = openai.AsyncOpenAI( + base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/", + api_key=token_provider, ) else: raise ValueError("Azure OpenAI client requires either an API key or Azure Identity credential.") elif OPENAI_CHAT_HOST == "ollama": - logger.info("Setting up OpenAI client for chat completions using Ollama") + logger.info("Setting up OpenAI client for chat using Ollama") openai_chat_client = openai.AsyncOpenAI( base_url=os.getenv("OLLAMA_ENDPOINT"), api_key="nokeyneeded", ) - elif OPENAI_CHAT_HOST == "github": - logger.info("Setting up OpenAI client for chat completions using GitHub Models") - github_model = os.getenv("GITHUB_MODEL", "openai/gpt-4o") - logger.info(f"Using GitHub Models with model: {github_model}") - openai_chat_client = openai.AsyncOpenAI( - base_url="https://models.github.ai/inference", - api_key=os.getenv("GITHUB_TOKEN"), - ) else: - logger.info("Setting up OpenAI client for chat completions using OpenAI.com API key") + logger.info("Setting up OpenAI client for chat using OpenAI.com API key") openai_chat_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAICOM_KEY")) return openai_chat_client async def create_openai_embed_client( - azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential, None], -) -> Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]: - openai_embed_client: Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI] + azure_credential: azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential | None, +) -> 
openai.AsyncOpenAI: + openai_embed_client: openai.AsyncOpenAI OPENAI_EMBED_HOST = os.getenv("OPENAI_EMBED_HOST") if OPENAI_EMBED_HOST == "azure": - api_version = os.environ["AZURE_OPENAI_VERSION"] or "2024-03-01-preview" azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"] azure_deployment = os.environ["AZURE_OPENAI_EMBED_DEPLOYMENT"] if api_key := os.getenv("AZURE_OPENAI_KEY"): @@ -82,10 +68,8 @@ async def create_openai_embed_client( azure_endpoint, azure_deployment, ) - openai_embed_client = openai.AsyncAzureOpenAI( - api_version=api_version, - azure_endpoint=azure_endpoint, - azure_deployment=azure_deployment, + openai_embed_client = openai.AsyncOpenAI( + base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/", api_key=api_key, ) elif azure_credential: @@ -94,14 +78,12 @@ async def create_openai_embed_client( azure_endpoint, azure_deployment, ) - token_provider = azure.identity.get_bearer_token_provider( + token_provider = azure.identity.aio.get_bearer_token_provider( azure_credential, "https://cognitiveservices.azure.com/.default" ) - openai_embed_client = openai.AsyncAzureOpenAI( - api_version=api_version, - azure_endpoint=azure_endpoint, - azure_deployment=azure_deployment, - azure_ad_token_provider=token_provider, + openai_embed_client = openai.AsyncOpenAI( + base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/", + api_key=token_provider, ) else: raise ValueError("Azure OpenAI client requires either an API key or Azure Identity credential.") @@ -111,14 +93,6 @@ async def create_openai_embed_client( base_url=os.getenv("OLLAMA_ENDPOINT"), api_key="nokeyneeded", ) - elif OPENAI_EMBED_HOST == "github": - logger.info("Setting up OpenAI client for embeddings using GitHub Models") - github_embed_model = os.getenv("GITHUB_EMBED_MODEL", "openai/text-embedding-3-small") - logger.info(f"Using GitHub Models with embedding model: {github_embed_model}") - openai_embed_client = openai.AsyncOpenAI( - base_url="https://models.github.ai/inference", - 
api_key=os.getenv("GITHUB_TOKEN"), - ) else: logger.info("Setting up OpenAI client for embeddings using OpenAI.com API key") openai_embed_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAICOM_KEY")) diff --git a/src/backend/fastapi_app/prompts/query_fewshots.json b/src/backend/fastapi_app/prompts/query_fewshots.json index 0ef450fd..4fe3ae10 100644 --- a/src/backend/fastapi_app/prompts/query_fewshots.json +++ b/src/backend/fastapi_app/prompts/query_fewshots.json @@ -4,14 +4,14 @@ "content": "good options for climbing gear that can be used outside?" }, { - "id": "madeup", + "id": "fc_madeup1", "call_id": "call_abc123", "name": "search_database", "arguments": "{\"search_query\":\"climbing gear outside\"}", "type": "function_call" }, { - "id": "madeupoutput", + "id": "fc_madeupoutput1", "call_id": "call_abc123", "output": "Search results for climbing gear that can be used outside: ...", "type": "function_call_output" @@ -21,14 +21,14 @@ "content": "are there any shoes less than $50?" }, { - "id": "madeup", + "id": "fc_madeup2", "call_id": "call_abc456", "name": "search_database", "arguments": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}", "type": "function_call" }, { - "id": "madeupoutput", + "id": "fc_madeupoutput2", "call_id": "call_abc456", "output": "Search results for shoes cheaper than 50: ...", "type": "function_call_output" diff --git a/src/backend/fastapi_app/query_rewriter.py b/src/backend/fastapi_app/query_rewriter.py index aa0ad466..122c0ed9 100644 --- a/src/backend/fastapi_app/query_rewriter.py +++ b/src/backend/fastapi_app/query_rewriter.py @@ -1,73 +1,62 @@ import json -from openai.types.chat import ( - ChatCompletion, - ChatCompletionToolParam, -) +from openai.types.responses import Response, ResponseFunctionToolCall -def build_search_function() -> list[ChatCompletionToolParam]: - return [ - { - "type": "function", - "function": { - "name": "search_database", - "description": "Search PostgreSQL database 
for relevant products based on user query", - "parameters": { +def build_search_function() -> dict: + return { + "type": "function", + "name": "search_database", + "description": "Search PostgreSQL database for relevant products based on user query", + "parameters": { + "type": "object", + "properties": { + "search_query": { + "type": "string", + "description": "Query string to use for full text search, e.g. 'red shoes'", + }, + "price_filter": { "type": "object", + "description": "Filter search results based on price of the product", "properties": { - "search_query": { + "comparison_operator": { "type": "string", - "description": "Query string to use for full text search, e.g. 'red shoes'", + "description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa + }, + "value": { + "type": "number", + "description": "Value to compare against, e.g. 30", }, - "price_filter": { - "type": "object", - "description": "Filter search results based on price of the product", - "properties": { - "comparison_operator": { - "type": "string", - "description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa - }, - "value": { - "type": "number", - "description": "Value to compare against, e.g. 30", - }, - }, + }, + }, + "brand_filter": { + "type": "object", + "description": "Filter search results based on brand of the product", + "properties": { + "comparison_operator": { + "type": "string", + "description": "Operator to compare the column value, either '=' or '!='", }, - "brand_filter": { - "type": "object", - "description": "Filter search results based on brand of the product", - "properties": { - "comparison_operator": { - "type": "string", - "description": "Operator to compare the column value, either '=' or '!='", - }, - "value": { - "type": "string", - "description": "Value to compare against, e.g. AirStrider", - }, - }, + "value": { + "type": "string", + "description": "Value to compare against, e.g. 
AirStrider", }, }, - "required": ["search_query"], }, }, - } - ] + "required": ["search_query"], + }, + } -def extract_search_arguments(original_user_query: str, chat_completion: ChatCompletion): - response_message = chat_completion.choices[0].message +def extract_search_arguments(original_user_query: str, response: Response): search_query = None filters = [] - if response_message.tool_calls: - for tool in response_message.tool_calls: - if tool.type != "function": - continue - function = tool.function - if function.name == "search_database": - arg = json.loads(function.arguments) - # Even though its required, search_query is not always specified + tool_calls = [item for item in response.output if isinstance(item, ResponseFunctionToolCall)] + if tool_calls: + for tool_call in tool_calls: + if tool_call.name == "search_database": + arg = json.loads(tool_call.arguments) search_query = arg.get("search_query", original_user_query) if "price_filter" in arg and arg["price_filter"] and isinstance(arg["price_filter"], dict): price_filter = arg["price_filter"] @@ -87,6 +76,6 @@ def extract_search_arguments(original_user_query: str, chat_completion: ChatComp "value": brand_filter["value"], } ) - elif query_text := response_message.content: - search_query = query_text.strip() + elif response.output_text: + search_query = response.output_text.strip() return search_query, filters diff --git a/src/backend/fastapi_app/rag_advanced.py b/src/backend/fastapi_app/rag_advanced.py index eb53aa6a..7b894b24 100644 --- a/src/backend/fastapi_app/rag_advanced.py +++ b/src/backend/fastapi_app/rag_advanced.py @@ -1,18 +1,18 @@ import json from collections.abc import AsyncGenerator -from typing import Optional, Union +from typing import Optional from agents import ( Agent, ItemHelpers, ModelSettings, - OpenAIChatCompletionsModel, + OpenAIResponsesModel, Runner, ToolCallOutputItem, function_tool, set_tracing_disabled, ) -from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai import 
AsyncOpenAI from openai.types.responses import EasyInputMessageParam, ResponseInputItemParam, ResponseTextDeltaEvent from fastapi_app.api_models import ( @@ -45,7 +45,7 @@ def __init__( messages: list[ResponseInputItemParam], overrides: ChatRequestOverrides, searcher: PostgresSearcher, - openai_chat_client: Union[AsyncOpenAI, AsyncAzureOpenAI], + openai_chat_client: AsyncOpenAI, chat_model: str, chat_deployment: Optional[str], # Not needed for non-Azure OpenAI ): @@ -54,7 +54,7 @@ def __init__( self.model_for_thoughts = ( {"model": chat_model, "deployment": chat_deployment} if chat_deployment else {"model": chat_model} ) - openai_agents_model = OpenAIChatCompletionsModel( + openai_agents_model = OpenAIResponsesModel( model=chat_model if chat_deployment is None else chat_deployment, openai_client=openai_chat_client ) self.search_agent = Agent( @@ -71,7 +71,6 @@ def __init__( model_settings=ModelSettings( temperature=self.chat_params.temperature, max_tokens=self.chat_params.response_token_limit, - extra_body={"seed": self.chat_params.seed} if self.chat_params.seed is not None else {}, ), ) diff --git a/src/backend/fastapi_app/rag_base.py b/src/backend/fastapi_app/rag_base.py index 54e633c2..557c1049 100644 --- a/src/backend/fastapi_app/rag_base.py +++ b/src/backend/fastapi_app/rag_base.py @@ -32,7 +32,6 @@ def get_chat_params(self, messages: list[ResponseInputItemParam], overrides: Cha return ChatParams( top=overrides.top, temperature=overrides.temperature, - seed=overrides.seed, retrieval_mode=overrides.retrieval_mode, use_advanced_flow=overrides.use_advanced_flow, response_token_limit=response_token_limit, diff --git a/src/backend/fastapi_app/rag_simple.py b/src/backend/fastapi_app/rag_simple.py index 69126618..1bbbd12a 100644 --- a/src/backend/fastapi_app/rag_simple.py +++ b/src/backend/fastapi_app/rag_simple.py @@ -1,8 +1,8 @@ from collections.abc import AsyncGenerator -from typing import Optional, Union +from typing import Optional -from agents import Agent, 
ItemHelpers, ModelSettings, OpenAIChatCompletionsModel, Runner, set_tracing_disabled -from openai import AsyncAzureOpenAI, AsyncOpenAI +from agents import Agent, ItemHelpers, ModelSettings, OpenAIResponsesModel, Runner, set_tracing_disabled +from openai import AsyncOpenAI from openai.types.responses import ResponseInputItemParam, ResponseTextDeltaEvent from fastapi_app.api_models import ( @@ -28,7 +28,7 @@ def __init__( messages: list[ResponseInputItemParam], overrides: ChatRequestOverrides, searcher: PostgresSearcher, - openai_chat_client: Union[AsyncOpenAI, AsyncAzureOpenAI], + openai_chat_client: AsyncOpenAI, chat_model: str, chat_deployment: Optional[str], # Not needed for non-Azure OpenAI ): @@ -37,7 +37,7 @@ def __init__( self.model_for_thoughts = ( {"model": chat_model, "deployment": chat_deployment} if chat_deployment else {"model": chat_model} ) - openai_agents_model = OpenAIChatCompletionsModel( + openai_agents_model = OpenAIResponsesModel( model=chat_model if chat_deployment is None else chat_deployment, openai_client=openai_chat_client ) self.answer_agent = Agent( @@ -47,7 +47,6 @@ def __init__( model_settings=ModelSettings( temperature=self.chat_params.temperature, max_tokens=self.chat_params.response_token_limit, - extra_body={"seed": self.chat_params.seed} if self.chat_params.seed is not None else {}, ), ) diff --git a/src/backend/pyproject.toml b/src/backend/pyproject.toml index 7ede97c9..9874b924 100644 --- a/src/backend/pyproject.toml +++ b/src/backend/pyproject.toml @@ -12,12 +12,12 @@ dependencies = [ "asyncpg>=0.29.0,<1.0.0", "SQLAlchemy[asyncio]>=2.0.30,<3.0.0", "pgvector>=0.3.0,<0.4.0", - "openai>=1.34.0,<2.0.0", + "openai>=1.108.1,<3.0.0", "azure-monitor-opentelemetry>=1.6.0,<2.0.0", "opentelemetry-instrumentation-sqlalchemy", "opentelemetry-instrumentation-aiohttp-client", "opentelemetry-instrumentation-openai", - "openai-agents" + "openai-agents>=0.13.6" ] [build-system] diff --git a/src/backend/requirements.txt 
b/src/backend/requirements.txt index b83031b6..167de074 100644 --- a/src/backend/requirements.txt +++ b/src/backend/requirements.txt @@ -1,5 +1,5 @@ # This file was autogenerated by uv via the following command: -# uv pip compile pyproject.toml -o requirements_new.txt --python-version 3.10 +# uv pip compile pyproject.toml -o requirements.txt --python-version 3.10 aiohappyeyeballs==2.6.1 # via aiohttp aiohttp==3.12.14 @@ -59,8 +59,6 @@ charset-normalizer==3.4.2 # via requests click==8.2.1 # via uvicorn -colorama==0.4.6 - # via griffe cryptography==45.0.5 # via # azure-identity @@ -86,7 +84,7 @@ frozenlist==1.7.0 # aiosignal greenlet==3.2.3 # via sqlalchemy -griffe==1.7.3 +griffelib==2.0.2 # via openai-agents h11==0.16.0 # via @@ -118,7 +116,7 @@ jsonschema-specifications==2025.4.1 # via jsonschema marshmallow==4.0.0 # via environs -mcp==1.11.0 +mcp==1.27.0 # via openai-agents msal==1.32.3 # via @@ -136,11 +134,11 @@ numpy==2.2.6 # via pgvector oauthlib==3.3.1 # via requests-oauthlib -openai==1.96.1 +openai==2.31.0 # via # fastapi-app (pyproject.toml) # openai-agents -openai-agents==0.2.0 +openai-agents==0.13.6 # via fastapi-app (pyproject.toml) opentelemetry-api==1.31.1 # via @@ -256,19 +254,21 @@ psutil==7.0.0 # via azure-monitor-opentelemetry-exporter pycparser==2.22 # via cffi -pydantic==2.11.7 +pydantic==2.12.5 # via # fastapi # mcp # openai # openai-agents # pydantic-settings -pydantic-core==2.33.2 +pydantic-core==2.41.5 # via pydantic pydantic-settings==2.10.1 # via mcp pyjwt==2.10.1 - # via msal + # via + # mcp + # msal python-dotenv==1.1.1 # via # fastapi-app (pyproject.toml) @@ -327,6 +327,7 @@ typing-extensions==4.14.1 # exceptiongroup # fastapi # marshmallow + # mcp # multidict # openai # openai-agents @@ -338,8 +339,9 @@ typing-extensions==4.14.1 # starlette # typing-inspection # uvicorn -typing-inspection==0.4.1 +typing-inspection==0.4.2 # via + # mcp # pydantic # pydantic-settings urllib3==2.5.0 diff --git a/tests/conftest.py b/tests/conftest.py index 
5fe67053..409b9e07 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,13 +9,14 @@ import pytest_asyncio from fastapi.testclient import TestClient from openai.types import CreateEmbeddingResponse, Embedding -from openai.types.chat import ChatCompletion, ChatCompletionChunk -from openai.types.chat.chat_completion import ( - ChatCompletionMessage, - Choice, -) -from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall, Function from openai.types.create_embedding_response import Usage +from openai.types.responses import ( + Response, + ResponseFunctionToolCall, + ResponseOutputMessage, + ResponseOutputText, + ResponseTextDeltaEvent, +) from sqlalchemy.ext.asyncio import async_sessionmaker from fastapi_app import create_app @@ -63,9 +64,8 @@ def mock_session_env(monkeypatch_session): monkeypatch_session.setenv("OPENAI_CHAT_HOST", "azure") monkeypatch_session.setenv("OPENAI_EMBED_HOST", "azure") monkeypatch_session.setenv("AZURE_OPENAI_ENDPOINT", "https://api.openai.com") - monkeypatch_session.setenv("AZURE_OPENAI_VERSION", "2024-03-01-preview") - monkeypatch_session.setenv("AZURE_OPENAI_CHAT_DEPLOYMENT", "gpt-4o-mini") - monkeypatch_session.setenv("AZURE_OPENAI_CHAT_MODEL", "gpt-4o-mini") + monkeypatch_session.setenv("AZURE_OPENAI_CHAT_DEPLOYMENT", "gpt-5.4") + monkeypatch_session.setenv("AZURE_OPENAI_CHAT_MODEL", "gpt-5.4") monkeypatch_session.setenv("AZURE_OPENAI_EMBED_DEPLOYMENT", "text-embedding-3-large") monkeypatch_session.setenv("AZURE_OPENAI_EMBED_MODEL", "text-embedding-3-large") monkeypatch_session.setenv("AZURE_OPENAI_EMBED_DIMENSIONS", "1024") @@ -171,67 +171,39 @@ async def mock_acreate(*args, **kwargs): @pytest.fixture(scope="session") def mock_openai_chatcompletion(monkeypatch_session): - class AsyncChatCompletionIterator: + class AsyncResponseEventIterator: def __init__(self, answer: str): - chunk_id = "test-id" - model = "gpt-4o-mini" - self.responses = [ - {"object": "chat.completion.chunk", "choices": [], 
"id": chunk_id, "model": model, "created": 1}, - { - "object": "chat.completion.chunk", - "choices": [{"delta": {"role": "assistant"}, "index": 0, "finish_reason": None}], - "id": chunk_id, - "model": model, - "created": 1, - }, - ] + self.events = [] # Split at << to simulate chunked responses if answer.find("<<") > -1: parts = answer.split("<<") - self.responses.append( - { - "object": "chat.completion.chunk", - "choices": [ - { - "delta": {"role": "assistant", "content": parts[0] + "<<"}, - "index": 0, - "finish_reason": None, - } - ], - "id": chunk_id, - "model": model, - "created": 1, - } - ) - self.responses.append( - { - "object": "chat.completion.chunk", - "choices": [ - {"delta": {"role": "assistant", "content": parts[1]}, "index": 0, "finish_reason": None} - ], - "id": chunk_id, - "model": model, - "created": 1, - } + self.events.append( + ResponseTextDeltaEvent( + type="response.output_text.delta", + content_index=0, + delta=parts[0] + "<<", + item_id="msg-1", + output_index=0, + ) ) - self.responses.append( - { - "object": "chat.completion.chunk", - "choices": [{"delta": {"role": None, "content": None}, "index": 0, "finish_reason": "stop"}], - "id": chunk_id, - "model": model, - "created": 1, - } + self.events.append( + ResponseTextDeltaEvent( + type="response.output_text.delta", + content_index=0, + delta=parts[1], + item_id="msg-1", + output_index=0, + ) ) else: - self.responses.append( - { - "object": "chat.completion.chunk", - "choices": [{"delta": {"content": answer}, "index": 0, "finish_reason": None}], - "id": chunk_id, - "model": model, - "created": 1, - } + self.events.append( + ResponseTextDeltaEvent( + type="response.output_text.delta", + content_index=0, + delta=answer, + item_id="msg-1", + output_index=0, + ) ) async def __aenter__(self): @@ -244,93 +216,88 @@ def __aiter__(self): return self async def __anext__(self): - if self.responses: - return ChatCompletionChunk.model_validate(self.responses.pop(0)) - else: - raise StopAsyncIteration 
+ if self.events: + return self.events.pop(0) + raise StopAsyncIteration + + def _make_text_response(answer: str) -> Response: + return Response( + id="resp-test-123", + created_at=0, + model="gpt-5.4", + object="response", + output=[ + ResponseOutputMessage( + id="msg-1", + type="message", + role="assistant", + status="completed", + content=[ResponseOutputText(type="output_text", text=answer, annotations=[])], + ) + ], + tool_choice="auto", + tools=[], + status="completed", + parallel_tool_calls=True, + ) + + def _make_tool_call_response(tool_name: str, arguments: str, call_id: str = "fc_abc123") -> Response: + return Response( + id="resp-test-123", + created_at=0, + model="gpt-5.4", + object="response", + output=[ + ResponseFunctionToolCall( + id=call_id, + call_id=call_id, + type="function_call", + name=tool_name, + arguments=arguments, + status="completed", + ) + ], + tool_choice="auto", + tools=[], + status="completed", + parallel_tool_calls=True, + ) async def mock_acreate(*args, **kwargs): - messages = kwargs["messages"] - last_question = messages[-1]["content"] - last_role = messages[-1]["role"] + input_messages = kwargs.get("input", []) + last_message = input_messages[-1] + last_content = last_message.get("content", "") if isinstance(last_message, dict) else "" + last_role = last_message.get("role", "") if isinstance(last_message, dict) else "" if last_role == "tool": - items = json.loads(last_question)["items"] + items = json.loads(last_content)["items"] arguments = {"query": "capital of France", "items": items, "filters": []} - return ChatCompletion( - object="chat.completion", - choices=[ - Choice( - message=ChatCompletionMessage( - role="assistant", - tool_calls=[ - ChatCompletionMessageToolCall( - id="call_abc123final", - type="function", - function=Function( - name="final_result", - arguments=json.dumps(arguments), - ), - ) - ], - ), - finish_reason="stop", - index=0, - ) - ], - id="test-123final", - created=0, - model="test-model", + return 
_make_tool_call_response("final_result", json.dumps(arguments), call_id="fc_abc123final") + if last_content == "Find search results for user query: What is the capital of France?": + return _make_tool_call_response( + "search_database", '{"search_query":"climbing gear outside"}', call_id="fc_abc123" ) - if last_question == "Find search results for user query: What is the capital of France?": - return ChatCompletion( - object="chat.completion", - choices=[ - Choice( - message=ChatCompletionMessage( - role="assistant", - tool_calls=[ - ChatCompletionMessageToolCall( - id="call_abc123", - type="function", - function=Function( - name="search_database", arguments='{"search_query":"climbing gear outside"}' - ), - ) - ], - ), - finish_reason="stop", - index=0, - ) - ], - id="test-123", - created=0, - model="test-model", - ) - elif last_question == "Find search results for user query: Are interest rates high?": + elif last_content == "Find search results for user query: Are interest rates high?": answer = "interest rates" - elif isinstance(last_question, list) and last_question[2].get("image_url"): - answer = "From the provided sources, the impact of interest rates and GDP growth on " - "financial markets can be observed through the line graph. [Financial Market Analysis Report 2023-7.png]" + elif isinstance(last_content, list) and len(last_content) > 2 and last_content[2].get("image_url"): + answer = ( + "From the provided sources, the impact of interest rates and GDP growth on " + "financial markets can be observed through the line graph." + " [Financial Market Analysis Report 2023-7.png]" + ) else: answer = "The capital of France is Paris. [Benefit_Options-2.pdf]." 
- if messages[0]["content"].find("Generate 3 very brief follow-up questions") > -1: + system_content = input_messages[0].get("content", "") if isinstance(input_messages[0], dict) else "" + if ( + isinstance(system_content, str) + and system_content.find("Generate 3 very brief follow-up questions") > -1 + ): answer = "The capital of France is Paris. [Benefit_Options-2.pdf]. <>" - if "stream" in kwargs and kwargs["stream"] is True: - return AsyncChatCompletionIterator(answer) + if kwargs.get("stream") is True: + return AsyncResponseEventIterator(answer) else: - return ChatCompletion( - object="chat.completion", - choices=[ - Choice( - message=ChatCompletionMessage(role="assistant", content=answer), finish_reason="stop", index=0 - ) - ], - id="test-123", - created=0, - model="test-model", - ) + return _make_text_response(answer) - monkeypatch_session.setattr(openai.resources.chat.completions.AsyncCompletions, "create", mock_acreate) + monkeypatch_session.setattr(openai.resources.responses.AsyncResponses, "create", mock_acreate) yield diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json b/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json index 612be773..2535e647 100644 --- a/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json +++ b/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json @@ -61,8 +61,8 @@ } ], "props": { - "model": "gpt-4o-mini", - "deployment": "gpt-4o-mini" + "model": "gpt-5.4", + "deployment": "gpt-5.4" } }, { @@ -101,8 +101,8 @@ } ], "props": { - "model": "gpt-4o-mini", - "deployment": "gpt-4o-mini" + "model": "gpt-5.4", + "deployment": "gpt-5.4" } } ], diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines 
index d29b85c4..7a8760f6 100644 --- a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines +++ b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines @@ -1,2 +1,2 @@ -{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":[{"content":"Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. **search_database**: This tool allows you to search a table for items based on a query.\n You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n"},{"role":"user","content":"good options for climbing gear that can be used outside?"},{"id":"madeup","call_id":"call_abc123","name":"search_database","arguments":"{\"search_query\":\"climbing gear outside\"}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc123","output":"Search results for climbing gear that can be used outside: ...","type":"function_call_output"},{"role":"user","content":"are there any shoes less than $50?"},{"id":"madeup","call_id":"call_abc456","name":"search_database","arguments":"{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc456","output":"Search results for shoes cheaper than 50: ...","type":"function_call_output"},{"role":"user","content":"Find search results for user query: What is the capital 
of France?"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}},{"title":"Search using generated search arguments","description":"climbing gear outside","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. 
Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}}],"followup_questions":null},"sessionState":null} +{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":[{"content":"Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. **search_database**: This tool allows you to search a table for items based on a query.\n You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n"},{"role":"user","content":"good options for climbing gear that can be used outside?"},{"id":"madeup","call_id":"call_abc123","name":"search_database","arguments":"{\"search_query\":\"climbing gear outside\"}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc123","output":"Search results for climbing gear that can be used outside: ...","type":"function_call_output"},{"role":"user","content":"are there any shoes less than $50?"},{"id":"madeup","call_id":"call_abc456","name":"search_database","arguments":"{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc456","output":"Search results for shoes cheaper than 50: ...","type":"function_call_output"},{"role":"user","content":"Find search results for user query: What is the capital of France?"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}},{"title":"Search using generated search 
arguments","description":"climbing gear outside","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}}],"followup_questions":null},"sessionState":null} {"delta":{"content":"The capital of France is Paris. 
[Benefit_Options-2.pdf].","role":"assistant"},"context":null,"sessionState":null} diff --git a/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json b/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json index e311917b..2059b570 100644 --- a/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json +++ b/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json @@ -50,8 +50,8 @@ } ], "props": { - "model": "gpt-4o-mini", - "deployment": "gpt-4o-mini" + "model": "gpt-5.4", + "deployment": "gpt-5.4" } } ], diff --git a/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json b/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json index d0456cd7..71e2efb8 100644 --- a/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json +++ b/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json @@ -58,8 +58,8 @@ } ], "props": { - "model": "gpt-4o-mini", - "deployment": "gpt-4o-mini" + "model": "gpt-5.4", + "deployment": "gpt-5.4" } } ], diff --git a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines index 65d3ae5b..14a4d1b2 100644 --- a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines +++ b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines @@ -1,2 +1,2 @@ -{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek 
black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Search query for database","description":"What is the capital of France?","props":{"top":1,"vector_search":true,"text_search":true}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. 
Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}}],"followup_questions":null},"sessionState":null} +{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Search query for database","description":"What is the capital of France?","props":{"top":1,"vector_search":true,"text_search":true}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. 
Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}}],"followup_questions":null},"sessionState":null} {"delta":{"content":"The capital of France is Paris. 
[Benefit_Options-2.pdf].","role":"assistant"},"context":null,"sessionState":null} diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py index e55149a3..1c4d90db 100644 --- a/tests/test_dependencies.py +++ b/tests/test_dependencies.py @@ -6,10 +6,10 @@ @pytest.mark.asyncio async def test_get_common_parameters(mock_session_env): result = await common_parameters() - assert result.openai_chat_model == "gpt-4o-mini" + assert result.openai_chat_model == "gpt-5.4" assert result.openai_embed_model == "text-embedding-3-large" assert result.openai_embed_dimensions == 1024 - assert result.openai_chat_deployment == "gpt-4o-mini" + assert result.openai_chat_deployment == "gpt-5.4" assert result.openai_embed_deployment == "text-embedding-3-large" diff --git a/tests/test_openai_clients.py b/tests/test_openai_clients.py index 47caba26..ad60afc7 100644 --- a/tests/test_openai_clients.py +++ b/tests/test_openai_clients.py @@ -1,6 +1,5 @@ import pytest -from fastapi_app.dependencies import common_parameters from fastapi_app.openai_clients import create_openai_chat_client, create_openai_embed_client from tests.data import test_data @@ -18,49 +17,6 @@ async def test_create_openai_embed_client(mock_azure_credential, mock_openai_emb @pytest.mark.asyncio async def test_create_openai_chat_client(mock_azure_credential, mock_openai_chatcompletion): openai_chat_client = await create_openai_chat_client(mock_azure_credential) - assert openai_chat_client.chat.completions.create is not None - response = await openai_chat_client.chat.completions.create( - model="gpt-4o-mini", messages=[{"content": "test", "role": "user"}] - ) - assert response.choices[0].message.content == "The capital of France is Paris. [Benefit_Options-2.pdf]." 
- - -@pytest.mark.asyncio -async def test_github_models_configuration(monkeypatch): - """Test that GitHub Models uses the correct URLs and model names.""" - # Set up environment for GitHub Models - monkeypatch.setenv("OPENAI_CHAT_HOST", "github") - monkeypatch.setenv("OPENAI_EMBED_HOST", "github") - monkeypatch.setenv("GITHUB_TOKEN", "fake-token") - # Don't set GITHUB_MODEL to test defaults - - # Test chat client configuration - chat_client = await create_openai_chat_client(None) - assert str(chat_client.base_url).rstrip("/") == "https://models.github.ai/inference" - assert chat_client.api_key == "fake-token" - - # Test embed client configuration - embed_client = await create_openai_embed_client(None) - assert str(embed_client.base_url).rstrip("/") == "https://models.github.ai/inference" - assert embed_client.api_key == "fake-token" - - # Test that dependencies use correct defaults - context = await common_parameters() - assert context.openai_chat_model == "openai/gpt-4o" - assert context.openai_embed_model == "openai/text-embedding-3-large" - - -@pytest.mark.asyncio -async def test_github_models_with_custom_values(monkeypatch): - """Test that GitHub Models respects custom environment values.""" - # Set up environment for GitHub Models with custom values - monkeypatch.setenv("OPENAI_CHAT_HOST", "github") - monkeypatch.setenv("OPENAI_EMBED_HOST", "github") - monkeypatch.setenv("GITHUB_TOKEN", "fake-token") - monkeypatch.setenv("GITHUB_MODEL", "openai/gpt-4") - monkeypatch.setenv("GITHUB_EMBED_MODEL", "openai/text-embedding-ada-002") - - # Test that dependencies use custom values - context = await common_parameters() - assert context.openai_chat_model == "openai/gpt-4" - assert context.openai_embed_model == "openai/text-embedding-ada-002" + assert openai_chat_client.responses.create is not None + response = await openai_chat_client.responses.create(model="gpt-5.4", input=[{"role": "user", "content": "test"}]) + assert response.output_text == "The capital of France is 
Paris. [Benefit_Options-2.pdf]."