Azure-Samples · pamelafox · Apr 11, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 10, 2026
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
@@ -4,5 +4,5 @@ FROM mcr.microsoft.com/devcontainers/${IMAGE}
 ENV PYTHONUNBUFFERED 1
 
 RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
-    && apt-get -y install --no-install-recommends postgresql-client \
-     && apt-get clean -y && rm -rf /var/lib/apt/lists/*
+    && apt-get -y install --no-install-recommends postgresql-client zstd \
+     && apt-get clean -y && rm -rf /var/lib/apt/lists/*
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -29,16 +29,13 @@
 			"extensions": [
 				"ms-python.python",
 				"ms-python.vscode-pylance",
-				"ms-python.vscode-python-envs",
 				"charliermarsh.ruff",
 				"mtxr.sqltools",
 				"mtxr.sqltools-driver-pg",
 				"esbenp.prettier-vscode",
 				"mechatroner.rainbow-csv",
 				"ms-vscode.vscode-node-azure-pack",
-				"esbenp.prettier-vscode",
 				"twixes.pypi-assistant",
-				"ms-python.vscode-python-envs",
 				"teamsdevapp.vscode-ai-foundry",
 				"ms-windows-ai-studio.windows-ai-studio"
 			],

diff --git a/.env.sample b/.env.sample
@@ -5,16 +5,15 @@ POSTGRES_PASSWORD=postgres
 POSTGRES_DATABASE=postgres
 POSTGRES_SSL=disable
 
-# OPENAI_CHAT_HOST can be either azure, openai, ollama, or github:
+# OPENAI_CHAT_HOST can be either azure, openai, or ollama:
 OPENAI_CHAT_HOST=azure
-# OPENAI_EMBED_HOST can be either azure, openai, ollama, or github:
+# OPENAI_EMBED_HOST can be either azure, openai, or ollama:
 OPENAI_EMBED_HOST=azure
 # Needed for Azure:
 # You also need to `azd auth login` if running this locally
 AZURE_OPENAI_ENDPOINT=https://YOUR-AZURE-OPENAI-SERVICE-NAME.openai.azure.com
-AZURE_OPENAI_VERSION=2024-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-4o-mini
-AZURE_OPENAI_CHAT_MODEL=gpt-4o-mini
+AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-5.4
+AZURE_OPENAI_CHAT_MODEL=gpt-5.4
 AZURE_OPENAI_EMBED_DEPLOYMENT=text-embedding-3-large
 AZURE_OPENAI_EMBED_MODEL=text-embedding-3-large
 AZURE_OPENAI_EMBED_DIMENSIONS=1024
@@ -35,9 +34,3 @@ OLLAMA_ENDPOINT=http://host.docker.internal:11434/v1
 OLLAMA_CHAT_MODEL=llama3.1
 OLLAMA_EMBED_MODEL=nomic-embed-text
 OLLAMA_EMBEDDING_COLUMN=embedding_nomic
-# Needed for GitHub Models:
-GITHUB_TOKEN=YOUR-GITHUB-TOKEN
-GITHUB_MODEL=openai/gpt-4o
-GITHUB_EMBED_MODEL=openai/text-embedding-3-large
-GITHUB_EMBED_DIMENSIONS=1024
-GITHUB_EMBEDDING_COLUMN=embedding_3l
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
@@ -11,7 +11,7 @@ RAG on PostgreSQL is a Python FastAPI backend with React TypeScript frontend tha
 Install the following tools before beginning development:
 
 - **Python 3.10+** (3.12 recommended)
-- **Node.js 18+** for frontend development  
+- **Node.js 18+** for frontend development
 - **PostgreSQL 14+** with pgvector extension
 - **Azure Developer CLI (azd)** for deployment
 - **Docker Desktop** for dev containers (optional)
@@ -37,7 +37,7 @@ Run these commands in sequence. NEVER CANCEL any long-running commands:
    ```bash
    # Ubuntu/Debian:
    sudo apt update && sudo apt install -y postgresql-16-pgvector
-   
+
    # Start PostgreSQL and set password
    sudo service postgresql start
    sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'postgres'"
@@ -97,7 +97,7 @@ Use "Frontend & Backend" configuration in the VS Code Run & Debug menu.
 ### Linting and Formatting (ALWAYS run before committing)
 ```bash
 ruff check .          # Lint code (takes <1 second)
-ruff format .          # Format code (takes <1 second)  
+ruff format .          # Format code (takes <1 second)
 mypy . --python-version 3.12  # Type check (takes ~42 seconds)
 ```
 
@@ -121,7 +121,7 @@ pytest tests/e2e.py --tracing=retain-on-failure
 **CRITICAL TIMING INFORMATION** - Set these timeout values and NEVER CANCEL:
 
 - **Dependencies install**: 90 seconds (use 180+ second timeout)
-- **Frontend npm install**: 22 seconds (use 60+ second timeout)  
+- **Frontend npm install**: 22 seconds (use 60+ second timeout)
 - **Frontend build**: 12 seconds (use 30+ second timeout)
 - **MyPy type checking**: 42 seconds (use 90+ second timeout)
 - **Full test suite**: 25 seconds (use 60+ second timeout)
@@ -156,7 +156,7 @@ pytest tests/e2e.py --tracing=retain-on-failure
    # Test API endpoints
    curl http://localhost:8000/items/1
    # Should return JSON with item data
-   
+
    # Test frontend
    curl http://localhost:8000/ | head -n 5
    # Should return HTML with "RAG on PostgreSQL" title
@@ -226,9 +226,6 @@ The application supports multiple OpenAI providers:
 3. **Ollama** (local):
    Set `OPENAI_CHAT_HOST=ollama`
 
-4. **GitHub Models**:
-   Set `OPENAI_CHAT_HOST=github`
-
 ## Common Issues and Solutions
 
 ### Database Connection Issues
@@ -267,7 +264,7 @@ Open `http://localhost:8089/` and point to your running application.
 The application provides these REST API endpoints (view full docs at `http://localhost:8000/docs`):
 
 - `GET /items/{id}` - Get specific item by ID
-- `GET /search` - Search items with text query 
+- `GET /search` - Search items with text query
 - `GET /similar` - Find similar items using vector search
 - `POST /chat` - Chat with RAG system (requires OpenAI configuration)
 - `POST /chat/stream` - Streaming chat responses
@@ -286,7 +283,7 @@ curl "http://localhost:8000/search?query=tent&limit=5"
 **Quick ls -la output for repository root:**
 ```
 .devcontainer/          # Dev container configuration
-.env.sample            # Environment variables template  
+.env.sample            # Environment variables template
 .github/               # GitHub Actions workflows
 .gitignore            # Git ignore patterns
 .pre-commit-config.yaml # Pre-commit hook configuration
@@ -309,8 +306,8 @@ tests/        # Test suite
 - **Always build and test locally before committing**
 - **Use pre-commit hooks** - they run ruff automatically
 - **Check the GitHub Actions** in `.github/workflows/` for CI requirements
-- **Reference the full README.md** for deployment and Azure-specific details  
+- **Reference the full README.md** for deployment and Azure-specific details
 - **Use VS Code with the Python and Ruff extensions** for the best development experience
 - **Never skip the frontend build** - the backend serves static files from `src/backend/static/`
 
-This project follows modern Python and TypeScript development practices with comprehensive tooling for code quality, testing, and deployment.
+This project follows modern Python and TypeScript development practices with comprehensive tooling for code quality, testing, and deployment.
diff --git a/.github/workflows/app-tests.yaml b/.github/workflows/app-tests.yaml
@@ -27,10 +27,10 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ["ubuntu-latest", "macos-latest-xlarge", "macos-13", "windows-latest"]
+        os: ["ubuntu-latest", "macos-26", "windows-latest"]
         python_version: ["3.10", "3.11", "3.12"]
         exclude:
-          - os: macos-latest-xlarge
+          - os: macos-26
             python_version: "3.10"
     env:
       UV_SYSTEM_PYTHON: 1
@@ -43,12 +43,12 @@ jobs:
         - uses: actions/checkout@v4
 
         - name: (MacOS) Install postgreSQL and pgvector using brew
-          if: matrix.os == 'macos-13' || matrix.os == 'macos-latest-xlarge'
+          if: matrix.os == 'macos-26'
           run: |
-            brew install postgresql@14
-            brew link --overwrite postgresql@14
+            brew install postgresql@18
+            brew link --overwrite postgresql@18
             brew install pgvector
-            brew services start postgresql@14 && sleep 1
+            brew services start postgresql@18 && sleep 1
             createuser -s ${{ env.POSTGRES_USERNAME }}
             psql -d postgres -c "ALTER USER ${{ env.POSTGRES_USERNAME }} WITH PASSWORD '${{ env.POSTGRES_PASSWORD }}'"
             psql -d postgres -c 'CREATE EXTENSION vector'

diff --git a/.github/workflows/evaluate.yaml b/.github/workflows/evaluate.yaml
@@ -34,7 +34,6 @@ jobs:
       OPENAI_CHAT_HOST: ${{ vars.OPENAI_CHAT_HOST }}
       OPENAI_EMBED_HOST: ${{ vars.OPENAI_EMBED_HOST }}
       AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_OPENAI_ENDPOINT }}
-      AZURE_OPENAI_VERSION: ${{ vars.AZURE_OPENAI_VERSION }}
       AZURE_OPENAI_CHAT_DEPLOYMENT: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT }}
       AZURE_OPENAI_CHAT_MODEL: ${{ vars.AZURE_OPENAI_CHAT_MODEL }}
       AZURE_OPENAI_EMBED_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EMBED_DEPLOYMENT }}

diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -36,5 +36,6 @@
         "htmlcov": true,
         ".mypy_cache": true,
         ".coverage": true
-    }
+    },
+    "python-envs.defaultEnvManager": "ms-python.python:system"
 }
diff --git a/AGENTS.md b/AGENTS.md
@@ -8,3 +8,19 @@ When adding new azd environment variables, update:
 1. infra/main.parameters.json : Add the new parameter with a Bicep-friendly variable name and map to the new environment variable
 1. infra/main.bicep: Add the new Bicep parameter at the top, and add it to the `webAppEnv` object
 1. .github/workflows/azure-dev.yml: Add the new environment variable under `env` section. If it's a @secure variable in main.bicep, it should come from `secrets`, otherwise from `vars`.
+
+## Upgrading Python packages
+
+1. Update the version constraint in src/backend/pyproject.toml
+
+2. Re-compile src/backend/requirements.txt from the src/backend folder:
+
+    ```shell
+    uv pip compile pyproject.toml -o requirements.txt --python-version 3.10
+    ```
+
+3. Reinstall with:
+
+    ```shell
+    python -m pip install -r src/backend/requirements.txt
+    ```
diff --git a/azure.yaml b/azure.yaml
@@ -42,7 +42,6 @@ pipeline:
     - OPENAI_CHAT_HOST
     - OPENAI_EMBED_HOST
     - AZURE_OPENAI_ENDPOINT
-    - AZURE_OPENAI_VERSION
     - AZURE_OPENAI_CHAT_DEPLOYMENT
     - AZURE_OPENAI_CHAT_MODEL
     - AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION

diff --git a/evals/eval_config.json b/evals/eval_config.json
@@ -8,8 +8,7 @@
             "use_advanced_flow": true,
             "top": 3,
             "retrieval_mode": "hybrid",
-            "temperature": 0.3,
-            "seed": 42
+            "temperature": 0.3
         }
     },
     "target_response_answer_jmespath": "message.content",

diff --git a/evals/evaluate.py b/evals/evaluate.py
@@ -68,8 +68,6 @@ def get_openai_config() -> dict:
         openai_config["model"] = os.environ["AZURE_OPENAI_EVAL_MODEL"]
     elif os.environ.get("OPENAI_CHAT_HOST") == "ollama":
         raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com")
-    elif os.environ.get("OPENAI_CHAT_HOST") == "github":
-        raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com")
     else:
         logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY")
         openai_config = {"api_key": os.environ["OPENAICOM_KEY"], "model": "gpt-4"}

diff --git a/evals/generate_ground_truth.py b/evals/generate_ground_truth.py
@@ -3,12 +3,10 @@
 import os
 from collections.abc import Generator
 from pathlib import Path
-from typing import Union
 
 from azure.identity import AzureDeveloperCliCredential, get_bearer_token_provider
 from dotenv_azd import load_azd_env
-from openai import AzureOpenAI, OpenAI
-from openai.types.chat import ChatCompletionToolParam
+from openai import OpenAI
 from sqlalchemy import create_engine, select
 from sqlalchemy.orm import Session
 
@@ -17,32 +15,30 @@
 logger = logging.getLogger("ragapp")
 
 
-def qa_pairs_tool(num_questions: int = 1) -> ChatCompletionToolParam:
+def qa_pairs_tool(num_questions: int = 1) -> dict:
     return {
         "type": "function",
-        "function": {
-            "name": "qa_pairs",
-            "description": "Send in question and answer pairs for a customer-facing chat app",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "qa_list": {
-                        "type": "array",
-                        "description": f"List of {num_questions} question and answer pairs",
-                        "items": {
-                            "type": "object",
-                            "properties": {
-                                "question": {"type": "string", "description": "The question text"},
-                                "answer": {"type": "string", "description": "The answer text"},
-                            },
-                            "required": ["question", "answer"],
+        "name": "qa_pairs",
+        "description": "Send in question and answer pairs for a customer-facing chat app",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "qa_list": {
+                    "type": "array",
+                    "description": f"List of {num_questions} question and answer pairs",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "question": {"type": "string", "description": "The question text"},
+                            "answer": {"type": "string", "description": "The answer text"},
                         },
-                        "minItems": num_questions,
-                        "maxItems": num_questions,
-                    }
-                },
-                "required": ["qa_list"],
+                        "required": ["question", "answer"],
+                    },
+                    "minItems": num_questions,
+                    "maxItems": num_questions,
+                }
             },
+            "required": ["qa_list"],
         },
     }
 
@@ -67,7 +63,6 @@ def source_retriever() -> Generator[str, None, None]:
         # for record in records:
         #    logger.info(f"Processing database record: {record.name}")
         #    yield f"## Product ID: [{record.id}]\n" + record.to_str_for_rag()
-        # await self.openai_chat_client.chat.completions.create(
 
 
 def source_to_text(source) -> str:
@@ -78,32 +73,29 @@ def answer_formatter(answer, source) -> str:
     return f"{answer} [{source['id']}]"
 
 
-def get_openai_client() -> tuple[Union[AzureOpenAI, OpenAI], str]:
+def get_openai_client() -> tuple[OpenAI, str]:
     """Return an OpenAI client based on the environment variables"""
-    openai_client: Union[AzureOpenAI, OpenAI]
+    openai_client: OpenAI
     OPENAI_CHAT_HOST = os.getenv("OPENAI_CHAT_HOST")
     if OPENAI_CHAT_HOST == "azure":
+        azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
         if api_key := os.getenv("AZURE_OPENAI_KEY"):
             logger.info("Using Azure OpenAI Service with API Key from AZURE_OPENAI_KEY")
-            openai_client = AzureOpenAI(
-                api_version=os.environ["AZURE_OPENAI_VERSION"],
-                azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
+            openai_client = OpenAI(
+                base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/",
                 api_key=api_key,
             )
         else:
             logger.info("Using Azure OpenAI Service with Azure Developer CLI Credential")
             azure_credential = AzureDeveloperCliCredential(process_timeout=60, tenant_id=os.environ["AZURE_TENANT_ID"])
             token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
-            openai_client = AzureOpenAI(
-                api_version=os.environ["AZURE_OPENAI_VERSION"],
-                azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
-                azure_ad_token_provider=token_provider,
+            openai_client = OpenAI(
+                base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/",
+                api_key=token_provider,
             )
         model = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"]
     elif OPENAI_CHAT_HOST == "ollama":
         raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com")
-    elif OPENAI_CHAT_HOST == "github":
-        raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com")
     else:
         logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY")
         openai_client = OpenAI(api_key=os.environ["OPENAICOM_KEY"])
@@ -123,18 +115,21 @@ def generate_ground_truth_data(num_questions_total: int, num_questions_per_sourc
         if len(qa) > num_questions_total:
             logger.info("Generated enough questions already, stopping")
             break
-        result = openai_client.chat.completions.create(
+        result = openai_client.responses.create(
             model=model,
-            messages=[
+            input=[
                 {"role": "system", "content": generate_prompt},
                 {"role": "user", "content": json.dumps(source)},
             ],
-            tools=[qa_pairs_tool(num_questions=2)],
+            tools=[qa_pairs_tool(num_questions=2)],  # type: ignore[list-item]
+            max_output_tokens=1000,
+            store=False,
         )
-        if not result.choices[0].message.tool_calls:
+        tool_calls = [item for item in result.output if item.type == "function_call"]
+        if not tool_calls:
             logger.warning("No tool calls found in response, skipping")
             continue
-        qa_pairs = json.loads(result.choices[0].message.tool_calls[0].function.arguments)["qa_list"]
+        qa_pairs = json.loads(tool_calls[0].arguments)["qa_list"]
         qa_pairs = [{"question": qa_pair["question"], "truth": qa_pair["answer"]} for qa_pair in qa_pairs]
         qa.extend(qa_pairs)