From a898c150d50910883dfec9e06b8b3d70d9ec0b1f Mon Sep 17 00:00:00 2001
From: Dave Page <dpage@pgadmin.org>
Date: Tue, 10 Mar 2026 12:56:27 +0000
Subject: [PATCH] Fixed an issue where AI Reports fail with OpenAI models that
 do not support the temperature parameter. #9719

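Removed the temperature parameter from all LLM provider clients and
pipeline calls, so that each model uses its own default sampling
temperature. This fixes compatibility with GPT-5-mini, GPT-5-nano and
future models that don't support a user-configurable temperature.

The report pipeline previously requested 0.0 for the planning stage
and 0.3 for section analysis and synthesis; those stages now run at
the provider's default as well.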

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 docs/en_US/release_notes_9_14.rst      |  1 +
 web/pgadmin/llm/client.py              |  2 --
 web/pgadmin/llm/providers/anthropic.py |  5 -----
 web/pgadmin/llm/providers/docker.py    |  3 ---
 web/pgadmin/llm/providers/ollama.py    |  3 ---
 web/pgadmin/llm/providers/openai.py    |  3 ---
 web/pgadmin/llm/reports/pipeline.py    | 16 +++++-----------
 7 files changed, 6 insertions(+), 27 deletions(-)

diff --git a/docs/en_US/release_notes_9_14.rst b/docs/en_US/release_notes_9_14.rst
index f841dcd5cd5..e96e2d7b6f3 100644
--- a/docs/en_US/release_notes_9_14.rst
+++ b/docs/en_US/release_notes_9_14.rst
@@ -30,4 +30,5 @@ Bug fixes
 
   | `Issue #9279 <https://github.com/pgadmin-org/pgadmin4/issues/9279>`_ -  Fixed an issue where OAuth2 authentication fails with 'object has no attribute' if OAUTH2_AUTO_CREATE_USER is False.
   | `Issue #9392 <https://github.com/pgadmin-org/pgadmin4/issues/9392>`_ -  Ensure that the Geometry Viewer refreshes when re-running queries or switching geometry columns, preventing stale data from being displayed.
+  | `Issue #9719 <https://github.com/pgadmin-org/pgadmin4/issues/9719>`_ -  Fixed an issue where AI Reports fail with OpenAI models that do not support the temperature parameter.
   | `Issue #9721 <https://github.com/pgadmin-org/pgadmin4/issues/9721>`_ -  Fixed an issue where permissions page is not completely accessible on full scroll.
diff --git a/web/pgadmin/llm/client.py b/web/pgadmin/llm/client.py
index 5a4f114e6d7..6339942ca4b 100644
--- a/web/pgadmin/llm/client.py
+++ b/web/pgadmin/llm/client.py
@@ -54,7 +54,6 @@ def chat(
         tools: Optional[list[Tool]] = None,
         system_prompt: Optional[str] = None,
         max_tokens: int = 4096,
-        temperature: float = 0.0,
         **kwargs
     ) -> LLMResponse:
         """
@@ -65,7 +64,6 @@ def chat(
             tools: Optional list of tools the LLM can use.
             system_prompt: Optional system prompt to set context.
             max_tokens: Maximum tokens in the response.
-            temperature: Sampling temperature (0.0 = deterministic).
             **kwargs: Additional provider-specific parameters.
 
         Returns:
diff --git a/web/pgadmin/llm/providers/anthropic.py b/web/pgadmin/llm/providers/anthropic.py
index d2e6d4af4bd..f730b21e389 100644
--- a/web/pgadmin/llm/providers/anthropic.py
+++ b/web/pgadmin/llm/providers/anthropic.py
@@ -77,7 +77,6 @@ def chat(
         tools: Optional[list[Tool]] = None,
         system_prompt: Optional[str] = None,
         max_tokens: int = 4096,
-        temperature: float = 0.0,
         **kwargs
     ) -> LLMResponse:
         """
@@ -88,7 +87,6 @@ def chat(
             tools: Optional list of tools Claude can use.
             system_prompt: Optional system prompt.
             max_tokens: Maximum tokens in response.
-            temperature: Sampling temperature.
             **kwargs: Additional parameters.
 
         Returns:
@@ -107,9 +105,6 @@ def chat(
         if system_prompt:
             payload['system'] = system_prompt
 
-        if temperature > 0:
-            payload['temperature'] = temperature
-
         if tools:
             payload['tools'] = self._convert_tools(tools)
 
diff --git a/web/pgadmin/llm/providers/docker.py b/web/pgadmin/llm/providers/docker.py
index 2d65a21a46c..4fa6ccda2cb 100644
--- a/web/pgadmin/llm/providers/docker.py
+++ b/web/pgadmin/llm/providers/docker.py
@@ -83,7 +83,6 @@ def chat(
         tools: Optional[list[Tool]] = None,
         system_prompt: Optional[str] = None,
         max_tokens: int = 4096,
-        temperature: float = 0.0,
         **kwargs
     ) -> LLMResponse:
         """
@@ -94,7 +93,6 @@ def chat(
             tools: Optional list of tools the model can use.
             system_prompt: Optional system prompt.
             max_tokens: Maximum tokens in response.
-            temperature: Sampling temperature.
             **kwargs: Additional parameters.
 
         Returns:
@@ -117,7 +115,6 @@ def chat(
             'model': self._model,
             'messages': converted_messages,
             'max_completion_tokens': max_tokens,
-            'temperature': temperature
         }
 
         if tools:
diff --git a/web/pgadmin/llm/providers/ollama.py b/web/pgadmin/llm/providers/ollama.py
index ad683109f72..8d38b72facd 100644
--- a/web/pgadmin/llm/providers/ollama.py
+++ b/web/pgadmin/llm/providers/ollama.py
@@ -81,7 +81,6 @@ def chat(
         tools: Optional[list[Tool]] = None,
         system_prompt: Optional[str] = None,
         max_tokens: int = 4096,
-        temperature: float = 0.0,
         **kwargs
     ) -> LLMResponse:
         """
@@ -92,7 +91,6 @@ def chat(
             tools: Optional list of tools the model can use.
             system_prompt: Optional system prompt.
             max_tokens: Maximum tokens in response (num_predict in Ollama).
-            temperature: Sampling temperature.
             **kwargs: Additional parameters.
 
         Returns:
@@ -117,7 +115,6 @@ def chat(
             'stream': False,
             'options': {
                 'num_predict': max_tokens,
-                'temperature': temperature
             }
         }
 
diff --git a/web/pgadmin/llm/providers/openai.py b/web/pgadmin/llm/providers/openai.py
index 3e7c169af1e..4bae71c821e 100644
--- a/web/pgadmin/llm/providers/openai.py
+++ b/web/pgadmin/llm/providers/openai.py
@@ -77,7 +77,6 @@ def chat(
         tools: Optional[list[Tool]] = None,
         system_prompt: Optional[str] = None,
         max_tokens: int = 4096,
-        temperature: float = 0.0,
         **kwargs
     ) -> LLMResponse:
         """
@@ -88,7 +87,6 @@ def chat(
             tools: Optional list of tools the model can use.
             system_prompt: Optional system prompt.
             max_tokens: Maximum tokens in response.
-            temperature: Sampling temperature.
             **kwargs: Additional parameters.
 
         Returns:
@@ -111,7 +109,6 @@ def chat(
             'model': self._model,
             'messages': converted_messages,
             'max_completion_tokens': max_tokens,
-            'temperature': temperature
         }
 
         if tools:
diff --git a/web/pgadmin/llm/reports/pipeline.py b/web/pgadmin/llm/reports/pipeline.py
index 0a963438d6a..81c60bd8417 100644
--- a/web/pgadmin/llm/reports/pipeline.py
+++ b/web/pgadmin/llm/reports/pipeline.py
@@ -218,8 +218,7 @@ def _planning_stage(self, context: dict) -> list[str]:
             response = self._call_llm_with_retry(
                 messages=[Message.user(user_prompt)],
                 system_prompt=PLANNING_SYSTEM_PROMPT,
-                max_tokens=500,
-                temperature=0.0
+                max_tokens=500
             )
 
             # Parse JSON response
@@ -292,8 +291,7 @@ def _analyze_section_with_retry(
                 response = self.client.chat(
                     messages=[Message.user(user_prompt)],
                     system_prompt=SECTION_ANALYSIS_SYSTEM_PROMPT,
-                    max_tokens=1500,
-                    temperature=0.3
+                    max_tokens=1500
                 )
 
                 # Determine severity from content
@@ -374,8 +372,7 @@ def _synthesize_with_retry(
                 response = self.client.chat(
                     messages=[Message.user(user_prompt)],
                     system_prompt=SYNTHESIS_SYSTEM_PROMPT,
-                    max_tokens=4096,
-                    temperature=0.3
+                    max_tokens=4096
                 )
 
                 yield {'type': 'result', 'result': response.content}
@@ -408,8 +405,7 @@ def _call_llm_with_retry(
         self,
         messages: list[Message],
         system_prompt: str,
-        max_tokens: int = 4096,
-        temperature: float = 0.3
+        max_tokens: int = 4096
     ):
         """Call LLM with exponential backoff retry.
 
@@ -417,7 +413,6 @@ def _call_llm_with_retry(
             messages: Messages to send.
             system_prompt: System prompt.
             max_tokens: Maximum response tokens.
-            temperature: Sampling temperature.
 
         Returns:
             LLMResponse from the client.
@@ -430,8 +425,7 @@ def _call_llm_with_retry(
                 return self.client.chat(
                     messages=messages,
                     system_prompt=system_prompt,
-                    max_tokens=max_tokens,
-                    temperature=temperature
+                    max_tokens=max_tokens
                 )
             except LLMClientError as e:
                 if e.error.retryable and attempt < self.max_retries - 1: