codefrydev · PrashantUnity · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026
diff --git a/AGENT.md b/AGENT.md
@@ -4,7 +4,7 @@ Developer reference for agents and contributors. User-facing overview: [README.m
 
 **What it is:** `python -m src` from repo root (`src/__main__.py` -> package **`website_profiling`**). Config: stored in **PostgreSQL** (`pipeline_config` table, `key/value/is_unknown/updated_at`). A shadow **`pipeline-config.txt`** is auto-written to `DATA_DIR` on every Save/Run. CLI loads DB first (`DATABASE_URL`), then shadow file; `--config` overrides with a file. Reference keys: `input.txt.example` and `pipeline-config.example.txt` (not auto-loaded).
 
-**LLM / AI:** Settings live in **`llm_config`** table in PostgreSQL. Providers: OpenAI, Google Gemini, Anthropic, Ollama (`web/src/lib/llmConfigSchema.ts`). Configure only via web UI **AI** tab (`GET/PUT /api/llm-config`, localhost). Never in `pipeline-config.txt` or `--config`.
+**LLM / AI:** Settings live in **`llm_config`** table in PostgreSQL. Providers: OpenAI, Google Gemini, Anthropic, Groq, Ollama (`web/src/lib/llmConfigSchema.ts`). Configure only via web UI **AI** tab (`GET/PUT /api/llm-config`, localhost). Never in `pipeline-config.txt` or `--config`.
 
 **Frontend:** **`web/`** (Next.js) -- server reads PostgreSQL via `/api/report/*`.
 

diff --git a/README.md b/README.md
@@ -214,13 +214,14 @@ In Audit settings, set **Crawl rendering** to `javascript` (always headless Chro
 
 ### AI chat (optional)
 
-Ask questions about audit data at [http://localhost:3000/chat](http://localhost:3000/chat). Enable a provider under **Run audit → AI settings** (`llm_enabled`, provider, model). `./local-run setup` installs Python deps from `requirements.txt` (including `httpx`, OpenAI, and Anthropic SDKs; Gemini uses `httpx` via REST).
+Ask questions about audit data at [http://localhost:3000/chat](http://localhost:3000/chat). Enable a provider under **Run audit → AI settings** (`llm_enabled`, provider, model). `./local-run setup` installs Python deps from `requirements.txt` (including `httpx`, OpenAI, Anthropic, and Groq SDKs; Gemini uses `httpx` via REST).
 
 | Provider | Notes |
 |----------|-------|
 | **Ollama** | Local daemon at `http://127.0.0.1:11434`. Chat UI lists installed models plus the live Ollama cloud catalog. Native tool calling when supported; ReAct fallback otherwise. |
 | **OpenAI** / **Anthropic** | API key in AI settings or env (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`); native tool calling with streaming. |
 | **Google Gemini** | API key in AI settings or `GEMINI_API_KEY`; REST via `httpx`. |
+| **Groq** | API key in AI settings or `GROQ_API_KEY`; official Groq Python SDK; native tool calling with streaming. Default model `openai/gpt-oss-120b`. |
 
 The agent uses the same **340 read-only audit tools** as the MCP server ([docs/MCP.md](docs/MCP.md)), with **dynamic routing** (~45 tools per turn). Responses stream over SSE (`POST /api/chat`). Sessions persist per property (`chat_sessions` / `chat_messages`).
 

diff --git a/docs/MCP.md b/docs/MCP.md
@@ -239,6 +239,7 @@ Responses stream over SSE via `POST /api/chat`. Sessions persist per property in
 | **OpenAI** | Native with streaming | API key in AI settings or `OPENAI_API_KEY` |
 | **Anthropic** | Native with streaming | API key in AI settings or `ANTHROPIC_API_KEY` |
 | **Google Gemini** | Native with streaming | API key in AI settings or `GEMINI_API_KEY`; REST via `httpx` |
+| **Groq** | Native with streaming | API key in AI settings or `GROQ_API_KEY`; official Groq Python SDK; default model `openai/gpt-oss-120b` |
 
 ---
 

diff --git a/requirements.txt b/requirements.txt
@@ -37,6 +37,7 @@ playwright==1.60.0
 httpx==0.28.1
 openai==2.41.0
 anthropic==0.107.0
+groq==1.4.0
 
 # Spell-check / HTML validation extras
 pyspellchecker==0.9.0

diff --git a/src/website_profiling/crawl/crawler.py b/src/website_profiling/crawl/crawler.py
@@ -449,8 +449,15 @@ def crawl(
                             continue
                         futures.append(ex.submit(self.worker, url))
 
-                    if futures and self.queue.empty():
+                    can_submit_more = (
+                        not self.queue.empty()
+                        and len(futures) < self.concurrency
+                        and (len(self.results) + len(futures)) < self.max_pages
+                    )
+                    if futures and not can_submit_more:
                         # Block until at least one future completes instead of busy-polling.
+                        # Covers an empty frontier, a saturated worker pool, and a spent
+                        # max_pages budget; wait() returns immediately if a future is already done.
                         wait(futures, return_when=FIRST_COMPLETED)
 
                     remaining = []

diff --git a/src/website_profiling/crawl/fetchers/factory.py b/src/website_profiling/crawl/fetchers/factory.py
@@ -29,8 +29,10 @@ def _browser_auth_from_session(
         headers[str(key)] = str(value)
     credentials: Optional[dict[str, str]] = None
     auth = getattr(session, "auth", None)
-    if auth and auth[0]:
-        credentials = {"username": str(auth[0]), "password": str(auth[1] or "")}
+    # requests also allows a callable auth handler; only basic (user, pass) tuples map here.
+    if isinstance(auth, (tuple, list)) and len(auth) >= 1 and auth[0]:
+        password = str(auth[1] or "") if len(auth) > 1 else ""
+        credentials = {"username": str(auth[0]), "password": password}
     return headers, credentials
 
 

diff --git a/src/website_profiling/integrations/google/keyword_enrich.py b/src/website_profiling/integrations/google/keyword_enrich.py
@@ -73,7 +73,8 @@ def _normalize_kw(kw: str) -> str:
 # ── Intent ────────────────────────────────────────────────────────────────────
 
 def classify_intent(kw: str, brand_name: str = "") -> str:
-    if brand_name and brand_name.lower().split()[0] in kw.lower():
+    brand_tokens = brand_name.lower().split()
+    if brand_tokens and brand_tokens[0] in kw.lower():
         return "navigational"
     if QUESTION_STARTS.match(kw):
         return "informational"

diff --git a/src/website_profiling/llm/agent.py b/src/website_profiling/llm/agent.py
@@ -2,22 +2,57 @@
 from __future__ import annotations
 
 import json
+import os
 from typing import Any, Callable
 
 from ..concurrency import map_parallel, tool_concurrency
 from ..llm_config import llm_is_enabled, load_llm_config_from_db
 from ..text_sanitize import sanitize_unicode_deep, strip_surrogates
 from ..tools.audit_tools import AuditToolContext
-from ..tools.audit_tools.registry import TOOL_DEFINITIONS, dispatch_tool, openai_tools_schema
+from ..tools.audit_tools.registry import (
+    TOOL_DEFINITIONS,
+    _normalize_tool_args,
+    dispatch_tool,
+    openai_tools_schema,
+)
 from ..tools.audit_tools.tool_selector import (
     apply_tool_cap,
     chat_tool_mode,
     chat_tool_search_cap,
     select_tools_for_turn,
 )
 from .base import ChatResult, ToolCall, get_llm_client
+from .chat_narrative import ChatNarrativeError, synthesize_chat_narrative
+
+MAX_TOOL_ROUNDS_DEFAULT = 10
+MAX_TOOL_ROUNDS_EXTENDED = 100
+# Back-compat for tests and imports
+MAX_TOOL_ROUNDS = MAX_TOOL_ROUNDS_DEFAULT
+
+
+def _truthy_cfg(cfg: dict[str, str], key: str) -> bool:
+    return str(cfg.get(key, "")).lower() in ("true", "1", "yes")
+
+
+def _max_tool_rounds(cfg: dict[str, str]) -> int:
+    """Resolve per-turn tool loop cap from llm_config and optional env overrides."""
+    if _truthy_cfg(cfg, "llm_chat_unlimited_tool_rounds"):
+        raw = (os.environ.get("CHAT_MAX_TOOL_ROUNDS_EXTENDED") or "").strip()
+        if raw:
+            try:
+                return max(1, int(raw))
+            except ValueError:
+                pass
+        return MAX_TOOL_ROUNDS_EXTENDED
+    raw = (os.environ.get("CHAT_MAX_TOOL_ROUNDS") or "").strip()
+    if raw:
+        try:
+            return max(1, int(raw))
+        except ValueError:
+            pass
+    return MAX_TOOL_ROUNDS_DEFAULT
 
-MAX_TOOL_ROUNDS = 10
+NARRATIVE_FAILED_MSG = "Could not generate a summary. Tool results are shown below."
 
 SYSTEM_PROMPT = """You are Site Audit AI, a technical SEO assistant for a self-hosted site audit platform.
 You help users understand crawl results, audit issues, Lighthouse scores, keywords, and Search Console data.
@@ -31,7 +66,7 @@
 - Use get_data_coverage_report when tools return empty or missing data.
 
 Image playbook:
-- Overview: get_image_audit_summary first — the UI renders summary cards, page preview lists (alt/lazy/OG/dimensions), and Lighthouse image findings. Write only ### Power Insights and ### Recommended actions (interpretation). Never repeat counts, URL lists, or markdown tables of pages.
+- Overview: get_image_audit_summary first — the UI renders summary cards, page preview lists (alt/lazy/OG/dimensions), and Lighthouse image findings. Call tools only; the app generates user-facing narrative separately.
 - Missing alt / lazy / OG / dimensions: get_image_audit_summary includes previews; call list_pages_* only if the user wants the full exportable list
 - All image URLs: list_site_image_urls (optional kind filter)
 - Lighthouse image issues: list_lighthouse_image_opportunities
@@ -63,10 +98,14 @@
 - When citing issues, include the URL when available.
 - The chat UI automatically renders charts, gauges, and tables from tool results. Never tell the user you cannot show graphs or charts, and never send them to other app pages for data you can fetch with tools.
 - For visual or chart requests, always call the appropriate tools first, then give a short interpretation (2–4 sentences) with recommendations.
-- When tools return issue lists, scores, or breakdowns, keep the narrative short. Do not re-list every issue or duplicate data in markdown tables—the UI renders structured blocks from tool data.
-- Use markdown headings and bullets for structure. Do not emit fake chart JSON or custom visualization blocks.
+- When tools return issue lists, scores, or breakdowns, do not re-list them in prose—the UI renders structured blocks from tool data.
+- Do not emit markdown headings, bullet lists, or pipe tables for the user. The app synthesizes the final narrative from tool results.
+- After gathering enough data via tools, stop calling tools. A brief internal acknowledgment is enough; user-facing text is generated separately.
+- Do not repeat health scores, URL counts, success rates, category scores, priority counts, or URL lists when the UI already shows them in cards or tables.
+- Never mention internal tool names (e.g. run_technical_workflow, export_audit_report) in user-facing text.
 - You are read-only: you cannot run crawls or change settings.
-- If data is missing, say what integration or crawl step is needed.
+- Do not pass property_id or report_id in tool calls — they are injected from the active chat property.
+- If data is missing, say what integration or crawl step is needed (briefly; narrative will be expanded separately).
 """
 
 REACT_PROMPT_SUFFIX = """
@@ -111,8 +150,6 @@ def _react_step(
             tool_calls=[ToolCall(id="react-0", name=name, arguments=args)],
         )
     text = str(data.get("text") or data.get("answer") or data.get("content") or "")
-    if on_token and text:
-        on_token(text)
     return ChatResult(content=text)
 
 
@@ -173,6 +210,41 @@ def _build_openai_messages(history: list[dict[str, str]]) -> list[dict[str, Any]
     return out
 
 
+def _finish_with_narrative(
+    cfg: dict[str, str],
+    user_message: str,
+    tool_events: list[dict[str, Any]],
+    on_event: Callable[[dict], None] | None,
+    *,
+    partial_note: str | None = None,
+) -> dict[str, Any]:
+    if partial_note:
+        _emit(on_event, {"type": "partial_done", "message": partial_note})
+
+    def on_status(phase: str) -> None:
+        detail = "Retrying summary…" if phase == "retrying" else "Summarizing insights…"
+        _emit(on_event, {"type": "status", "phase": "synthesizing", "detail": detail})
+
+    try:
+        narrative = synthesize_chat_narrative(
+            cfg,
+            user_message,
+            tool_events,
+            on_status=on_status,
+        )
+    except ChatNarrativeError:
+        _emit(on_event, {"type": "error", "message": NARRATIVE_FAILED_MSG})
+        return {
+            "ok": False,
+            "error": NARRATIVE_FAILED_MSG,
+            "tool_events": tool_events,
+        }
+
+    _emit(on_event, {"type": "narrative", "narrative": narrative})
+    _emit(on_event, {"type": "done"})
+    return {"ok": True, "tool_events": tool_events, "narrative": narrative}
+
+
 def run_agent_turn(
     messages: list[dict[str, str]],
     context: AuditToolContext,
@@ -181,7 +253,7 @@ def run_agent_turn(
 ) -> dict[str, Any]:
     """
     Run the agent loop. Emits NDJSON-style events via on_event.
-    Returns final result dict with ok, message, tool_events.
+    Returns final result dict with ok, tool_events, and narrative on success.
     """
     cfg = load_llm_config_from_db()
     if not llm_is_enabled(cfg):
@@ -199,33 +271,37 @@ def run_agent_turn(
     openai_messages = _build_openai_messages(messages)
     last_user = _last_user_message(messages)
     active_names = select_tools_for_turn(last_user, messages)
-    tools = openai_tools_schema(active_names)
+    tools = openai_tools_schema(active_names, context_scoped=True)
     tool_events: list[dict[str, Any]] = []
-    final_message = ""
+    max_rounds = _max_tool_rounds(cfg)
+    partial_note: str | None = None
 
-    def on_token(text: str) -> None:
-        _emit(on_event, {"type": "token", "text": strip_surrogates(text)})
-
-    for _round in range(MAX_TOOL_ROUNDS):
+    for _round in range(max_rounds):
         _emit(on_event, {
             "type": "status",
             "phase": "model",
-            "detail": f"Thinking (step {_round + 1}/{MAX_TOOL_ROUNDS})…",
+            "detail": f"Thinking (step {_round + 1}/{max_rounds})…",
         })
         try:
             llm_messages = sanitize_unicode_deep(openai_messages)
             if _supports_native_tools(client):
-                result = client.chat_with_tools(llm_messages, tools, on_token=on_token)
+                result = client.chat_with_tools(llm_messages, tools, on_token=None)
             else:
                 result = _react_step(
                     client,
                     llm_messages,
                     _tools_description(names=active_names, compact=True),
-                    on_token,
+                    None,
                 )
         except Exception as e:
             msg = str(e).strip() or type(e).__name__
-            if "httpx" in msg.lower() or "requirements.txt" in msg.lower():
+            if "Connection error" in msg and (cfg.get("llm_provider") or "").strip().lower() == "groq":
+                msg = (
+                    "Could not reach Groq. Check your Groq API key on the Secrets page and "
+                    "that outbound HTTPS to api.groq.com is allowed. "
+                    f"Details: {msg}"
+                )
+            elif "httpx" in msg.lower() or "requirements.txt" in msg.lower():
                 msg = (
                     "LLM dependencies are missing. Run: pip install -r requirements.txt "
                     f"(or restart with ./local-run setup). Details: {msg}"
@@ -282,9 +358,10 @@ def _run_tool(tc: ToolCall) -> dict[str, Any]:
                         "error": f"tool not loaded this turn: {tc.name}",
                         "hint": "Call search_audit_tools to load specialized tools, or rephrase your request.",
                     }
+                tool_args = _normalize_tool_args(tc.arguments)
                 try:
                     return sanitize_unicode_deep(
-                        dispatch_tool(tc.name, tc.arguments, context=context),
+                        dispatch_tool(tc.name, tool_args, context=context),
                     )
                 except Exception as e:  # noqa: BLE001 - isolate one tool's failure from the batch
                     return {"error": str(e).strip() or type(e).__name__}
@@ -313,16 +390,21 @@ def _run_tool(tc: ToolCall) -> dict[str, Any]:
                     })
 
             if gated:
-                tools = openai_tools_schema(active_names)
+                tools = openai_tools_schema(active_names, context_scoped=True)
             continue
 
-        final_message = strip_surrogates(result.content).strip()
-        if final_message:
-            _emit(on_event, {"type": "done", "message": final_message})
-            return {"ok": True, "message": final_message, "tool_events": tool_events}
-
         break
+    else:
+        if tool_events:
+            partial_note = (
+                f"The agent completed {len(tool_events)} tool step(s) but did not finish "
+                "all planned steps. Tool results are preserved below."
+            )
 
-    err = "Agent stopped after maximum tool rounds without a final answer."
-    _emit(on_event, {"type": "error", "message": err})
-    return {"ok": False, "error": err, "tool_events": tool_events}
+    return _finish_with_narrative(
+        cfg,
+        last_user,
+        tool_events,
+        on_event,
+        partial_note=partial_note,
+    )
diff --git a/src/website_profiling/llm/base.py b/src/website_profiling/llm/base.py
@@ -23,6 +23,27 @@ class ChatResult:
 
 TokenCallback = Callable[[str], None]
 
+OLLAMA_DEFAULT_BASES = frozenset({
+    "http://127.0.0.1:11434",
+    "http://localhost:11434",
+})
+
+
+def is_ollama_base_url(url: str) -> bool:
+    """True for the default local Ollama bases or any URL ending in port 11434 (any host)."""
+    normalized = (url or "").strip().rstrip("/").lower()
+    if normalized in OLLAMA_DEFAULT_BASES:
+        return True
+    return normalized.endswith(":11434")
+
+
+def optional_cloud_base_url(cfg: dict[str, str]) -> str | None:
+    """Custom OpenAI-compatible base URL, or None when unset or it looks like Ollama (port 11434)."""
+    base = (cfg.get("llm_base_url") or "").strip().rstrip("/")
+    if not base or is_ollama_base_url(base):
+        return None
+    return base
+
 
 class LLMClient(Protocol):
     def complete_json(self, system: str, user: str) -> dict[str, Any]: ...
@@ -69,6 +90,10 @@ def get_llm_client(cfg: dict[str, str]) -> LLMClient:
         from .providers.gemini import GeminiClient
 
         return GeminiClient(cfg)
+    if provider == "groq":
+        from .providers.groq import GroqClient
+
+        return GroqClient(cfg)
     if provider == "ollama":
         from .providers.ollama import OllamaClient