From 6397313e886d6660dd7f19a0848d7daa77cd6376 Mon Sep 17 00:00:00 2001 From: David O'Keeffe Date: Mon, 27 Apr 2026 12:13:44 +1000 Subject: [PATCH 1/3] chore(app): add HERMES_MODEL/HERMES_FALLBACK_MODEL/ENABLE_HERMES to app.yaml Mirrors app.yaml.template so the deployed Databricks App config exposes Hermes settings explicitly in the Apps UI rather than relying on the Python defaults baked into setup_hermes.py. Co-authored-by: Isaac --- app.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/app.yaml b/app.yaml index a0f443c..8a73491 100644 --- a/app.yaml +++ b/app.yaml @@ -10,6 +10,13 @@ env: value: databricks-gemini-2-5-pro - name: CODEX_MODEL value: databricks-gpt-5-5 + - name: HERMES_MODEL + value: databricks-claude-opus-4-7 + - name: HERMES_FALLBACK_MODEL + value: databricks-claude-opus-4-6 + # Set ENABLE_HERMES=false to skip Hermes Agent install. Other CLIs are unaffected. + - name: ENABLE_HERMES + value: "true" - name: CLAUDE_CODE_DISABLE_AUTO_MEMORY value: 0 - name: MAX_CONCURRENT_SESSIONS From cb65d2768aa88997754b9367ad704286bb8f1c50 Mon Sep 17 00:00:00 2001 From: David O'Keeffe Date: Mon, 27 Apr 2026 13:22:35 +1000 Subject: [PATCH 2/3] fix(hermes): route via DATABRICKS_GATEWAY_HOST so opus-4-7 resolves setup_hermes.py's direct-serving path is `{host}/serving-endpoints` with no API namespace, so the OpenAI-style /chat/completions append 404s on Databricks. The other CLIs work direct because they target namespaced paths (/serving-endpoints/anthropic, /openai, /gemini). Setting DATABRICKS_GATEWAY_HOST routes all five CLIs through the AI Gateway uniformly, matching the recommendation in app.yaml.template. For daveok the gateway is workspace-id 7405611674437990 on Azure. Co-authored-by: Isaac --- app.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/app.yaml b/app.yaml index 8a73491..8e1d5ea 100644 --- a/app.yaml +++ b/app.yaml @@ -17,6 +17,13 @@ env: # Set ENABLE_HERMES=false to skip Hermes Agent install. Other CLIs are unaffected. 
- name: ENABLE_HERMES value: "true" + # Routes all CLIs (Claude, Codex, Gemini, OpenCode, Hermes) through the + # Databricks AI Gateway instead of direct /serving-endpoints. Required for + # Hermes specifically: setup_hermes.py's direct-serving fallback uses + # `{host}/serving-endpoints` with no API namespace, so chat/completions + # 404s. Gateway path (mlflow/v1) handles OpenAI-style requests correctly. + - name: DATABRICKS_GATEWAY_HOST + value: https://7405611674437990.0.ai-gateway.azuredatabricks.net - name: CLAUDE_CODE_DISABLE_AUTO_MEMORY value: 0 - name: MAX_CONCURRENT_SESSIONS From 5aabef95d80eb3d8788a04cf98172c2252cbebfc Mon Sep 17 00:00:00 2001 From: David O'Keeffe Date: Mon, 27 Apr 2026 13:29:51 +1000 Subject: [PATCH 3/3] fix(gateway): auto-construct Azure URL + drop daveok-only hardcode utils.py: - get_gateway_host() now derives the workspace ID from DATABRICKS_HOST on Azure (host pattern: adb-{ws}.{region}.azuredatabricks.net) when DATABRICKS_WORKSPACE_ID isn't set. - Builds the cloud-specific gateway URL by checking for azuredatabricks.net in the host (Azure: {ws}.0.ai-gateway.azuredatabricks.net; AWS keeps the existing {ws}.ai-gateway.cloud.databricks.com pattern). - Probe-then-cache logic is unchanged; this just adds two helpers (_derive_workspace_id_from_host, _build_gateway_candidate) and threads DATABRICKS_HOST into the resolution. app.yaml: - Drop the explicit DATABRICKS_GATEWAY_HOST hardcode added in cb65d27. Auto-construction now picks up the right Azure URL from DATABRICKS_HOST, so the env var is redundant on daveok and would be wrong on other workspaces. - HERMES_MODEL: opus-4-7 -> opus-4-6 (only opus-4-6 is on this gateway for OpenAI/MLflow-style routing). 
Co-authored-by: Isaac --- app.yaml | 9 +-------- utils.py | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/app.yaml b/app.yaml index 8e1d5ea..1a2fbc0 100644 --- a/app.yaml +++ b/app.yaml @@ -11,19 +11,12 @@ env: - name: CODEX_MODEL value: databricks-gpt-5-5 - name: HERMES_MODEL - value: databricks-claude-opus-4-7 + value: databricks-claude-opus-4-6 - name: HERMES_FALLBACK_MODEL value: databricks-claude-opus-4-6 # Set ENABLE_HERMES=false to skip Hermes Agent install. Other CLIs are unaffected. - name: ENABLE_HERMES value: "true" - # Routes all CLIs (Claude, Codex, Gemini, OpenCode, Hermes) through the - # Databricks AI Gateway instead of direct /serving-endpoints. Required for - # Hermes specifically: setup_hermes.py's direct-serving fallback uses - # `{host}/serving-endpoints` with no API namespace, so chat/completions - # 404s. Gateway path (mlflow/v1) handles OpenAI-style requests correctly. - - name: DATABRICKS_GATEWAY_HOST - value: https://7405611674437990.0.ai-gateway.azuredatabricks.net - name: CLAUDE_CODE_DISABLE_AUTO_MEMORY value: 0 - name: MAX_CONCURRENT_SESSIONS diff --git a/utils.py b/utils.py index 94237bf..1f0318e 100644 --- a/utils.py +++ b/utils.py @@ -81,6 +81,28 @@ def _probe_gateway(url: str, timeout: float = 2.0) -> bool: return False +def _derive_workspace_id_from_host(host: str) -> str: + """Extract the workspace ID from a Databricks host URL. + + Azure host pattern is `adb-{workspace_id}.{region}.azuredatabricks.net`, + so the digits between `adb-` and the first dot are the workspace ID. AWS + hosts don't carry the workspace ID in the URL, so this returns "" there. + """ + m = re.match(r"(?:https?://)?adb-(\d+)\.", host or "") + return m.group(1) if m else "" + + +def _build_gateway_candidate(workspace_id: str, host: str) -> str: + """Build the AI Gateway URL for a workspace, picking the right cloud pattern. 
+ + Azure: `https://{ws}.0.ai-gateway.azuredatabricks.net` + AWS: `https://{ws}.ai-gateway.cloud.databricks.com` + """ + if "azuredatabricks.net" in (host or "").lower(): + return f"https://{workspace_id}.0.ai-gateway.azuredatabricks.net" + return f"https://{workspace_id}.ai-gateway.cloud.databricks.com" + + def get_gateway_host() -> str: """Resolve the AI Gateway host URL. @@ -88,7 +110,11 @@ def get_gateway_host() -> str: 0. _GATEWAY_RESOLVED env var (set by parent process after probing — avoids re-probing in subprocesses). None = never probed, "" = probed, no gateway. 1. Explicit DATABRICKS_GATEWAY_HOST env var (trusted — no probe) - 2. Auto-constructed from DATABRICKS_WORKSPACE_ID (probed for reachability) + 2. Auto-constructed from workspace ID. Workspace ID is read from + DATABRICKS_WORKSPACE_ID, or derived from DATABRICKS_HOST on Azure + (host pattern `adb-{ws}.{region}.azuredatabricks.net`). Cloud-specific + URL pattern is picked based on whether the host is Azure or AWS. + Result is probed for reachability before returning. 3. Empty string (caller falls back to DATABRICKS_HOST/serving-endpoints) """ # Tier 0: already resolved by a parent process @@ -102,9 +128,13 @@ def get_gateway_host() -> str: return ensure_https(explicit) # Tier 2: auto-construct from workspace ID and probe for reachability - workspace_id = os.environ.get("DATABRICKS_WORKSPACE_ID", "").strip() + host = os.environ.get("DATABRICKS_HOST", "") + workspace_id = ( + os.environ.get("DATABRICKS_WORKSPACE_ID", "").strip() + or _derive_workspace_id_from_host(host) + ) if workspace_id: - candidate = f"https://{workspace_id}.ai-gateway.cloud.databricks.com" + candidate = _build_gateway_candidate(workspace_id, host) if _probe_gateway(candidate): return candidate print(