From 0421bb116cae983b3dc7ce9f88043d98af405dd3 Mon Sep 17 00:00:00 2001 From: David O'Keeffe Date: Wed, 6 May 2026 19:39:50 +1000 Subject: [PATCH 1/2] fix(claude): pick models from workspace's serving endpoints (GDS-aware) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `setup_claude.py` hardcoded ANTHROPIC_DEFAULT_OPUS_MODEL=opus-4-7 regardless of what the workspace actually serves. On workspaces in geos that don't have opus-4-7 (e.g. AU's adb-7405613340366915.15 serves only opus-4-6 / sonnet-4-6 / sonnet-4-5 / haiku-4-5), every opus-tier call fails with ENDPOINT_NOT_FOUND. Adds `utils.discover_serving_endpoints()` to query the workspace's `/api/2.0/serving-endpoints` and return the names of the READY endpoints. Workspace direct-serving endpoints reflect Databricks Geo Designated Services policy — using this list as the validation oracle gets GDS compliance for free, no policy parsing needed. `setup_claude.py` now picks each tier (opus / sonnet / haiku) by walking a priority chain against the discovered list; falls back to the original env-set default if discovery fails (e.g. workspace unreachable at startup) so behaviour matches main when discovery isn't available. Logs the ANTHROPIC_MODEL substitution when it happens. Verified against live daveok (AU geo, no opus-4-7): Active model: databricks-claude-opus-4-6 (was opus-4-7) Opus tier: databricks-claude-opus-4-6 Sonnet tier: databricks-claude-sonnet-4-6 Haiku tier: databricks-claude-haiku-4-5 setup_codex / setup_hermes / setup_gemini follow the same pattern; fixing them is filed as a follow-up so this PR stays surgical and single-agent.
Co-authored-by: Isaac --- setup_claude.py | 43 +++++++++++++++++++++++++++++++++++----- utils.py | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 5 deletions(-) diff --git a/setup_claude.py b/setup_claude.py index 9815ef5..ab2ed87 100644 --- a/setup_claude.py +++ b/setup_claude.py @@ -4,7 +4,7 @@ import subprocess from pathlib import Path -from utils import ensure_https, get_gateway_host +from utils import discover_serving_endpoints, ensure_https, get_gateway_host, pick_in_geo_model # Set HOME if not properly set if not os.environ.get("HOME") or os.environ["HOME"] == "/": @@ -40,13 +40,46 @@ else: settings = {} + # Discover models actually served at this workspace. The direct serving- + # endpoints list reflects Databricks Geo Designated Services policy — a + # workspace in AU only sees in-geo models, etc. Validating env-set defaults + # against this list avoids configuring Claude Code with a model the gateway + # claims to serve but the user's geo can't access. 
+ available = discover_serving_endpoints(databricks_host, token) + if available: + print(f"Discovered {len(available)} READY serving endpoints at workspace") + + requested_model = os.environ.get("ANTHROPIC_MODEL", "databricks-claude-opus-4-7") + active_model = pick_in_geo_model( + [requested_model, "databricks-claude-opus-4-6", "databricks-claude-sonnet-4-6"], + available, + fallback=requested_model, + ) + opus_model = pick_in_geo_model( + ["databricks-claude-opus-4-7", "databricks-claude-opus-4-6"], + available, + fallback="databricks-claude-opus-4-7", + ) + sonnet_model = pick_in_geo_model( + ["databricks-claude-sonnet-4-6", "databricks-claude-sonnet-4-5"], + available, + fallback="databricks-claude-sonnet-4-6", + ) + haiku_model = pick_in_geo_model( + ["databricks-claude-haiku-4-5"], + available, + fallback="databricks-claude-haiku-4-5", + ) + if available and active_model != requested_model: + print(f"ANTHROPIC_MODEL={requested_model} not served at this workspace, using {active_model}") + settings.setdefault("env", {}) - settings["env"]["ANTHROPIC_MODEL"] = os.environ.get("ANTHROPIC_MODEL", "databricks-claude-opus-4-7") + settings["env"]["ANTHROPIC_MODEL"] = active_model settings["env"]["ANTHROPIC_BASE_URL"] = anthropic_base_url settings["env"]["ANTHROPIC_AUTH_TOKEN"] = token - settings["env"]["ANTHROPIC_DEFAULT_OPUS_MODEL"] = "databricks-claude-opus-4-7" - settings["env"]["ANTHROPIC_DEFAULT_SONNET_MODEL"] = "databricks-claude-sonnet-4-6" - settings["env"]["ANTHROPIC_DEFAULT_HAIKU_MODEL"] = "databricks-claude-haiku-4-5" + settings["env"]["ANTHROPIC_DEFAULT_OPUS_MODEL"] = opus_model + settings["env"]["ANTHROPIC_DEFAULT_SONNET_MODEL"] = sonnet_model + settings["env"]["ANTHROPIC_DEFAULT_HAIKU_MODEL"] = haiku_model settings["env"]["ANTHROPIC_CUSTOM_HEADERS"] = "x-databricks-use-coding-agent-mode: true" settings["env"]["CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS"] = "1" diff --git a/utils.py b/utils.py index 94237bf..82448f2 100644 --- a/utils.py +++ b/utils.py @@ -2,11 
+2,63 @@ from __future__ import annotations +import logging import os import re import subprocess from pathlib import Path +logger = logging.getLogger(__name__) + + +def discover_serving_endpoints(host: str, token: str, timeout: float = 5.0) -> set[str]: + """Return the set of READY serving-endpoint names at the workspace. + + The workspace's direct serving-endpoints list naturally reflects in-geo + model availability — Databricks Geo Designated Services restricts which + models are deployed to each region. Validating an env-set model against + this list is therefore equivalent to "is this model in the workspace's + geo / data-residency policy", without parsing GDS rules ourselves. + + Returns an empty set on any failure (auth error, network blip, JSON parse, + etc.) — caller should treat empty as "discovery unavailable, keep defaults". + """ + if not host or not token: + return set() + try: + import requests + resp = requests.get( + f"{host}/api/2.0/serving-endpoints", + headers={"Authorization": f"Bearer {token}"}, + timeout=timeout, + ) + resp.raise_for_status() + endpoints = resp.json().get("endpoints", []) + return { + ep["name"] + for ep in endpoints + if ep.get("name") and ep.get("state", {}).get("ready") == "READY" + } + except Exception as e: + logger.warning("Could not discover serving endpoints at %s: %s", host, e) + return set() + + +def pick_in_geo_model(preferred: list[str], available: set[str], fallback: str) -> str: + """Pick the highest-priority preferred model that's actually served here. + + `preferred` is the caller's degradation chain (e.g. opus-4-7 → opus-4-6). + Returns the first entry that's in `available`. If none match (or `available` + is empty because discovery failed), returns `fallback` — typically the + original env-set default. The user will see a clean ENDPOINT_NOT_FOUND + later if they actually try to use a missing model, rather than getting + silently downgraded to a different model tier. 
+ """ + for m in preferred: + if m in available: + return m + return fallback + def get_npm_version(package_name): """Resolve the latest stable version of an npm package. From 8eb22eea7955f1de8a32482d4f698f7d4abe121a Mon Sep 17 00:00:00 2001 From: David O'Keeffe Date: Thu, 7 May 2026 10:22:45 +1000 Subject: [PATCH 2/2] review: move requests import to module level in utils.py per @mpkrass7 --- utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils.py b/utils.py index 82448f2..ad5305a 100644 --- a/utils.py +++ b/utils.py @@ -8,6 +8,8 @@ import subprocess from pathlib import Path +import requests + logger = logging.getLogger(__name__) @@ -26,7 +28,6 @@ def discover_serving_endpoints(host: str, token: str, timeout: float = 5.0) -> s if not host or not token: return set() try: - import requests resp = requests.get( f"{host}/api/2.0/serving-endpoints", headers={"Authorization": f"Bearer {token}"},