From 6346f0df6837fe762b8b5def7136a54822fbd51a Mon Sep 17 00:00:00 2001
From: James Broadhead <james.broadhead@databricks.com>
Date: Tue, 12 May 2026 22:11:21 +0000
Subject: [PATCH 1/5] fix: address ACE multi-model code review findings

Batch of small fixes surfaced by a parallel GPT 5.4 + Gemini 3.1 Pro
review. None of these are introduced by recent changes; they are
pre-existing issues that ought to land regardless.

databricks-agent-bricks/scripts/mas_manager.py:
- `_delete` now tolerates empty / non-JSON DELETE response bodies
  instead of raising JSONDecodeError on success.
- `add_examples_batch` short-circuits on empty input (ThreadPoolExecutor
  rejects max_workers=0).
- `_build_agent_list` validates `uc_function_name` has 3 dotted parts
  and rejects missing `endpoint_name` with a clear error, instead of
  IndexError / `{"name": null}` on the API.
- `main()` uses a `_parse_json_arg` helper that exits cleanly on
  malformed JSON CLI args, replacing raw `json.loads` tracebacks.

databricks-execution-compute/scripts/compute.py:
- `manage-cluster --action get` now keys off the SDK's `NotFound`
  exception type instead of substring-matching "does not exist" in the
  message. Also returns `success: false` for the DELETED state, so
  callers gating on `success` don't treat a missing cluster as a
  successful lookup.

databricks-app-python/examples/llm_config.py:
- OAuth error no longer interpolates the full token-endpoint payload
  (which can contain `id_token` / refresh material). Logs the present
  key names instead.
- DATABRICKS_MODEL validation error drops the `response.text[:300]`
  echo so server bodies don't end up in operator-visible error text.

databricks-app-python/examples/fm-minimal-chat.py:
- Docstring + `app.yaml` examples reference the actual filename
  (`fm-minimal-chat.py`), not `2-minimal-chat-app.py`.

databricks-app-python/examples/fm-parallel-calls.py:
- Guard `Speedup` division on `total_time > 0` to avoid
  ZeroDivisionError on fast paths.
- Convert the trailing standalone triple-quoted string (dead code) to
  real `#` comments.

databricks-python-sdk/examples/5-serving-and-vector-search.py:
- Replace the `[0.1, 0.2, 0.3, ...]` literal-Ellipsis vector with a
  named placeholder + comment explaining it's a stand-in.

This pull request was AI-assisted by Isaac.

Co-authored-by: Isaac
---
 .../scripts/mas_manager.py                    | 37 +++++++++--
 .../examples/fm-minimal-chat.py               |  4 +-
 .../examples/fm-parallel-calls.py             | 66 +++++++++----------
 .../examples/llm_config.py                    |  7 +-
 .../scripts/compute.py                        |  7 +-
 .../examples/5-serving-and-vector-search.py   |  3 +-
 6 files changed, 78 insertions(+), 46 deletions(-)

diff --git a/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py b/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py
index bf1d92ee..adb8c284 100644
--- a/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py
+++ b/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py
@@ -233,6 +233,9 @@ def create_example(q: Dict[str, Any]) -> Optional[Dict[str, Any]]:
                 logger.error(f"Failed to add MAS example '{question_text[:50]}...': {e}")
                 return None
 
+        if not questions:
+            return created_examples
+
         max_workers = min(2, len(questions))
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
             future_to_q = {executor.submit(create_example, q): q for q in questions}
@@ -285,7 +288,12 @@ def _delete(self, path: str) -> Dict[str, Any]:
         url = f"{self.w.config.host}{path}"
         response = requests.delete(url, headers=headers, timeout=20)
         self._handle_response_error(response, "DELETE", path)
-        return response.json()
+        if not response.content:
+            return {}
+        try:
+            return response.json()
+        except ValueError:
+            return {}
 
 
 # ============================================================================
@@ -321,6 +329,10 @@ def _build_agent_list(agents: List[Dict[str, str]]) -> List[Dict[str, Any]]:
         elif agent.get("uc_function_name"):
             uc_function_name = agent.get("uc_function_name")
             uc_parts = uc_function_name.split(".")
+            if len(uc_parts) != 3:
+                raise ValueError(
+                    f"uc_function_name must be 'catalog.schema.function', got: {uc_function_name!r}"
+                )
             agent_config["agent_type"] = "unity_catalog_function"
             agent_config["unity_catalog_function"] = {
                 "uc_path": {
@@ -333,8 +345,14 @@ def _build_agent_list(agents: List[Dict[str, str]]) -> List[Dict[str, Any]]:
             agent_config["agent_type"] = "external_mcp_server"
             agent_config["external_mcp_server"] = {"connection_name": agent.get("connection_name")}
         else:
+            endpoint_name = agent.get("endpoint_name")
+            if not endpoint_name:
+                raise ValueError(
+                    "agent config must specify one of: genie_space_id, ka_tile_id, "
+                    "uc_function_name, connection_name, or endpoint_name"
+                )
             agent_config["agent_type"] = "serving_endpoint"
-            agent_config["serving_endpoint"] = {"name": agent.get("endpoint_name")}
+            agent_config["serving_endpoint"] = {"name": endpoint_name}
 
         agent_list.append(agent_config)
     return agent_list
@@ -578,6 +596,15 @@ def _print_json(data: Any) -> None:
     print(json.dumps(data, indent=2))
 
 
+def _parse_json_arg(arg: str, label: str) -> Any:
+    """Parse a JSON CLI arg, exiting cleanly on malformed input."""
+    try:
+        return json.loads(arg)
+    except json.JSONDecodeError as e:
+        print(f"error: {label} must be valid JSON ({e})", file=sys.stderr)
+        sys.exit(1)
+
+
 def main():
     """CLI entry point."""
     if len(sys.argv) < 2:
@@ -591,7 +618,7 @@ def main():
             print("Usage: python mas_manager.py create_mas NAME '{\"agents\": [...], ...}'")
             sys.exit(1)
         name = sys.argv[2]
-        config = json.loads(sys.argv[3])
+        config = _parse_json_arg(sys.argv[3], "config")
         result = create_mas(
             name=name,
             agents=config.get("agents", []),
@@ -619,7 +646,7 @@ def main():
             print("Usage: python mas_manager.py update_mas TILE_ID '{\"name\": ..., \"agents\": [...], ...}'")
             sys.exit(1)
         tile_id = sys.argv[2]
-        config = json.loads(sys.argv[3])
+        config = _parse_json_arg(sys.argv[3], "config")
         result = update_mas(
             tile_id=tile_id,
             name=config.get("name"),
@@ -645,7 +672,7 @@ def main():
             print("Usage: python mas_manager.py add_examples TILE_ID '[{\"question\": \"...\", \"guideline\": \"...\"}]' [--wait]")
             sys.exit(1)
         tile_id = sys.argv[2]
-        examples = json.loads(sys.argv[3])
+        examples = _parse_json_arg(sys.argv[3], "examples")
         wait = "--wait" in sys.argv[4:]
         result = add_examples(tile_id, examples, wait_for_online=wait)
         _print_json(result)
diff --git a/databricks-skills/databricks-apps-python/examples/fm-minimal-chat.py b/databricks-skills/databricks-apps-python/examples/fm-minimal-chat.py
index db9a35d6..920a4051 100644
--- a/databricks-skills/databricks-apps-python/examples/fm-minimal-chat.py
+++ b/databricks-skills/databricks-apps-python/examples/fm-minimal-chat.py
@@ -16,11 +16,11 @@
     export DATABRICKS_TOKEN="dapi..."
     export DATABRICKS_SERVING_BASE_URL="https://<workspace>/serving-endpoints"
     export DATABRICKS_MODEL="<endpoint-name>"  # See databricks-model-serving
-    streamlit run 2-minimal-chat-app.py
+    streamlit run fm-minimal-chat.py
 
 Databricks Apps Deployment:
     1. Create app.yaml:
-       command: ["streamlit", "run", "2-minimal-chat-app.py"]
+       command: ["streamlit", "run", "fm-minimal-chat.py"]
        env:
          - name: DATABRICKS_SERVING_BASE_URL
            value: "https://<workspace>/serving-endpoints"
diff --git a/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py b/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py
index 53cc6a23..41226196 100644
--- a/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py
+++ b/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py
@@ -224,42 +224,42 @@ def check_audience_fit(client: OpenAI, text: str) -> Dict[str, Any]:
     time_saved = (total_latency / 1000) - total_time
     print(f"\n{'='*60}")
     print(f"Time saved vs serial execution: {time_saved:.2f}s")
-    print(f"Speedup: {(total_latency/1000) / total_time:.1f}×")
+    if total_time > 0:
+        print(f"Speedup: {(total_latency/1000) / total_time:.1f}×")
     print(f"{'='*60}")
 
 
 # =============================================================================
 # Production Best Practices
 # =============================================================================
-"""
-Best practices from databricksters-check-and-pub:
-
-1. Configurable concurrency
-   - Use LLM_MAX_CONCURRENCY env var (default: 5 in the production app)
-   - Balance throughput vs rate limits
-   - Too high = rate limit errors
-   - Too low = underutilized resources
-
-2. Error handling
-   - Capture exceptions per job
-   - Return None for failed jobs
-   - Collect error messages for debugging
-   - Continue execution even if some jobs fail
-
-3. Bounded execution
-   - Only parallelize independent checks
-   - Cap concurrency with an env var rather than firing unlimited requests
-   - Keep the job contract simple: name -> (callable, args, kwargs)
-
-4. When to use parallel calls
-   - Multiple independent evaluations of same content
-   - Batch processing multiple documents
-   - A/B testing different prompts
-   - Multi-aspect analysis
-
-5. When NOT to use parallel calls
-   - Dependent/sequential operations
-   - Single evaluation needed
-   - Rate limits are very strict
-   - Debugging (use serial for easier troubleshooting)
-"""
+#
+# Best practices from databricksters-check-and-pub:
+#
+# 1. Configurable concurrency
+#    - Use LLM_MAX_CONCURRENCY env var (default: 5 in the production app)
+#    - Balance throughput vs rate limits
+#    - Too high = rate limit errors
+#    - Too low = underutilized resources
+#
+# 2. Error handling
+#    - Capture exceptions per job
+#    - Return None for failed jobs
+#    - Collect error messages for debugging
+#    - Continue execution even if some jobs fail
+#
+# 3. Bounded execution
+#    - Only parallelize independent checks
+#    - Cap concurrency with an env var rather than firing unlimited requests
+#    - Keep the job contract simple: name -> (callable, args, kwargs)
+#
+# 4. When to use parallel calls
+#    - Multiple independent evaluations of same content
+#    - Batch processing multiple documents
+#    - A/B testing different prompts
+#    - Multi-aspect analysis
+#
+# 5. When NOT to use parallel calls
+#    - Dependent/sequential operations
+#    - Single evaluation needed
+#    - Rate limits are very strict
+#    - Debugging (use serial for easier troubleshooting)
diff --git a/databricks-skills/databricks-apps-python/examples/llm_config.py b/databricks-skills/databricks-apps-python/examples/llm_config.py
index 6c4d5509..200aaef9 100644
--- a/databricks-skills/databricks-apps-python/examples/llm_config.py
+++ b/databricks-skills/databricks-apps-python/examples/llm_config.py
@@ -218,8 +218,10 @@ def get_databricks_bearer_token(
         access_token = payload.get("access_token")
         expires_in = int(payload.get("expires_in", 300))
         if not access_token:
+            payload_keys = sorted(payload.keys()) if isinstance(payload, dict) else []
             raise DatabricksLLMConfigError(
-                f"Token endpoint response is missing access_token: {payload}"
+                "Token endpoint response is missing access_token "
+                f"(keys present: {payload_keys})"
             )
 
         expires_at = int(time.time()) + expires_in
@@ -278,8 +280,7 @@ def validate_databricks_llm_config(
     if response.status_code >= 400:
         raise DatabricksLLMConfigError(
             f"Failed to validate DATABRICKS_MODEL={config.model!r} in workspace "
-            f"{config.workspace_host} (HTTP {response.status_code}). "
-            f"Response: {response.text[:300]}"
+            f"{config.workspace_host} (HTTP {response.status_code})."
         )
 
     _validation_cache[cache_key] = int(time.time()) + VALIDATION_TTL_SECONDS
diff --git a/databricks-skills/databricks-execution-compute/scripts/compute.py b/databricks-skills/databricks-execution-compute/scripts/compute.py
index e90a4ac7..49c66f77 100644
--- a/databricks-skills/databricks-execution-compute/scripts/compute.py
+++ b/databricks-skills/databricks-execution-compute/scripts/compute.py
@@ -20,6 +20,7 @@
 from typing import Any, Dict, List, Optional
 
 from databricks.sdk import WorkspaceClient
+from databricks.sdk.errors import DatabricksError, NotFound
 from databricks.sdk.service.compute import (
     ClusterSource,
     CommandStatus,
@@ -673,9 +674,11 @@ def cmd_manage_cluster(args):
             return {"success": False, "error": "cluster_id is required for get action."}
         try:
             return get_cluster_status(cluster_id)
+        except NotFound:
+            return {"success": False, "cluster_id": cluster_id, "state": "DELETED", "exists": False}
+        except DatabricksError as e:
+            return {"success": False, "error": str(e)}
         except Exception as e:
-            if "does not exist" in str(e).lower():
-                return {"success": True, "cluster_id": cluster_id, "state": "DELETED", "exists": False}
             return {"success": False, "error": str(e)}
 
     else:
diff --git a/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py b/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py
index 2a47c2b0..70b87116 100644
--- a/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py
+++ b/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py
@@ -168,10 +168,11 @@
 
 
 # Query with embedding vector directly
+query_vector = [0.1, 0.2, 0.3]  # Replace with your real embedding (list of floats matching the index's dimension)
 results = w.vector_search_indexes.query_index(
     index_name="main.default.my_index",
     columns=["id", "text"],
-    query_vector=[0.1, 0.2, 0.3, ...],  # Your embedding vector
+    query_vector=query_vector,
     num_results=10
 )
 

From dd92cbf8d9c53695e5145eca9fe8a62aacb1b812 Mon Sep 17 00:00:00 2001
From: James Broadhead <james.broadhead@databricks.com>
Date: Fri, 15 May 2026 11:07:45 +0000
Subject: [PATCH 2/5] fix: round-2 review findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to the ACE multi-model self-review on this branch.

mas_manager.py
- _build_agent_list: reject UC function names with empty / whitespace-only
  segments (e.g. "catalog..fn"). The previous len==3 check let these through
  and they reached the API as schema="".
- main(): catch ValueError from _build_agent_list so malformed agent configs
  exit cleanly via stderr instead of dumping a raw traceback — consistent
  with the JSON-arg handling _parse_json_arg already provides.

compute.py
- get_cluster_status: add success=True to the happy-path return so the
  manage-cluster get contract is coherent (success absent on success vs.
  success=False on missing/error was confusing).
- cmd_manage_cluster: drop the redundant DatabricksError clause (identical
  body to the generic Exception fallback) and the now-unused import.

.tests/test_agent_bricks_manager.py
- Drop the stale `add_examples_queued` import (function does not exist),
  which was causing the entire test module to fail collection.
- Fix sys.path to point at databricks-agent-bricks/scripts (not the parent),
  so mas_manager imports actually resolve.
- Add TestBuildAgentListValidation, TestAddExamplesBatch, and
  TestDeleteResponseHandling covering every new error path / contract
  change introduced in the previous commit on this branch.

Co-authored-by: Isaac
---
 .../.tests/test_agent_bricks_manager.py       | 93 ++++++++++++++++++-
 .../scripts/mas_manager.py                    | 15 ++-
 .../scripts/compute.py                        |  5 +-
 3 files changed, 105 insertions(+), 8 deletions(-)

diff --git a/databricks-skills/.tests/test_agent_bricks_manager.py b/databricks-skills/.tests/test_agent_bricks_manager.py
index d6c7d62f..f9958054 100644
--- a/databricks-skills/.tests/test_agent_bricks_manager.py
+++ b/databricks-skills/.tests/test_agent_bricks_manager.py
@@ -13,7 +13,7 @@
 
 # Add the skills directory to the path
 SKILLS_DIR = Path(__file__).parent.parent
-sys.path.insert(0, str(SKILLS_DIR / "databricks-agent-bricks"))
+sys.path.insert(0, str(SKILLS_DIR / "databricks-agent-bricks" / "scripts"))
 
 from mas_manager import (
     create_mas,
@@ -23,9 +23,9 @@
     delete_mas,
     list_mas,
     add_examples,
-    add_examples_queued,
     list_examples,
     _build_agent_list,
+    MASManager,
 )
 
 
@@ -116,6 +116,95 @@ def test_build_multiple_agents(self, sample_agent_config, sample_genie_agent):
         assert result[1]["agent_type"] == "genie"
 
 
+class TestBuildAgentListValidation:
+    """Negative tests for _build_agent_list input validation."""
+
+    def test_rejects_uc_function_name_with_two_parts(self):
+        with pytest.raises(ValueError, match="catalog.schema.function"):
+            _build_agent_list([{"name": "x", "description": "y", "uc_function_name": "catalog.schema"}])
+
+    def test_rejects_uc_function_name_with_four_parts(self):
+        with pytest.raises(ValueError, match="catalog.schema.function"):
+            _build_agent_list([{"name": "x", "description": "y", "uc_function_name": "a.b.c.d"}])
+
+    def test_rejects_uc_function_name_with_empty_segment(self):
+        """`catalog..fn` has three parts but middle is empty — must be rejected."""
+        with pytest.raises(ValueError, match="non-empty parts"):
+            _build_agent_list([{"name": "x", "description": "y", "uc_function_name": "catalog..fn"}])
+
+    def test_rejects_uc_function_name_with_whitespace_only_segment(self):
+        with pytest.raises(ValueError, match="non-empty parts"):
+            _build_agent_list([{"name": "x", "description": "y", "uc_function_name": "catalog.  .fn"}])
+
+    def test_rejects_agent_with_no_identifier(self):
+        """No genie/ka/uc/connection/endpoint -> must raise, not silently send name=None."""
+        with pytest.raises(ValueError, match="must specify one of"):
+            _build_agent_list([{"name": "x", "description": "y"}])
+
+    def test_rejects_agent_with_empty_endpoint_name(self):
+        with pytest.raises(ValueError, match="must specify one of"):
+            _build_agent_list([{"name": "x", "description": "y", "endpoint_name": ""}])
+
+
+class TestAddExamplesBatch:
+    """Unit tests for MASManager.add_examples_batch — no Databricks connection needed."""
+
+    def test_empty_list_returns_empty(self):
+        """Empty input must short-circuit (ThreadPoolExecutor rejects max_workers=0)."""
+        manager = MASManager.__new__(MASManager)  # skip __init__ (would need WorkspaceClient)
+        assert manager.add_examples_batch("tile-id", []) == []
+
+
+class TestDeleteResponseHandling:
+    """Unit tests for MASManager._delete — tolerate empty / non-JSON DELETE bodies."""
+
+    @pytest.fixture
+    def manager_with_fake_workspace(self):
+        from unittest.mock import MagicMock
+
+        class _Cfg:
+            host = "https://example.cloud.databricks.com"
+
+            def authenticate(self):
+                return {"Authorization": "Bearer x"}
+
+        class _W:
+            config = _Cfg()
+
+        manager = MASManager.__new__(MASManager)
+        manager.w = _W()
+        return manager
+
+    def test_empty_body_returns_empty_dict(self, manager_with_fake_workspace, monkeypatch):
+        """204 No Content / empty body must not raise JSONDecodeError."""
+        from unittest.mock import MagicMock
+
+        import mas_manager as mas_manager_module
+
+        fake_resp = MagicMock()
+        fake_resp.status_code = 204
+        fake_resp.content = b""
+        fake_resp.text = ""
+        monkeypatch.setattr(mas_manager_module.requests, "delete", lambda *a, **kw: fake_resp)
+
+        assert manager_with_fake_workspace._delete("/api/test") == {}
+
+    def test_non_json_body_returns_empty_dict(self, manager_with_fake_workspace, monkeypatch):
+        """Some endpoints return plain text on success — must not raise."""
+        from unittest.mock import MagicMock
+
+        import mas_manager as mas_manager_module
+
+        fake_resp = MagicMock()
+        fake_resp.status_code = 200
+        fake_resp.content = b"OK"
+        fake_resp.text = "OK"
+        fake_resp.json.side_effect = ValueError("not JSON")
+        monkeypatch.setattr(mas_manager_module.requests, "delete", lambda *a, **kw: fake_resp)
+
+        assert manager_with_fake_workspace._delete("/api/test") == {}
+
+
 @pytest.mark.integration
 class TestMASLifecycle:
     """Integration tests for MAS CRUD operations.
diff --git a/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py b/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py
index adb8c284..8f251961 100644
--- a/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py
+++ b/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py
@@ -328,10 +328,11 @@ def _build_agent_list(agents: List[Dict[str, str]]) -> List[Dict[str, Any]]:
             agent_config["serving_endpoint"] = {"name": f"ka-{tile_id_prefix}-endpoint"}
         elif agent.get("uc_function_name"):
             uc_function_name = agent.get("uc_function_name")
-            uc_parts = uc_function_name.split(".")
-            if len(uc_parts) != 3:
+            uc_parts = [p.strip() for p in uc_function_name.split(".")]
+            if len(uc_parts) != 3 or not all(uc_parts):
                 raise ValueError(
-                    f"uc_function_name must be 'catalog.schema.function', got: {uc_function_name!r}"
+                    f"uc_function_name must be 'catalog.schema.function' with non-empty parts, "
+                    f"got: {uc_function_name!r}"
                 )
             agent_config["agent_type"] = "unity_catalog_function"
             agent_config["unity_catalog_function"] = {
@@ -613,6 +614,14 @@ def main():
 
     command = sys.argv[1]
 
+    try:
+        _dispatch(command)
+    except ValueError as e:
+        print(f"error: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+def _dispatch(command: str) -> None:
     if command == "create_mas":
         if len(sys.argv) < 4:
             print("Usage: python mas_manager.py create_mas NAME '{\"agents\": [...], ...}'")
diff --git a/databricks-skills/databricks-execution-compute/scripts/compute.py b/databricks-skills/databricks-execution-compute/scripts/compute.py
index 49c66f77..e81521d4 100644
--- a/databricks-skills/databricks-execution-compute/scripts/compute.py
+++ b/databricks-skills/databricks-execution-compute/scripts/compute.py
@@ -20,7 +20,7 @@
 from typing import Any, Dict, List, Optional
 
 from databricks.sdk import WorkspaceClient
-from databricks.sdk.errors import DatabricksError, NotFound
+from databricks.sdk.errors import NotFound
 from databricks.sdk.service.compute import (
     ClusterSource,
     CommandStatus,
@@ -192,6 +192,7 @@ def get_cluster_status(cluster_id: str) -> Dict[str, Any]:
     w = get_workspace_client()
     c = w.clusters.get(cluster_id=cluster_id)
     return {
+        "success": True,
         "cluster_id": c.cluster_id,
         "cluster_name": c.cluster_name,
         "state": c.state.value if c.state else "UNKNOWN",
@@ -676,8 +677,6 @@ def cmd_manage_cluster(args):
             return get_cluster_status(cluster_id)
         except NotFound:
             return {"success": False, "cluster_id": cluster_id, "state": "DELETED", "exists": False}
-        except DatabricksError as e:
-            return {"success": False, "error": str(e)}
         except Exception as e:
             return {"success": False, "error": str(e)}
 

From 9423425155d020b2d4cbca33f331ce18809b9008 Mon Sep 17 00:00:00 2001
From: James Broadhead <james.broadhead@databricks.com>
Date: Fri, 15 May 2026 11:15:04 +0000
Subject: [PATCH 3/5] =?UTF-8?q?fix:=20round-3=20review=20findings=20?=
 =?UTF-8?q?=E2=80=94=20neighboring=20consistency=20+=20polish?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sweep of issues neighboring the round-1/2 fixes that were called out in
the round-3 review.

mas_manager.py
- Introduce typed MASNotFound exception. _handle_response_error now raises
  it on HTTP 404 so MASManager.get() can branch on existence cleanly
  instead of substring-matching "does not exist" / "not found" in the
  error message (the same anti-pattern the round-1 fix removed from
  compute.py).
- Factor empty-body / non-JSON tolerance from _delete into a shared
  _safe_json helper and apply it symmetrically to _get / _post / _patch.
  Eliminates a class of latent JSONDecodeError on empty 2xx bodies.

compute.py
- cmd_list_compute's clusters resource path also called get_cluster_status
  with no try/except, so a stale --cluster-id raw-tracebacked. Apply the
  same typed-NotFound handling cmd_manage_cluster already uses.

fm-parallel-calls.py
- When total_time is below resolution, print "Speedup: N/A
  (total_time below resolution)" instead of silently omitting the line.

5-serving-and-vector-search.py
- Replace the dim-3 [0.1, 0.2, 0.3] placeholder with [0.0] * 768 and a
  comment listing common embedding dimensions (768 / 1024 / 1536). The
  earlier value was small enough to mislead copy-pasters into thinking
  any short list works.

.tests/test_agent_bricks_manager.py
- Add TestResponseErrorHandling covering the 4 new behaviors: 404 raises
  MASNotFound, MASManager.get() returns None on 404, non-404 errors still
  propagate, and _post tolerates empty success bodies.

Note: nothing under databricks-skills/.tests/ is currently wired into
CI (.github/workflows/ci.yml only lints databricks-tools-core,
databricks-mcp-server, .test/src). The tests run via local pytest only.
Separate concern, separate PR.

Co-authored-by: Isaac
---
 .../.tests/test_agent_bricks_manager.py       | 82 +++++++++++++++++++
 .../scripts/mas_manager.py                    | 58 ++++++++-----
 .../examples/fm-parallel-calls.py             |  2 +
 .../scripts/compute.py                        |  7 +-
 .../examples/5-serving-and-vector-search.py   |  6 +-
 5 files changed, 131 insertions(+), 24 deletions(-)

diff --git a/databricks-skills/.tests/test_agent_bricks_manager.py b/databricks-skills/.tests/test_agent_bricks_manager.py
index f9958054..fa8e334d 100644
--- a/databricks-skills/.tests/test_agent_bricks_manager.py
+++ b/databricks-skills/.tests/test_agent_bricks_manager.py
@@ -26,6 +26,7 @@
     list_examples,
     _build_agent_list,
     MASManager,
+    MASNotFound,
 )
 
 
@@ -155,6 +156,87 @@ def test_empty_list_returns_empty(self):
         assert manager.add_examples_batch("tile-id", []) == []
 
 
+class TestResponseErrorHandling:
+    """Unit tests for typed-404 (MASNotFound) and empty-body handling."""
+
+    @pytest.fixture
+    def manager_with_fake_workspace(self):
+        class _Cfg:
+            host = "https://example.cloud.databricks.com"
+
+            def authenticate(self):
+                return {"Authorization": "Bearer x"}
+
+        class _W:
+            config = _Cfg()
+
+        manager = MASManager.__new__(MASManager)
+        manager.w = _W()
+        return manager
+
+    def test_404_raises_MASNotFound_not_generic_exception(self, manager_with_fake_workspace, monkeypatch):
+        """A 404 response must raise MASNotFound so callers can branch on existence cleanly."""
+        from unittest.mock import MagicMock
+
+        import mas_manager as mas_manager_module
+
+        fake_resp = MagicMock()
+        fake_resp.status_code = 404
+        fake_resp.content = b'{"message":"tile does not exist"}'
+        fake_resp.text = '{"message":"tile does not exist"}'
+        fake_resp.json.return_value = {"message": "tile does not exist"}
+        monkeypatch.setattr(mas_manager_module.requests, "get", lambda *a, **kw: fake_resp)
+
+        with pytest.raises(MASNotFound):
+            manager_with_fake_workspace._get("/api/2.0/multi-agent-supervisors/missing")
+
+    def test_get_method_returns_None_on_404(self, manager_with_fake_workspace, monkeypatch):
+        """MASManager.get() should swallow MASNotFound and return None."""
+        from unittest.mock import MagicMock
+
+        import mas_manager as mas_manager_module
+
+        fake_resp = MagicMock()
+        fake_resp.status_code = 404
+        fake_resp.content = b'{"message":"x"}'
+        fake_resp.text = '{"message":"x"}'
+        fake_resp.json.return_value = {"message": "x"}
+        monkeypatch.setattr(mas_manager_module.requests, "get", lambda *a, **kw: fake_resp)
+
+        assert manager_with_fake_workspace.get("missing-tile") is None
+
+    def test_get_method_reraises_non_404(self, manager_with_fake_workspace, monkeypatch):
+        """Non-404 errors must still propagate from MASManager.get()."""
+        from unittest.mock import MagicMock
+
+        import mas_manager as mas_manager_module
+
+        fake_resp = MagicMock()
+        fake_resp.status_code = 500
+        fake_resp.content = b'{"message":"boom"}'
+        fake_resp.text = '{"message":"boom"}'
+        fake_resp.json.return_value = {"message": "boom"}
+        monkeypatch.setattr(mas_manager_module.requests, "get", lambda *a, **kw: fake_resp)
+
+        with pytest.raises(Exception) as exc_info:
+            manager_with_fake_workspace.get("any-tile")
+        assert not isinstance(exc_info.value, MASNotFound)
+
+    def test_post_handles_empty_body(self, manager_with_fake_workspace, monkeypatch):
+        """_post must tolerate empty success responses (symmetric with _delete)."""
+        from unittest.mock import MagicMock
+
+        import mas_manager as mas_manager_module
+
+        fake_resp = MagicMock()
+        fake_resp.status_code = 200
+        fake_resp.content = b""
+        fake_resp.text = ""
+        monkeypatch.setattr(mas_manager_module.requests, "post", lambda *a, **kw: fake_resp)
+
+        assert manager_with_fake_workspace._post("/api/test", {"x": 1}) == {}
+
+
 class TestDeleteResponseHandling:
     """Unit tests for MASManager._delete — tolerate empty / non-JSON DELETE bodies."""
 
diff --git a/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py b/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py
index 8f251961..e7ff8e10 100644
--- a/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py
+++ b/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py
@@ -65,6 +65,10 @@ class MASIds:
     name: str
 
 
+class MASNotFound(Exception):
+    """Raised when a MAS resource returns HTTP 404."""
+
+
 # ============================================================================
 # MAS Manager Class
 # ============================================================================
@@ -116,13 +120,11 @@ def create(
         return self._post("/api/2.0/multi-agent-supervisors", payload)
 
     def get(self, tile_id: str) -> Optional[Dict[str, Any]]:
-        """Get MAS by tile_id."""
+        """Get MAS by tile_id. Returns None if the tile does not exist."""
         try:
             return self._get(f"/api/2.0/multi-agent-supervisors/{tile_id}")
-        except Exception as e:
-            if "does not exist" in str(e).lower() or "not found" in str(e).lower():
-                return None
-            raise
+        except MASNotFound:
+            return None
 
     def update(
         self,
@@ -251,21 +253,38 @@ def create_example(q: Dict[str, Any]) -> Optional[Dict[str, Any]]:
     # ========================================================================
 
     def _handle_response_error(self, response: requests.Response, method: str, path: str) -> None:
-        """Extract detailed error from response and raise."""
-        if response.status_code >= 400:
-            try:
-                error_data = response.json()
-                error_msg = error_data.get("message", error_data.get("error", str(error_data)))
-                raise Exception(f"{method} {path} failed: {error_msg}")
-            except ValueError:
-                raise Exception(f"{method} {path} failed with status {response.status_code}: {response.text}")
+        """Extract detailed error from response and raise.
+
+        Raises MASNotFound on 404 so callers can branch on existence cleanly,
+        instead of substring-matching error messages.
+        """
+        if response.status_code < 400:
+            return
+        try:
+            error_data = response.json()
+            error_msg = error_data.get("message", error_data.get("error", str(error_data)))
+        except ValueError:
+            error_msg = f"status {response.status_code}: {response.text}"
+        if response.status_code == 404:
+            raise MASNotFound(f"{method} {path} not found: {error_msg}")
+        raise Exception(f"{method} {path} failed: {error_msg}")
+
+    @staticmethod
+    def _safe_json(response: requests.Response) -> Dict[str, Any]:
+        """Return parsed JSON, or {} if the body is empty / non-JSON."""
+        if not response.content:
+            return {}
+        try:
+            return response.json()
+        except ValueError:
+            return {}
 
     def _get(self, path: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
         headers = self.w.config.authenticate()
         url = f"{self.w.config.host}{path}"
         response = requests.get(url, headers=headers, params=params or {}, timeout=20)
         self._handle_response_error(response, "GET", path)
-        return response.json()
+        return self._safe_json(response)
 
     def _post(self, path: str, body: Dict[str, Any], timeout: int = 300) -> Dict[str, Any]:
         headers = self.w.config.authenticate()
@@ -273,7 +292,7 @@ def _post(self, path: str, body: Dict[str, Any], timeout: int = 300) -> Dict[str
         url = f"{self.w.config.host}{path}"
         response = requests.post(url, headers=headers, json=body, timeout=timeout)
         self._handle_response_error(response, "POST", path)
-        return response.json()
+        return self._safe_json(response)
 
     def _patch(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]:
         headers = self.w.config.authenticate()
@@ -281,19 +300,14 @@ def _patch(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]:
         url = f"{self.w.config.host}{path}"
         response = requests.patch(url, headers=headers, json=body, timeout=20)
         self._handle_response_error(response, "PATCH", path)
-        return response.json()
+        return self._safe_json(response)
 
     def _delete(self, path: str) -> Dict[str, Any]:
         headers = self.w.config.authenticate()
         url = f"{self.w.config.host}{path}"
         response = requests.delete(url, headers=headers, timeout=20)
         self._handle_response_error(response, "DELETE", path)
-        if not response.content:
-            return {}
-        try:
-            return response.json()
-        except ValueError:
-            return {}
+        return self._safe_json(response)
 
 
 # ============================================================================
diff --git a/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py b/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py
index 41226196..71cd81b6 100644
--- a/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py
+++ b/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py
@@ -226,6 +226,8 @@ def check_audience_fit(client: OpenAI, text: str) -> Dict[str, Any]:
     print(f"Time saved vs serial execution: {time_saved:.2f}s")
     if total_time > 0:
         print(f"Speedup: {(total_latency/1000) / total_time:.1f}×")
+    else:
+        print("Speedup: N/A (total_time below resolution)")
     print(f"{'='*60}")
 
 
diff --git a/databricks-skills/databricks-execution-compute/scripts/compute.py b/databricks-skills/databricks-execution-compute/scripts/compute.py
index e81521d4..91e3413c 100644
--- a/databricks-skills/databricks-execution-compute/scripts/compute.py
+++ b/databricks-skills/databricks-execution-compute/scripts/compute.py
@@ -621,7 +621,12 @@ def cmd_list_compute(args):
 
     if resource == "clusters":
         if cluster_id:
-            return get_cluster_status(cluster_id)
+            try:
+                return get_cluster_status(cluster_id)
+            except NotFound:
+                return {"success": False, "cluster_id": cluster_id, "state": "DELETED", "exists": False}
+            except Exception as e:
+                return {"success": False, "error": str(e)}
         if auto_select:
             try:
                 best = get_best_cluster()
diff --git a/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py b/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py
index 70b87116..597aedee 100644
--- a/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py
+++ b/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py
@@ -168,7 +168,11 @@
 
 
 # Query with embedding vector directly
-query_vector = [0.1, 0.2, 0.3]  # Replace with your real embedding (list of floats matching the index's dimension)
+# query_vector must be a list[float] whose length matches your index's
+# embedding dimension (e.g. 768 for bge-small, 1024 for bge-large, 1536 for
+# text-embedding-3-small / ada-002). The [0.0] * N below is a stand-in;
+# replace with the actual vector returned by your embedding model.
+query_vector = [0.0] * 768
 results = w.vector_search_indexes.query_index(
     index_name="main.default.my_index",
     columns=["id", "text"],

From 9d06ca049406a64492d26354986ca2753ccad3c5 Mon Sep 17 00:00:00 2001
From: James Broadhead <james.broadhead@databricks.com>
Date: Fri, 15 May 2026 14:15:01 +0000
Subject: [PATCH 4/5] ci: wire databricks-skills unit tests + add experimental
 to triggers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Round-3 review surfaced that nothing under databricks-skills/.tests/ was
being run by CI — which is why the stale add_examples_queued import
(collection failure) had gone unnoticed in this branch's parent.

Add a skills-unit-tests job that runs the .tests/ tree via uvx with
pytest + databricks-sdk + requests, deselecting @pytest.mark.integration
(those need a real Databricks workspace).

Also add experimental to the pull_request branches list so PRs targeting
experimental — like this one — trigger CI at all. Previously CI only
fired on PRs to main, leaving experimental-branch work entirely
unvalidated.

Local trial run: 18 unit tests pass, 3 integration tests correctly
deselected, ~25s wall-clock including dependency install.

Co-authored-by: Isaac
---
 .github/workflows/ci.yml | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 9d88b308..df159d28 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,12 +2,30 @@ name: CI
 
 on:
   pull_request:
-    branches: [main]
+    branches: [main, experimental]
 
 permissions:
   contents: read
 
 jobs:
+  skills-unit-tests:
+    name: databricks-skills unit tests
+    runs-on: linux-ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
+
+      - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
+        with:
+          version: "0.11.4"
+
+      - name: Run databricks-skills unit tests
+        run: |
+          uvx --python 3.11 \
+            --with pytest \
+            --with databricks-sdk \
+            --with requests \
+            pytest databricks-skills/.tests/ -m "not integration" -v
+
   lint:
     name: Lint & Format
     runs-on: linux-ubuntu-latest

From 4b0bf260a96cdb98dbb7b8aee21ad2f49144d0f9 Mon Sep 17 00:00:00 2001
From: James Broadhead <james.broadhead@databricks.com>
Date: Fri, 15 May 2026 14:33:44 +0000
Subject: [PATCH 5/5] fix: drop dead .tests/test_genie_conversation.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The script this targets — databricks-genie/scripts/conversation.py —
was removed in 8fd7f31 ("Replace Genie conversation.py script with pure
CLI flow"). The test file's `from conversation import ...` has been
unimportable ever since, but nothing was running tests in this tree
(see ci.yml until two commits ago), so the breakage went unnoticed.

With the new skills-unit-tests CI job pointed at databricks-skills/.tests/,
this file's collection error halts the whole run before any real test
executes. Removing it.
Co-authored-by: Isaac
---
 .../.tests/test_genie_conversation.py         | 204 ------------------
 1 file changed, 204 deletions(-)
 delete mode 100644 databricks-skills/.tests/test_genie_conversation.py

diff --git a/databricks-skills/.tests/test_genie_conversation.py b/databricks-skills/.tests/test_genie_conversation.py
deleted file mode 100644
index 0ada389f..00000000
--- a/databricks-skills/.tests/test_genie_conversation.py
+++ /dev/null
@@ -1,204 +0,0 @@
-"""
-Integration tests for databricks-genie/scripts/conversation.py
-
-Tests the Genie Conversation API CLI interface.
-Requires databricks.sdk for Genie Space operations.
-"""
-
-import json
-import os
-import sys
-from pathlib import Path
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-# Add the skills directory to the path
-SKILLS_DIR = Path(__file__).parent.parent
-sys.path.insert(0, str(SKILLS_DIR / "databricks-genie"))
-
-from conversation import ask_genie, _print_json
-
-
-class TestAskGenieFunction:
-    """Tests for the ask_genie function structure and error handling."""
-
-    def test_ask_genie_returns_dict(self):
-        """Should return a dictionary result."""
-        # Test with a mock to verify return structure
-        with patch("conversation.WorkspaceClient") as mock_client:
-            # Setup mock
-            mock_response = MagicMock()
-            mock_response.conversation_id = "conv-123"
-            mock_response.message_id = "msg-456"
-
-            mock_message = MagicMock()
-            mock_message.status = MagicMock()
-            mock_message.status.value = "COMPLETED"
-            mock_message.attachments = []
-            mock_message.query_result = None
-
-            mock_instance = mock_client.return_value
-            mock_instance.genie.start_conversation_and_wait.return_value = mock_response
-            mock_instance.genie.get_message.return_value = mock_message
-
-            result = ask_genie(
-                space_id="test-space",
-                question="Test question",
-                timeout_seconds=5,
-            )
-
-            assert isinstance(result, dict)
-            assert "question" in result
-            assert "conversation_id" in result
-            assert "message_id" in result
-            assert "status" in result
-
-    def test_ask_genie_with_conversation_id(self):
-        """Should pass conversation_id for follow-up questions."""
-        with patch("conversation.WorkspaceClient") as mock_client:
-            mock_response = MagicMock()
-            mock_response.conversation_id = "conv-123"
-            mock_response.message_id = "msg-456"
-
-            mock_message = MagicMock()
-            mock_message.status = MagicMock()
-            mock_message.status.value = "COMPLETED"
-            mock_message.attachments = []
-            mock_message.query_result = None
-
-            mock_instance = mock_client.return_value
-            mock_instance.genie.start_conversation_and_wait.return_value = mock_response
-            mock_instance.genie.get_message.return_value = mock_message
-
-            result = ask_genie(
-                space_id="test-space",
-                question="Follow-up question",
-                conversation_id="existing-conv-id",
-                timeout_seconds=5,
-            )
-
-            # Verify the conversation_id was passed
-            call_args = mock_instance.genie.start_conversation_and_wait.call_args
-            assert call_args.kwargs.get("conversation_id") == "existing-conv-id"
-
-    def test_ask_genie_handles_timeout(self):
-        """Should return timeout status when query exceeds timeout."""
-        with patch("conversation.WorkspaceClient") as mock_client:
-            mock_response = MagicMock()
-            mock_response.conversation_id = "conv-123"
-            mock_response.message_id = "msg-456"
-
-            mock_message = MagicMock()
-            mock_message.status = MagicMock()
-            mock_message.status.value = "EXECUTING_QUERY"  # Never completes
-            mock_message.attachments = []
-
-            mock_instance = mock_client.return_value
-            mock_instance.genie.start_conversation_and_wait.return_value = mock_response
-            mock_instance.genie.get_message.return_value = mock_message
-
-            # Very short timeout to trigger timeout path
-            result = ask_genie(
-                space_id="test-space",
-                question="Test question",
-                timeout_seconds=0.1,  # Will timeout immediately
-            )
-
-            assert result["status"] == "TIMEOUT"
-            assert "error" in result
-
-    def test_ask_genie_handles_failure(self):
-        """Should return failure status when query fails."""
-        with patch("conversation.WorkspaceClient") as mock_client:
-            mock_response = MagicMock()
-            mock_response.conversation_id = "conv-123"
-            mock_response.message_id = "msg-456"
-
-            mock_message = MagicMock()
-            mock_message.status = MagicMock()
-            mock_message.status.value = "FAILED"
-            mock_message.attachments = []
-
-            mock_instance = mock_client.return_value
-            mock_instance.genie.start_conversation_and_wait.return_value = mock_response
-            mock_instance.genie.get_message.return_value = mock_message
-
-            result = ask_genie(
-                space_id="test-space",
-                question="Test question",
-                timeout_seconds=5,
-            )
-
-            assert result["status"] == "FAILED"
-
-
-class TestPrintJson:
-    """Tests for the _print_json helper function."""
-
-    def test_print_json_dict(self, capsys):
-        """Should print dict as formatted JSON."""
-        _print_json({"key": "value", "number": 42})
-        captured = capsys.readouterr()
-        assert '"key": "value"' in captured.out
-        assert '"number": 42' in captured.out
-
-    def test_print_json_list(self, capsys):
-        """Should print list as formatted JSON."""
-        _print_json([1, 2, 3])
-        captured = capsys.readouterr()
-        assert "1" in captured.out
-        assert "2" in captured.out
-        assert "3" in captured.out
-
-
-@pytest.mark.integration
-class TestGenieConversationIntegration:
-    """Integration tests for Genie Conversation API.
-
-    Note: These tests require a Databricks workspace with Genie enabled
-    and a valid Genie Space ID configured via environment variable.
-    """
-
-    @pytest.fixture
-    def genie_space_id(self):
-        """Get Genie Space ID from environment."""
-        space_id = os.environ.get("TEST_GENIE_SPACE_ID")
-        if not space_id:
-            pytest.skip("TEST_GENIE_SPACE_ID not set - skipping Genie integration tests")
-        return space_id
-
-    def test_ask_genie_simple_question(self, workspace_client, genie_space_id):
-        """Should be able to ask a simple question to Genie."""
-        result = ask_genie(
-            space_id=genie_space_id,
-            question="How many rows are in the table?",
-            timeout_seconds=120,
-        )
-
-        # Should return a valid result
-        assert result["conversation_id"] is not None
-        assert result["status"] in ["COMPLETED", "FAILED", "TIMEOUT"]
-
-    def test_ask_genie_follow_up(self, workspace_client, genie_space_id):
-        """Should be able to ask follow-up questions."""
-        # First question
-        result1 = ask_genie(
-            space_id=genie_space_id,
-            question="Show me the first 5 rows",
-            timeout_seconds=120,
-        )
-
-        if result1["status"] != "COMPLETED":
-            pytest.skip("First query did not complete - skipping follow-up test")
-
-        # Follow-up question
-        result2 = ask_genie(
-            space_id=genie_space_id,
-            question="Now show me the count",
-            conversation_id=result1["conversation_id"],
-            timeout_seconds=120,
-        )
-
-        # Should use same conversation
-        assert result2["conversation_id"] == result1["conversation_id"]