From 6346f0df6837fe762b8b5def7136a54822fbd51a Mon Sep 17 00:00:00 2001 From: James Broadhead Date: Tue, 12 May 2026 22:11:21 +0000 Subject: [PATCH 1/5] fix: address ACE multi-model code review findings Batch of small fixes surfaced by a parallel GPT 5.4 + Gemini 3.1 Pro review. None of these are introduced by recent changes; they are pre-existing issues that ought to land regardless. databricks-agent-bricks/scripts/mas_manager.py: - `_delete` now tolerates empty / non-JSON DELETE response bodies instead of raising JSONDecodeError on success. - `add_examples_batch` short-circuits on empty input (ThreadPoolExecutor rejects max_workers=0). - `_build_agent_list` validates `uc_function_name` has 3 dotted parts and rejects missing `endpoint_name` with a clear error, instead of IndexError / `{"name": null}` on the API. - `main()` uses a `_parse_json_arg` helper that exits cleanly on malformed JSON CLI args, replacing raw `json.loads` tracebacks. databricks-execution-compute/scripts/compute.py: - `manage-cluster --action get` now keys off the SDK's `NotFound` exception type instead of substring-matching "does not exist" in the message. Also returns `success: false` for the DELETED state, so callers gating on `success` don't treat a missing cluster as a successful lookup. databricks-app-python/examples/llm_config.py: - OAuth error no longer interpolates the full token-endpoint payload (which can contain `id_token` / refresh material). Logs the present key names instead. - DATABRICKS_MODEL validation error drops the `response.text[:300]` echo so server bodies don't end up in operator-visible error text. databricks-app-python/examples/fm-minimal-chat.py: - Docstring + `app.yaml` examples reference the actual filename (`fm-minimal-chat.py`), not `2-minimal-chat-app.py`. databricks-app-python/examples/fm-parallel-calls.py: - Guard `Speedup` division on `total_time > 0` to avoid ZeroDivisionError on fast paths. - Convert the trailing standalone triple-quoted string (dead code) to real `#` comments. databricks-python-sdk/examples/5-serving-and-vector-search.py: - Replace the `[0.1, 0.2, 0.3, ...]` literal-Ellipsis vector with a named placeholder + comment explaining it's a stand-in. This pull request was AI-assisted by Isaac. Co-authored-by: Isaac --- .../scripts/mas_manager.py | 37 +++++++++-- .../examples/fm-minimal-chat.py | 4 +- .../examples/fm-parallel-calls.py | 66 +++++++++---------- .../examples/llm_config.py | 7 +- .../scripts/compute.py | 7 +- .../examples/5-serving-and-vector-search.py | 3 +- 6 files changed, 78 insertions(+), 46 deletions(-) diff --git a/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py b/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py index bf1d92ee..adb8c284 100644 --- a/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py +++ b/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py @@ -233,6 +233,9 @@ def create_example(q: Dict[str, Any]) -> Optional[Dict[str, Any]]: logger.error(f"Failed to add MAS example '{question_text[:50]}...': {e}") return None + if not questions: + return created_examples + max_workers = min(2, len(questions)) with ThreadPoolExecutor(max_workers=max_workers) as executor: future_to_q = {executor.submit(create_example, q): q for q in questions} @@ -285,7 +288,12 @@ def _delete(self, path: str) -> Dict[str, Any]: url = f"{self.w.config.host}{path}" response = requests.delete(url, headers=headers, timeout=20) self._handle_response_error(response, "DELETE", path) - return response.json() + if not response.content: + return {} + try: + return response.json() + except ValueError: + return {} # ============================================================================ @@ -321,6 +329,10 @@ def _build_agent_list(agents: List[Dict[str, str]]) -> List[Dict[str, Any]]: elif agent.get("uc_function_name"): uc_function_name = agent.get("uc_function_name") uc_parts = uc_function_name.split(".") + if len(uc_parts) != 3: + raise ValueError( + f"uc_function_name must be 'catalog.schema.function', got: {uc_function_name!r}" + ) agent_config["agent_type"] = "unity_catalog_function" agent_config["unity_catalog_function"] = { "uc_path": { @@ -333,8 +345,14 @@ def _build_agent_list(agents: List[Dict[str, str]]) -> List[Dict[str, Any]]: agent_config["agent_type"] = "external_mcp_server" agent_config["external_mcp_server"] = {"connection_name": agent.get("connection_name")} else: + endpoint_name = agent.get("endpoint_name") + if not endpoint_name: + raise ValueError( + "agent config must specify one of: genie_space_id, ka_tile_id, " + "uc_function_name, connection_name, or endpoint_name" + ) agent_config["agent_type"] = "serving_endpoint" - agent_config["serving_endpoint"] = {"name": agent.get("endpoint_name")} + agent_config["serving_endpoint"] = {"name": endpoint_name} agent_list.append(agent_config) return agent_list @@ -578,6 +596,15 @@ def _print_json(data: Any) -> None: print(json.dumps(data, indent=2)) +def _parse_json_arg(arg: str, label: str) -> Any: + """Parse a JSON CLI arg, exiting cleanly on malformed input.""" + try: + return json.loads(arg) + except json.JSONDecodeError as e: + print(f"error: {label} must be valid JSON ({e})", file=sys.stderr) + sys.exit(1) + + def main(): """CLI entry point.""" if len(sys.argv) < 2: @@ -591,7 +618,7 @@ def main(): print("Usage: python mas_manager.py create_mas NAME '{\"agents\": [...], ...}'") sys.exit(1) name = sys.argv[2] - config = json.loads(sys.argv[3]) + config = _parse_json_arg(sys.argv[3], "config") result = create_mas( name=name, agents=config.get("agents", []), @@ -619,7 +646,7 @@ def main(): print("Usage: python mas_manager.py update_mas TILE_ID '{\"name\": ..., \"agents\": [...], ...}'") sys.exit(1) tile_id = sys.argv[2] - config = json.loads(sys.argv[3]) + config = _parse_json_arg(sys.argv[3], "config") result = update_mas( tile_id=tile_id, name=config.get("name"), @@ -645,7 +672,7 @@ def main(): print("Usage: python mas_manager.py add_examples TILE_ID '[{\"question\": \"...\", \"guideline\": \"...\"}]' [--wait]") sys.exit(1) tile_id = sys.argv[2] - examples = json.loads(sys.argv[3]) + examples = _parse_json_arg(sys.argv[3], "examples") wait = "--wait" in sys.argv[4:] result = add_examples(tile_id, examples, wait_for_online=wait) _print_json(result) diff --git a/databricks-skills/databricks-apps-python/examples/fm-minimal-chat.py b/databricks-skills/databricks-apps-python/examples/fm-minimal-chat.py index db9a35d6..920a4051 100644 --- a/databricks-skills/databricks-apps-python/examples/fm-minimal-chat.py +++ b/databricks-skills/databricks-apps-python/examples/fm-minimal-chat.py @@ -16,11 +16,11 @@ export DATABRICKS_TOKEN="dapi..." export DATABRICKS_SERVING_BASE_URL="https:///serving-endpoints" export DATABRICKS_MODEL="" # See databricks-model-serving - streamlit run 2-minimal-chat-app.py + streamlit run fm-minimal-chat.py Databricks Apps Deployment: 1. Create app.yaml: - command: ["streamlit", "run", "2-minimal-chat-app.py"] + command: ["streamlit", "run", "fm-minimal-chat.py"] env: - name: DATABRICKS_SERVING_BASE_URL value: "https:///serving-endpoints" diff --git a/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py b/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py index 53cc6a23..41226196 100644 --- a/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py +++ b/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py @@ -224,42 +224,42 @@ def check_audience_fit(client: OpenAI, text: str) -> Dict[str, Any]: time_saved = (total_latency / 1000) - total_time print(f"\n{'='*60}") print(f"Time saved vs serial execution: {time_saved:.2f}s") - print(f"Speedup: {(total_latency/1000) / total_time:.1f}×") + if total_time > 0: + print(f"Speedup: {(total_latency/1000) / total_time:.1f}×") print(f"{'='*60}") # ============================================================================= # Production Best Practices # ============================================================================= -""" -Best practices from databricksters-check-and-pub: - -1. Configurable concurrency - - Use LLM_MAX_CONCURRENCY env var (default: 5 in the production app) - - Balance throughput vs rate limits - - Too high = rate limit errors - - Too low = underutilized resources - -2. Error handling - - Capture exceptions per job - - Return None for failed jobs - - Collect error messages for debugging - - Continue execution even if some jobs fail - -3. Bounded execution - - Only parallelize independent checks - - Cap concurrency with an env var rather than firing unlimited requests - - Keep the job contract simple: name -> (callable, args, kwargs) - -4. When to use parallel calls - - Multiple independent evaluations of same content - - Batch processing multiple documents - - A/B testing different prompts - - Multi-aspect analysis - -5. When NOT to use parallel calls - - Dependent/sequential operations - - Single evaluation needed - - Rate limits are very strict - - Debugging (use serial for easier troubleshooting) -""" +# +# Best practices from databricksters-check-and-pub: +# +# 1. Configurable concurrency +# - Use LLM_MAX_CONCURRENCY env var (default: 5 in the production app) +# - Balance throughput vs rate limits +# - Too high = rate limit errors +# - Too low = underutilized resources +# +# 2. Error handling +# - Capture exceptions per job +# - Return None for failed jobs +# - Collect error messages for debugging +# - Continue execution even if some jobs fail +# +# 3. Bounded execution +# - Only parallelize independent checks +# - Cap concurrency with an env var rather than firing unlimited requests +# - Keep the job contract simple: name -> (callable, args, kwargs) +# +# 4. When to use parallel calls +# - Multiple independent evaluations of same content +# - Batch processing multiple documents +# - A/B testing different prompts +# - Multi-aspect analysis +# +# 5. When NOT to use parallel calls +# - Dependent/sequential operations +# - Single evaluation needed +# - Rate limits are very strict +# - Debugging (use serial for easier troubleshooting) diff --git a/databricks-skills/databricks-apps-python/examples/llm_config.py b/databricks-skills/databricks-apps-python/examples/llm_config.py index 6c4d5509..200aaef9 100644 --- a/databricks-skills/databricks-apps-python/examples/llm_config.py +++ b/databricks-skills/databricks-apps-python/examples/llm_config.py @@ -218,8 +218,10 @@ def get_databricks_bearer_token( access_token = payload.get("access_token") expires_in = int(payload.get("expires_in", 300)) if not access_token: + payload_keys = sorted(payload.keys()) if isinstance(payload, dict) else [] raise DatabricksLLMConfigError( - f"Token endpoint response is missing access_token: {payload}" + "Token endpoint response is missing access_token " + f"(keys present: {payload_keys})" ) expires_at = int(time.time()) + expires_in @@ -278,8 +280,7 @@ def validate_databricks_llm_config( if response.status_code >= 400: raise DatabricksLLMConfigError( f"Failed to validate DATABRICKS_MODEL={config.model!r} in workspace " - f"{config.workspace_host} (HTTP {response.status_code}). " - f"Response: {response.text[:300]}" + f"{config.workspace_host} (HTTP {response.status_code})." ) _validation_cache[cache_key] = int(time.time()) + VALIDATION_TTL_SECONDS diff --git a/databricks-skills/databricks-execution-compute/scripts/compute.py b/databricks-skills/databricks-execution-compute/scripts/compute.py index e90a4ac7..49c66f77 100644 --- a/databricks-skills/databricks-execution-compute/scripts/compute.py +++ b/databricks-skills/databricks-execution-compute/scripts/compute.py @@ -20,6 +20,7 @@ from typing import Any, Dict, List, Optional from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import DatabricksError, NotFound from databricks.sdk.service.compute import ( ClusterSource, CommandStatus, @@ -673,9 +674,11 @@ def cmd_manage_cluster(args): return {"success": False, "error": "cluster_id is required for get action."} try: return get_cluster_status(cluster_id) + except NotFound: + return {"success": False, "cluster_id": cluster_id, "state": "DELETED", "exists": False} + except DatabricksError as e: + return {"success": False, "error": str(e)} except Exception as e: - if "does not exist" in str(e).lower(): - return {"success": True, "cluster_id": cluster_id, "state": "DELETED", "exists": False} return {"success": False, "error": str(e)} else: diff --git a/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py b/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py index 2a47c2b0..70b87116 100644 --- a/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py +++ b/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py @@ -168,10 +168,11 @@ # Query with embedding vector directly +query_vector = [0.1, 0.2, 0.3] # Replace with your real embedding (list of floats matching the index's dimension) results = w.vector_search_indexes.query_index( index_name="main.default.my_index", columns=["id", "text"], - query_vector=[0.1, 0.2, 0.3, ...], # Your embedding vector + query_vector=query_vector, num_results=10 ) From dd92cbf8d9c53695e5145eca9fe8a62aacb1b812 Mon Sep 17 00:00:00 2001 From: James Broadhead Date: Fri, 15 May 2026 11:07:45 +0000 Subject: [PATCH 2/5] fix: round-2 review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the ACE multi-model self-review on this branch. mas_manager.py - _build_agent_list: reject UC function names with empty / whitespace-only segments (e.g. "catalog..fn"). The previous len==3 check let these through and they reached the API as schema="". - main(): catch ValueError from _build_agent_list so malformed agent configs exit cleanly via stderr instead of dumping a raw traceback — consistent with the JSON-arg handling _parse_json_arg already provides. compute.py - get_cluster_status: add success=True to the happy-path return so the manage-cluster get contract is coherent (success absent on success vs. success=False on missing/error was confusing). - cmd_manage_cluster: drop the redundant DatabricksError clause (identical body to the generic Exception fallback) and the now-unused import. .tests/test_agent_bricks_manager.py - Drop the stale `add_examples_queued` import (function does not exist), which was causing the entire test module to fail collection. - Fix sys.path to point at databricks-agent-bricks/scripts (not the parent), so mas_manager imports actually resolve. - Add TestBuildAgentListValidation, TestAddExamplesBatch, and TestDeleteResponseHandling covering every new error path / contract change introduced in the previous commit on this branch. Co-authored-by: Isaac --- .../.tests/test_agent_bricks_manager.py | 93 ++++++++++++++++++- .../scripts/mas_manager.py | 15 ++- .../scripts/compute.py | 5 +- 3 files changed, 105 insertions(+), 8 deletions(-) diff --git a/databricks-skills/.tests/test_agent_bricks_manager.py b/databricks-skills/.tests/test_agent_bricks_manager.py index d6c7d62f..f9958054 100644 --- a/databricks-skills/.tests/test_agent_bricks_manager.py +++ b/databricks-skills/.tests/test_agent_bricks_manager.py @@ -13,7 +13,7 @@ # Add the skills directory to the path SKILLS_DIR = Path(__file__).parent.parent -sys.path.insert(0, str(SKILLS_DIR / "databricks-agent-bricks")) +sys.path.insert(0, str(SKILLS_DIR / "databricks-agent-bricks" / "scripts")) from mas_manager import ( create_mas, @@ -23,9 +23,9 @@ delete_mas, list_mas, add_examples, - add_examples_queued, list_examples, _build_agent_list, + MASManager, ) @@ -116,6 +116,95 @@ def test_build_multiple_agents(self, sample_agent_config, sample_genie_agent): assert result[1]["agent_type"] == "genie" +class TestBuildAgentListValidation: + """Negative tests for _build_agent_list input validation.""" + + def test_rejects_uc_function_name_with_two_parts(self): + with pytest.raises(ValueError, match="catalog.schema.function"): + _build_agent_list([{"name": "x", "description": "y", "uc_function_name": "catalog.schema"}]) + + def test_rejects_uc_function_name_with_four_parts(self): + with pytest.raises(ValueError, match="catalog.schema.function"): + _build_agent_list([{"name": "x", "description": "y", "uc_function_name": "a.b.c.d"}]) + + def test_rejects_uc_function_name_with_empty_segment(self): + """`catalog..fn` has three parts but middle is empty — must be rejected.""" + with pytest.raises(ValueError, match="non-empty parts"): + _build_agent_list([{"name": "x", "description": "y", "uc_function_name": "catalog..fn"}]) + + def test_rejects_uc_function_name_with_whitespace_only_segment(self): + with pytest.raises(ValueError, match="non-empty parts"): + _build_agent_list([{"name": "x", "description": "y", "uc_function_name": "catalog. .fn"}]) + + def test_rejects_agent_with_no_identifier(self): + """No genie/ka/uc/connection/endpoint -> must raise, not silently send name=None.""" + with pytest.raises(ValueError, match="must specify one of"): + _build_agent_list([{"name": "x", "description": "y"}]) + + def test_rejects_agent_with_empty_endpoint_name(self): + with pytest.raises(ValueError, match="must specify one of"): + _build_agent_list([{"name": "x", "description": "y", "endpoint_name": ""}]) + + +class TestAddExamplesBatch: + """Unit tests for MASManager.add_examples_batch — no Databricks connection needed.""" + + def test_empty_list_returns_empty(self): + """Empty input must short-circuit (ThreadPoolExecutor rejects max_workers=0).""" + manager = MASManager.__new__(MASManager) # skip __init__ (would need WorkspaceClient) + assert manager.add_examples_batch("tile-id", []) == [] + + +class TestDeleteResponseHandling: + """Unit tests for MASManager._delete — tolerate empty / non-JSON DELETE bodies.""" + + @pytest.fixture + def manager_with_fake_workspace(self): + from unittest.mock import MagicMock + + class _Cfg: + host = "https://example.cloud.databricks.com" + + def authenticate(self): + return {"Authorization": "Bearer x"} + + class _W: + config = _Cfg() + + manager = MASManager.__new__(MASManager) + manager.w = _W() + return manager + + def test_empty_body_returns_empty_dict(self, manager_with_fake_workspace, monkeypatch): + """204 No Content / empty body must not raise JSONDecodeError.""" + from unittest.mock import MagicMock + + import mas_manager as mas_manager_module + + fake_resp = MagicMock() + fake_resp.status_code = 204 + fake_resp.content = b"" + fake_resp.text = "" + monkeypatch.setattr(mas_manager_module.requests, "delete", lambda *a, **kw: fake_resp) + + assert manager_with_fake_workspace._delete("/api/test") == {} + + def test_non_json_body_returns_empty_dict(self, manager_with_fake_workspace, monkeypatch): + """Some endpoints return plain text on success — must not raise.""" + from unittest.mock import MagicMock + + import mas_manager as mas_manager_module + + fake_resp = MagicMock() + fake_resp.status_code = 200 + fake_resp.content = b"OK" + fake_resp.text = "OK" + fake_resp.json.side_effect = ValueError("not JSON") + monkeypatch.setattr(mas_manager_module.requests, "delete", lambda *a, **kw: fake_resp) + + assert manager_with_fake_workspace._delete("/api/test") == {} + + @pytest.mark.integration class TestMASLifecycle: """Integration tests for MAS CRUD operations. diff --git a/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py b/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py index adb8c284..8f251961 100644 --- a/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py +++ b/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py @@ -328,10 +328,11 @@ def _build_agent_list(agents: List[Dict[str, str]]) -> List[Dict[str, Any]]: agent_config["serving_endpoint"] = {"name": f"ka-{tile_id_prefix}-endpoint"} elif agent.get("uc_function_name"): uc_function_name = agent.get("uc_function_name") - uc_parts = uc_function_name.split(".") - if len(uc_parts) != 3: + uc_parts = [p.strip() for p in uc_function_name.split(".")] + if len(uc_parts) != 3 or not all(uc_parts): raise ValueError( - f"uc_function_name must be 'catalog.schema.function', got: {uc_function_name!r}" + f"uc_function_name must be 'catalog.schema.function' with non-empty parts, " + f"got: {uc_function_name!r}" ) agent_config["agent_type"] = "unity_catalog_function" agent_config["unity_catalog_function"] = { @@ -613,6 +614,14 @@ def main(): command = sys.argv[1] + try: + _dispatch(command) + except ValueError as e: + print(f"error: {e}", file=sys.stderr) + sys.exit(1) + + +def _dispatch(command: str) -> None: if command == "create_mas": if len(sys.argv) < 4: print("Usage: python mas_manager.py create_mas NAME '{\"agents\": [...], ...}'") diff --git a/databricks-skills/databricks-execution-compute/scripts/compute.py b/databricks-skills/databricks-execution-compute/scripts/compute.py index 49c66f77..e81521d4 100644 --- a/databricks-skills/databricks-execution-compute/scripts/compute.py +++ b/databricks-skills/databricks-execution-compute/scripts/compute.py @@ -20,7 +20,7 @@ from typing import Any, Dict, List, Optional from databricks.sdk import WorkspaceClient -from databricks.sdk.errors import DatabricksError, NotFound +from databricks.sdk.errors import NotFound from databricks.sdk.service.compute import ( ClusterSource, CommandStatus, @@ -192,6 +192,7 @@ def get_cluster_status(cluster_id: str) -> Dict[str, Any]: w = get_workspace_client() c = w.clusters.get(cluster_id=cluster_id) return { + "success": True, "cluster_id": c.cluster_id, "cluster_name": c.cluster_name, "state": c.state.value if c.state else "UNKNOWN", @@ -676,8 +677,6 @@ def cmd_manage_cluster(args): return get_cluster_status(cluster_id) except NotFound: return {"success": False, "cluster_id": cluster_id, "state": "DELETED", "exists": False} - except DatabricksError as e: - return {"success": False, "error": str(e)} except Exception as e: return {"success": False, "error": str(e)} From 9423425155d020b2d4cbca33f331ce18809b9008 Mon Sep 17 00:00:00 2001 From: James Broadhead Date: Fri, 15 May 2026 11:15:04 +0000 Subject: [PATCH 3/5] =?UTF-8?q?fix:=20round-3=20review=20findings=20?= =?UTF-8?q?=E2=80=94=20neighboring=20consistency=20+=20polish?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sweep of issues neighboring the round-1/2 fixes that were called out in the round-3 review. mas_manager.py - Introduce typed MASNotFound exception. _handle_response_error now raises it on HTTP 404 so MASManager.get() can branch on existence cleanly instead of substring-matching "does not exist" / "not found" in the error message (the same anti-pattern the round-1 fix removed from compute.py). - Factor empty-body / non-JSON tolerance from _delete into a shared _safe_json helper and apply it symmetrically to _get / _post / _patch. Eliminates a class of latent JSONDecodeError on empty 2xx bodies. compute.py - cmd_list_compute's clusters resource path also called get_cluster_status with no try/except, so a stale --cluster-id raw-tracebacked. Apply the same typed-NotFound handling cmd_manage_cluster already uses. fm-parallel-calls.py - When total_time is below resolution, print "Speedup: N/A (total_time below resolution)" instead of silently omitting the line. 5-serving-and-vector-search.py - Replace the dim-3 [0.1, 0.2, 0.3] placeholder with [0.0] * 768 and a comment listing common embedding dimensions (768 / 1024 / 1536). The earlier value was small enough to mislead copy-pasters into thinking any short list works. .tests/test_agent_bricks_manager.py - Add TestResponseErrorHandling covering the 4 new behaviors: 404 raises MASNotFound, MASManager.get() returns None on 404, non-404 errors still propagate, and _post tolerates empty success bodies. Note: nothing under databricks-skills/.tests/ is currently wired into CI (.github/workflows/ci.yml only lints databricks-tools-core, databricks-mcp-server, .test/src). The tests run via local pytest only. Separate concern, separate PR. Co-authored-by: Isaac --- .../.tests/test_agent_bricks_manager.py | 82 +++++++++++++++++++ .../scripts/mas_manager.py | 58 ++++++++----- .../examples/fm-parallel-calls.py | 2 + .../scripts/compute.py | 7 +- .../examples/5-serving-and-vector-search.py | 6 +- 5 files changed, 131 insertions(+), 24 deletions(-) diff --git a/databricks-skills/.tests/test_agent_bricks_manager.py b/databricks-skills/.tests/test_agent_bricks_manager.py index f9958054..fa8e334d 100644 --- a/databricks-skills/.tests/test_agent_bricks_manager.py +++ b/databricks-skills/.tests/test_agent_bricks_manager.py @@ -26,6 +26,7 @@ list_examples, _build_agent_list, MASManager, + MASNotFound, ) @@ -155,6 +156,87 @@ def test_empty_list_returns_empty(self): assert manager.add_examples_batch("tile-id", []) == [] +class TestResponseErrorHandling: + """Unit tests for typed-404 (MASNotFound) and empty-body handling.""" + + @pytest.fixture + def manager_with_fake_workspace(self): + class _Cfg: + host = "https://example.cloud.databricks.com" + + def authenticate(self): + return {"Authorization": "Bearer x"} + + class _W: + config = _Cfg() + + manager = MASManager.__new__(MASManager) + manager.w = _W() + return manager + + def test_404_raises_MASNotFound_not_generic_exception(self, manager_with_fake_workspace, monkeypatch): + """A 404 response must raise MASNotFound so callers can branch on existence cleanly.""" + from unittest.mock import MagicMock + + import mas_manager as mas_manager_module + + fake_resp = MagicMock() + fake_resp.status_code = 404 + fake_resp.content = b'{"message":"tile does not exist"}' + fake_resp.text = '{"message":"tile does not exist"}' + fake_resp.json.return_value = {"message": "tile does not exist"} + monkeypatch.setattr(mas_manager_module.requests, "get", lambda *a, **kw: fake_resp) + + with pytest.raises(MASNotFound): + manager_with_fake_workspace._get("/api/2.0/multi-agent-supervisors/missing") + + def test_get_method_returns_None_on_404(self, manager_with_fake_workspace, monkeypatch): + """MASManager.get() should swallow MASNotFound and return None.""" + from unittest.mock import MagicMock + + import mas_manager as mas_manager_module + + fake_resp = MagicMock() + fake_resp.status_code = 404 + fake_resp.content = b'{"message":"x"}' + fake_resp.text = '{"message":"x"}' + fake_resp.json.return_value = {"message": "x"} + monkeypatch.setattr(mas_manager_module.requests, "get", lambda *a, **kw: fake_resp) + + assert manager_with_fake_workspace.get("missing-tile") is None + + def test_get_method_reraises_non_404(self, manager_with_fake_workspace, monkeypatch): + """Non-404 errors must still propagate from MASManager.get().""" + from unittest.mock import MagicMock + + import mas_manager as mas_manager_module + + fake_resp = MagicMock() + fake_resp.status_code = 500 + fake_resp.content = b'{"message":"boom"}' + fake_resp.text = '{"message":"boom"}' + fake_resp.json.return_value = {"message": "boom"} + monkeypatch.setattr(mas_manager_module.requests, "get", lambda *a, **kw: fake_resp) + + with pytest.raises(Exception) as exc_info: + manager_with_fake_workspace.get("any-tile") + assert not isinstance(exc_info.value, MASNotFound) + + def test_post_handles_empty_body(self, manager_with_fake_workspace, monkeypatch): + """_post must tolerate empty success responses (symmetric with _delete).""" + from unittest.mock import MagicMock + + import mas_manager as mas_manager_module + + fake_resp = MagicMock() + fake_resp.status_code = 200 + fake_resp.content = b"" + fake_resp.text = "" + monkeypatch.setattr(mas_manager_module.requests, "post", lambda *a, **kw: fake_resp) + + assert manager_with_fake_workspace._post("/api/test", {"x": 1}) == {} + + class TestDeleteResponseHandling: """Unit tests for MASManager._delete — tolerate empty / non-JSON DELETE bodies.""" diff --git a/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py b/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py index 8f251961..e7ff8e10 100644 --- a/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py +++ b/databricks-skills/databricks-agent-bricks/scripts/mas_manager.py @@ -65,6 +65,10 @@ class MASIds: name: str +class MASNotFound(Exception): + """Raised when a MAS resource returns HTTP 404.""" + + # ============================================================================ # MAS Manager Class # ============================================================================ @@ -116,13 +120,11 @@ def create( return self._post("/api/2.0/multi-agent-supervisors", payload) def get(self, tile_id: str) -> Optional[Dict[str, Any]]: - """Get MAS by tile_id.""" + """Get MAS by tile_id. Returns None if the tile does not exist.""" try: return self._get(f"/api/2.0/multi-agent-supervisors/{tile_id}") - except Exception as e: - if "does not exist" in str(e).lower() or "not found" in str(e).lower(): - return None - raise + except MASNotFound: + return None def update( self, @@ -251,21 +253,38 @@ def create_example(q: Dict[str, Any]) -> Optional[Dict[str, Any]]: # ======================================================================== def _handle_response_error(self, response: requests.Response, method: str, path: str) -> None: - """Extract detailed error from response and raise.""" - if response.status_code >= 400: - try: - error_data = response.json() - error_msg = error_data.get("message", error_data.get("error", str(error_data))) - raise Exception(f"{method} {path} failed: {error_msg}") - except ValueError: - raise Exception(f"{method} {path} failed with status {response.status_code}: {response.text}") + """Extract detailed error from response and raise. + + Raises MASNotFound on 404 so callers can branch on existence cleanly, + instead of substring-matching error messages. + """ + if response.status_code < 400: + return + try: + error_data = response.json() + error_msg = error_data.get("message", error_data.get("error", str(error_data))) + except ValueError: + error_msg = f"status {response.status_code}: {response.text}" + if response.status_code == 404: + raise MASNotFound(f"{method} {path} not found: {error_msg}") + raise Exception(f"{method} {path} failed: {error_msg}") + + @staticmethod + def _safe_json(response: requests.Response) -> Dict[str, Any]: + """Return parsed JSON, or {} if the body is empty / non-JSON.""" + if not response.content: + return {} + try: + return response.json() + except ValueError: + return {} def _get(self, path: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: headers = self.w.config.authenticate() url = f"{self.w.config.host}{path}" response = requests.get(url, headers=headers, params=params or {}, timeout=20) self._handle_response_error(response, "GET", path) - return response.json() + return self._safe_json(response) def _post(self, path: str, body: Dict[str, Any], timeout: int = 300) -> Dict[str, Any]: headers = self.w.config.authenticate() @@ -273,7 +292,7 @@ def _post(self, path: str, body: Dict[str, Any], timeout: int = 300) -> Dict[str url = f"{self.w.config.host}{path}" response = requests.post(url, headers=headers, json=body, timeout=timeout) self._handle_response_error(response, "POST", path) - return response.json() + return self._safe_json(response) def _patch(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]: headers = self.w.config.authenticate() @@ -281,19 +300,14 @@ def _patch(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]: url = f"{self.w.config.host}{path}" response = requests.patch(url, headers=headers, json=body, timeout=20) self._handle_response_error(response, "PATCH", path) - return response.json() + return self._safe_json(response) def _delete(self, path: str) -> Dict[str, Any]: headers = self.w.config.authenticate() url = f"{self.w.config.host}{path}" response = requests.delete(url, headers=headers, timeout=20) self._handle_response_error(response, "DELETE", path) - if not response.content: - return {} - try: - return response.json() - except ValueError: - return {} + return self._safe_json(response) # ============================================================================ diff --git a/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py b/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py index 41226196..71cd81b6 100644 --- a/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py +++ b/databricks-skills/databricks-apps-python/examples/fm-parallel-calls.py @@ -226,6 +226,8 @@ def check_audience_fit(client: OpenAI, text: str) -> Dict[str, Any]: print(f"Time saved vs serial execution: {time_saved:.2f}s") if total_time > 0: print(f"Speedup: {(total_latency/1000) / total_time:.1f}×") + else: + print("Speedup: N/A (total_time below resolution)") print(f"{'='*60}") diff --git a/databricks-skills/databricks-execution-compute/scripts/compute.py b/databricks-skills/databricks-execution-compute/scripts/compute.py index e81521d4..91e3413c 100644 --- a/databricks-skills/databricks-execution-compute/scripts/compute.py +++ b/databricks-skills/databricks-execution-compute/scripts/compute.py @@ -621,7 +621,12 @@ def cmd_list_compute(args): if resource == "clusters": if cluster_id: - return get_cluster_status(cluster_id) + try: + return get_cluster_status(cluster_id) + except NotFound: + return {"success": False, "cluster_id": cluster_id, "state": "DELETED", "exists": False} + except Exception as e: + return {"success": False, "error": str(e)} if auto_select: try: best = get_best_cluster() diff --git a/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py b/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py index 70b87116..597aedee 100644 --- a/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py +++ b/databricks-skills/databricks-python-sdk/examples/5-serving-and-vector-search.py @@ -168,7 +168,11 @@ # Query with embedding vector directly -query_vector = [0.1, 0.2, 0.3] # Replace with your real embedding (list of floats matching the index's dimension) +# query_vector must be a list[float] whose length matches your index's +# embedding dimension (e.g. 768 for bge-small, 1024 for bge-large, 1536 for +# text-embedding-3-small / ada-002). The [0.0] * N below is a stand-in; +# replace with the actual vector returned by your embedding model. +query_vector = [0.0] * 768 results = w.vector_search_indexes.query_index( index_name="main.default.my_index", columns=["id", "text"], From 9d06ca049406a64492d26354986ca2753ccad3c5 Mon Sep 17 00:00:00 2001 From: James Broadhead Date: Fri, 15 May 2026 14:15:01 +0000 Subject: [PATCH 4/5] ci: wire databricks-skills unit tests + add experimental to triggers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round-3 review surfaced that nothing under databricks-skills/.tests/ was being run by CI — which is why the stale add_examples_queued import (collection failure) had gone unnoticed in this branch's parent. Add a skills-unit-tests job that runs the .tests/ tree via uvx with pytest + databricks-sdk + requests, deselecting @pytest.mark.integration (those need a real Databricks workspace). Also add experimental to the pull_request branches list so PRs targeting experimental — like this one — trigger CI at all. Previously CI only fired on PRs to main, leaving experimental-branch work entirely unvalidated. Local trial run: 18 unit tests pass, 3 integration tests correctly deselected, ~25s wall-clock including dependency install. Co-authored-by: Isaac --- .github/workflows/ci.yml | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9d88b308..df159d28 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,12 +2,30 @@ name: CI on: pull_request: - branches: [main] + branches: [main, experimental] permissions: contents: read jobs: + skills-unit-tests: + name: databricks-skills unit tests + runs-on: linux-ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7 + with: + version: "0.11.4" + + - name: Run databricks-skills unit tests + run: | + uvx --python 3.11 \ + --with pytest \ + --with databricks-sdk \ + --with requests \ + pytest databricks-skills/.tests/ -m "not integration" -v + lint: name: Lint & Format runs-on: linux-ubuntu-latest From 4b0bf260a96cdb98dbb7b8aee21ad2f49144d0f9 Mon Sep 17 00:00:00 2001 From: James Broadhead Date: Fri, 15 May 2026 14:33:44 +0000 Subject: [PATCH 5/5] fix: drop dead .tests/test_genie_conversation.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The script this targets — databricks-genie/scripts/conversation.py — was removed in 8fd7f31 ("Replace Genie conversation.py script with pure CLI flow"). The test file's `from conversation import ...` has been unimportable ever since, but nothing was running tests in this tree (see ci.yml until two commits ago), so the breakage went unnoticed. With the new skills-unit-tests CI job pointed at databricks-skills/.tests/, this file's collection error halts the whole run before any real test executes. Removing it. Co-authored-by: Isaac --- .../.tests/test_genie_conversation.py | 204 ------------------ 1 file changed, 204 deletions(-) delete mode 100644 databricks-skills/.tests/test_genie_conversation.py diff --git a/databricks-skills/.tests/test_genie_conversation.py b/databricks-skills/.tests/test_genie_conversation.py deleted file mode 100644 index 0ada389f..00000000 --- a/databricks-skills/.tests/test_genie_conversation.py +++ /dev/null @@ -1,204 +0,0 @@ -""" -Integration tests for databricks-genie/scripts/conversation.py - -Tests the Genie Conversation API CLI interface. -Requires databricks.sdk for Genie Space operations. -""" - -import json -import os -import sys -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -# Add the skills directory to the path -SKILLS_DIR = Path(__file__).parent.parent -sys.path.insert(0, str(SKILLS_DIR / "databricks-genie")) - -from conversation import ask_genie, _print_json - - -class TestAskGenieFunction: - """Tests for the ask_genie function structure and error handling.""" - - def test_ask_genie_returns_dict(self): - """Should return a dictionary result.""" - # Test with a mock to verify return structure - with patch("conversation.WorkspaceClient") as mock_client: - # Setup mock - mock_response = MagicMock() - mock_response.conversation_id = "conv-123" - mock_response.message_id = "msg-456" - - mock_message = MagicMock() - mock_message.status = MagicMock() - mock_message.status.value = "COMPLETED" - mock_message.attachments = [] - mock_message.query_result = None - - mock_instance = mock_client.return_value - mock_instance.genie.start_conversation_and_wait.return_value = mock_response - mock_instance.genie.get_message.return_value = mock_message - - result = ask_genie( - space_id="test-space", - question="Test question", - timeout_seconds=5, - ) - - assert isinstance(result, dict) - assert "question" in result - assert "conversation_id" in result - assert "message_id" in result - assert "status" in result - - def test_ask_genie_with_conversation_id(self): - """Should pass conversation_id for follow-up questions.""" - with patch("conversation.WorkspaceClient") as mock_client: - mock_response = MagicMock() - mock_response.conversation_id = "conv-123" - mock_response.message_id = "msg-456" - - mock_message = MagicMock() - mock_message.status = MagicMock() - mock_message.status.value = "COMPLETED" - mock_message.attachments = [] - mock_message.query_result = None - - mock_instance = mock_client.return_value - mock_instance.genie.start_conversation_and_wait.return_value = mock_response - mock_instance.genie.get_message.return_value = mock_message - - result = ask_genie( - space_id="test-space", - question="Follow-up question", - conversation_id="existing-conv-id", - timeout_seconds=5, - ) - - # Verify the conversation_id was passed - call_args = mock_instance.genie.start_conversation_and_wait.call_args - assert call_args.kwargs.get("conversation_id") == "existing-conv-id" - - def test_ask_genie_handles_timeout(self): - """Should return timeout status when query exceeds timeout.""" - with patch("conversation.WorkspaceClient") as mock_client: - mock_response = MagicMock() - mock_response.conversation_id = "conv-123" - mock_response.message_id = "msg-456" - - mock_message = MagicMock() - mock_message.status = MagicMock() - mock_message.status.value = "EXECUTING_QUERY" # Never completes - mock_message.attachments = [] - - mock_instance = mock_client.return_value - mock_instance.genie.start_conversation_and_wait.return_value = mock_response - mock_instance.genie.get_message.return_value = mock_message - - # Very short timeout to trigger timeout path - result = ask_genie( - space_id="test-space", - question="Test question", - timeout_seconds=0.1, # Will timeout immediately - ) - - assert result["status"] == "TIMEOUT" - assert "error" in result - - def test_ask_genie_handles_failure(self): - """Should return failure status when query fails.""" - with patch("conversation.WorkspaceClient") as mock_client: - mock_response = MagicMock() - mock_response.conversation_id = "conv-123" - mock_response.message_id = "msg-456" - - mock_message = MagicMock() - mock_message.status = MagicMock() - mock_message.status.value = "FAILED" - mock_message.attachments = [] - - mock_instance = mock_client.return_value - mock_instance.genie.start_conversation_and_wait.return_value = mock_response - mock_instance.genie.get_message.return_value = mock_message - - result = ask_genie( - space_id="test-space", - question="Test question", - timeout_seconds=5, - ) - - assert result["status"] == "FAILED" - - -class TestPrintJson: - """Tests for the _print_json helper function.""" - - def test_print_json_dict(self, capsys): - """Should print dict as formatted JSON.""" - _print_json({"key": "value", "number": 42}) - captured = capsys.readouterr() - assert '"key": "value"' in captured.out - assert '"number": 42' in captured.out - - def test_print_json_list(self, capsys): - """Should print list as formatted JSON.""" - _print_json([1, 2, 3]) - captured = capsys.readouterr() - assert "1" in captured.out - assert "2" in captured.out - assert "3" in captured.out - - -@pytest.mark.integration -class TestGenieConversationIntegration: - """Integration tests for Genie Conversation API. - - Note: These tests require a Databricks workspace with Genie enabled - and a valid Genie Space ID configured via environment variable. - """ - - @pytest.fixture - def genie_space_id(self): - """Get Genie Space ID from environment.""" - space_id = os.environ.get("TEST_GENIE_SPACE_ID") - if not space_id: - pytest.skip("TEST_GENIE_SPACE_ID not set - skipping Genie integration tests") - return space_id - - def test_ask_genie_simple_question(self, workspace_client, genie_space_id): - """Should be able to ask a simple question to Genie.""" - result = ask_genie( - space_id=genie_space_id, - question="How many rows are in the table?", - timeout_seconds=120, - ) - - # Should return a valid result - assert result["conversation_id"] is not None - assert result["status"] in ["COMPLETED", "FAILED", "TIMEOUT"] - - def test_ask_genie_follow_up(self, workspace_client, genie_space_id): - """Should be able to ask follow-up questions.""" - # First question - result1 = ask_genie( - space_id=genie_space_id, - question="Show me the first 5 rows", - timeout_seconds=120, - ) - - if result1["status"] != "COMPLETED": - pytest.skip("First query did not complete - skipping follow-up test") - - # Follow-up question - result2 = ask_genie( - space_id=genie_space_id, - question="Now show me the count", - conversation_id=result1["conversation_id"], - timeout_seconds=120, - ) - - # Should use same conversation - assert result2["conversation_id"] == result1["conversation_id"]