Skip to content

Commit 31ff30a

Browse files
committed
feat: BigQuery ADK support for search catalog tool
1 parent 5b7c8c0 commit 31ff30a

9 files changed

Lines changed: 549 additions & 15 deletions

File tree

contributing/samples/bigquery/README.md

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ distributed via the `google.adk.tools.bigquery` module. These tools include:
2424
5. `get_job_info`
2525
Fetches metadata about a BigQuery job.
2626

27-
5. `execute_sql`
27+
6. `execute_sql`
2828

2929
Runs or dry-runs a SQL query in BigQuery.
3030

31-
6. `ask_data_insights`
31+
7. `ask_data_insights`
3232

3333
Natural language-in, natural language-out tool that answers questions
3434
about structured data in BigQuery. Provides a one-stop solution for generating
@@ -38,23 +38,26 @@ distributed via the `google.adk.tools.bigquery` module. These tools include:
3838
the official [Conversational Analytics API documentation](https://cloud.google.com/gemini/docs/conversational-analytics-api/overview)
3939
for instructions.
4040

41-
7. `forecast`
41+
8. `forecast`
4242

4343
Perform time series forecasting using BigQuery's `AI.FORECAST` function,
4444
leveraging the TimesFM 2.0 model.
4545

46-
8. `analyze_contribution`
46+
9. `analyze_contribution`
4747

4848
Perform contribution analysis in BigQuery by creating a temporary
4949
`CONTRIBUTION_ANALYSIS` model and then querying it with
5050
`ML.GET_INSIGHTS` to find top contributors for a given metric.
5151

52-
9. `detect_anomalies`
52+
10. `detect_anomalies`
5353

5454
Perform time series anomaly detection in BigQuery by creating a temporary
5555
`ARIMA_PLUS` model and then querying it with
5656
`ML.DETECT_ANOMALIES` to detect time series data anomalies.
5757

58+
11. `search_catalog`
59+
Searches for data entries across projects using the Dataplex Catalog. This allows discovery of datasets, tables, and other assets.
60+
5861
## How to use
5962

6063
Set up environment variables in your `.env` file for using

src/google/adk/tools/bigquery/bigquery_credentials.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@
1919
from .._google_credentials import BaseGoogleCredentialsConfig
2020

2121
BIGQUERY_TOKEN_CACHE_KEY = "bigquery_token_cache"
22-
BIGQUERY_DEFAULT_SCOPE = ["https://www.googleapis.com/auth/bigquery"]
23-
22+
BIGQUERY_SCOPES = [
23+
"https://www.googleapis.com/auth/bigquery",
24+
"https://www.googleapis.com/auth/cloud-platform",
25+
]
2426

2527
@experimental(FeatureName.GOOGLE_CREDENTIALS_CONFIG)
2628
class BigQueryCredentialsConfig(BaseGoogleCredentialsConfig):
@@ -34,8 +36,7 @@ def __post_init__(self) -> BigQueryCredentialsConfig:
3436
super().__post_init__()
3537

3638
if not self.scopes:
37-
self.scopes = BIGQUERY_DEFAULT_SCOPE
38-
39+
self.scopes = BIGQUERY_SCOPES
3940
# Set the token cache key
4041
self._token_cache_key = BIGQUERY_TOKEN_CACHE_KEY
4142

src/google/adk/tools/bigquery/bigquery_toolset.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from . import data_insights_tool
2525
from . import metadata_tool
2626
from . import query_tool
27+
from . import search_tool
2728
from ...features import experimental
2829
from ...features import FeatureName
2930
from ...tools.base_tool import BaseTool
@@ -87,6 +88,7 @@ async def get_tools(
8788
query_tool.analyze_contribution,
8889
query_tool.detect_anomalies,
8990
data_insights_tool.ask_data_insights,
91+
search_tool.search_catalog,
9092
]
9193
]
9294

src/google/adk/tools/bigquery/client.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,14 @@
1919
import google.api_core.client_info
2020
from google.auth.credentials import Credentials
2121
from google.cloud import bigquery
22+
from google.cloud import dataplex_v1
23+
from google.api_core.gapic_v1 import client_info as gapic_client_info
2224

2325
from ... import version
2426

25-
USER_AGENT = f"adk-bigquery-tool google-adk/{version.__version__}"
27+
USER_AGENT_BASE = f"google-adk/{version.__version__}"
28+
BQ_USER_AGENT = f"adk-bigquery-tool {USER_AGENT_BASE}"
29+
DP_USER_AGENT = f"adk-dataplex-tool {USER_AGENT_BASE}"
2630

2731

2832
from typing import List
@@ -48,7 +52,7 @@ def get_bigquery_client(
4852
A BigQuery client.
4953
"""
5054

51-
user_agents = [USER_AGENT]
55+
user_agents = [BQ_USER_AGENT]
5256
if user_agent:
5357
if isinstance(user_agent, str):
5458
user_agents.append(user_agent)
@@ -67,3 +71,36 @@ def get_bigquery_client(
6771
)
6872

6973
return bigquery_client
74+
75+
def get_dataplex_catalog_client(
    *,
    credentials: Credentials,
    user_agent: Optional[Union[str, List[str]]] = None,
) -> dataplex_v1.CatalogServiceClient:
  """Get a Dataplex CatalogServiceClient with minimal necessary arguments.

  Args:
    credentials: The credentials to use for the request.
    user_agent: Additional user agent string(s) to append.

  Returns:
    A Dataplex Client.
  """
  # Start from the ADK identifier, then append any caller-supplied
  # non-empty agent strings.
  agent_parts = [DP_USER_AGENT]
  if isinstance(user_agent, str):
    if user_agent:
      agent_parts.append(user_agent)
  elif user_agent:
    agent_parts.extend(ua for ua in user_agent if ua)

  return dataplex_v1.CatalogServiceClient(
      credentials=credentials,
      client_info=gapic_client_info.ClientInfo(
          user_agent=" ".join(agent_parts)
      ),
  )
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import logging
18+
from typing import Any, Dict, List, Optional
19+
20+
from google.api_core import exceptions as api_exceptions
21+
from google.auth.credentials import Credentials
22+
from google.cloud import dataplex_v1
23+
24+
from . import client
25+
from .config import BigQueryToolConfig
26+
27+
def _construct_search_query_helper(predicate: str, operator: str, items: List[str]) -> str:
28+
if not items:
29+
return ""
30+
if len(items) == 1:
31+
return f'{predicate}{operator}"{items[0]}"'
32+
33+
clauses = [f'{predicate}{operator}"{item}"' for item in items]
34+
return "(" + " OR ".join(clauses) + ")"
35+
36+
def search_catalog(
    prompt: str,
    project_id: str,
    credentials: Credentials,
    settings: BigQueryToolConfig,
    location: str,
    page_size: int = 10,
    project_ids_filter: Optional[List[str]] = None,
    dataset_ids_filter: Optional[List[str]] = None,
    types_filter: Optional[List[str]] = None,
) -> Dict[str, Any]:
  """Search for BigQuery assets within Dataplex.

  Args:
    prompt (str): The base search query (natural language or keywords).
    project_id (str): The Google Cloud project ID to scope the search.
    credentials (Credentials): Credentials for the request.
    settings (BigQueryToolConfig): BigQuery tool settings.
    location (str): The Dataplex location to use.
    page_size (int): Maximum number of results.
    project_ids_filter (Optional[List[str]]): Specific project IDs to include
      in the search results. If None, defaults to the scoping project_id.
    dataset_ids_filter (Optional[List[str]]): BigQuery dataset IDs to filter
      by.
    types_filter (Optional[List[str]]): Entry types to filter by (e.g.,
      "TABLE", "DATASET").

  Returns:
    dict: Search results or error.
  """
  if not project_id:
    return {"status": "ERROR", "error_details": "project_id must be provided."}

  try:
    catalog_client = client.get_dataplex_catalog_client(
        credentials=credentials,
        user_agent=[settings.application_name, "search_catalog"],
    )

    clauses: List[str] = []
    if prompt:
      clauses.append(f"({prompt})")

    # Restrict results to the requested projects; when no explicit filter is
    # given, fall back to the scoping project.
    scoped_projects = project_ids_filter if project_ids_filter else [project_id]
    clauses.append(
        _construct_search_query_helper("projectid", "=", scoped_projects)
    )

    # Restrict to specific BigQuery datasets via their linked resource paths.
    if dataset_ids_filter:
      resource_terms = [
          f'linked_resource:"//bigquery.googleapis.com/projects/{pid}/datasets/{did}/*"'
          for pid in scoped_projects
          for did in dataset_ids_filter
      ]
      clauses.append(f"({' OR '.join(resource_terms)})")

    # Restrict to specific entry types (e.g. TABLE, DATASET).
    if types_filter:
      clauses.append(_construct_search_query_helper("type", "=", types_filter))

    # Always scope the search to the BigQuery system.
    clauses.append("system=BIGQUERY")

    request = dataplex_v1.SearchEntriesRequest(
        name=f"projects/{project_id}/locations/{location}",
        query=" AND ".join(c for c in clauses if c),
        page_size=page_size,
        semantic_search=True,
    )
    response = catalog_client.search_entries(request=request)

    # Flatten each Dataplex entry into a plain dict for the agent.
    entries = []
    for item in response.results:
      entry = item.dataplex_entry
      source = entry.entry_source
      entries.append({
          "name": entry.name,
          "display_name": source.display_name or "",
          "entry_type": entry.entry_type,
          "update_time": str(entry.update_time),
          "linked_resource": source.resource or "",
          "description": source.description or "",
          "location": source.location or "",
      })
    return {"status": "SUCCESS", "results": entries}

  except api_exceptions.GoogleAPICallError as e:
    logging.exception("search_catalog tool: API call failed")
    return {"status": "ERROR", "error_details": f"Dataplex API Error: {str(e)}"}
  except Exception as ex:
    logging.exception("search_catalog tool: Unexpected error")
    return {"status": "ERROR", "error_details": str(ex)}
130+

tests/unittests/tools/bigquery/test_bigquery_credentials.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def test_valid_credentials_object_auth_credentials(self):
4646
assert config.credentials == auth_creds
4747
assert config.client_id is None
4848
assert config.client_secret is None
49-
assert config.scopes == ["https://www.googleapis.com/auth/bigquery"]
49+
assert config.scopes == ["https://www.googleapis.com/auth/bigquery","https://www.googleapis.com/auth/cloud-platform"]
5050

5151
def test_valid_credentials_object_oauth2_credentials(self):
5252
"""Test that providing valid Credentials object works correctly with
@@ -86,7 +86,7 @@ def test_valid_client_id_secret_pair_default_scope(self):
8686
assert config.credentials is None
8787
assert config.client_id == "test_client_id"
8888
assert config.client_secret == "test_client_secret"
89-
assert config.scopes == ["https://www.googleapis.com/auth/bigquery"]
89+
assert config.scopes == ["https://www.googleapis.com/auth/bigquery","https://www.googleapis.com/auth/cloud-platform",]
9090

9191
def test_valid_client_id_secret_pair_w_scope(self):
9292
"""Test that providing client ID and secret with explicit scopes works.
@@ -128,7 +128,7 @@ def test_valid_client_id_secret_pair_w_empty_scope(self):
128128
assert config.credentials is None
129129
assert config.client_id == "test_client_id"
130130
assert config.client_secret == "test_client_secret"
131-
assert config.scopes == ["https://www.googleapis.com/auth/bigquery"]
131+
assert config.scopes == ["https://www.googleapis.com/auth/bigquery","https://www.googleapis.com/auth/cloud-platform"]
132132

133133
def test_missing_client_secret_raises_error(self):
134134
"""Test that missing client secret raises appropriate validation error.

0 commit comments

Comments
 (0)