From 723d690f19096974705887416e9744bb23862857 Mon Sep 17 00:00:00 2001 From: Julien Arzul Date: Mon, 30 Mar 2026 10:18:42 +0200 Subject: [PATCH 1/6] Refact: Use ProjectLayout directly in the McpContext --- src/databao_cli/commands/mcp.py | 9 +++++---- src/databao_cli/features/mcp/server.py | 8 ++++---- .../features/mcp/tools/databao_ask/agent_factory.py | 6 +----- src/databao_cli/features/mcp/tools/databao_ask/tool.py | 2 +- 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/databao_cli/commands/mcp.py b/src/databao_cli/commands/mcp.py index e06a96a1..8c7d2a13 100644 --- a/src/databao_cli/commands/mcp.py +++ b/src/databao_cli/commands/mcp.py @@ -2,7 +2,7 @@ import click -from databao_cli.shared.cli_utils import handle_feature_errors +from databao_cli.shared.cli_utils import get_project_or_raise, handle_feature_errors @click.command() @@ -43,8 +43,9 @@ def mcp(ctx: click.Context, transport: str, host: str, port: int) -> None: """ from databao_cli.features.mcp.server import mcp_impl from databao_cli.shared.log.logging import configure_logging - from databao_cli.shared.project.layout import find_project + + project_layout = get_project_or_raise(ctx.obj["project_dir"]) if transport == "stdio": - configure_logging(find_project(ctx.obj["project_dir"]), quiet=True) - mcp_impl(ctx.obj["project_dir"], transport, host, port) + configure_logging(project_layout, quiet=True) + mcp_impl(project_layout, transport, host, port) diff --git a/src/databao_cli/features/mcp/server.py b/src/databao_cli/features/mcp/server.py index 83668dc1..8d0133c0 100644 --- a/src/databao_cli/features/mcp/server.py +++ b/src/databao_cli/features/mcp/server.py @@ -1,18 +1,18 @@ """Databao MCP server setup and tool registration.""" from dataclasses import dataclass -from pathlib import Path from fastmcp import FastMCP from databao_cli.features.mcp.tools import databao_ask +from databao_cli.shared.project.layout import ProjectLayout @dataclass(frozen=True) class McpContext: """Shared 
context available to all MCP tools.""" - project_dir: Path + project_layout: ProjectLayout def create_server(context: McpContext) -> FastMCP: @@ -40,6 +40,6 @@ def run_server( raise ValueError(f"Unknown transport: {transport!r}. Supported: stdio, sse") -def mcp_impl(project_dir: Path, transport: str, host: str, port: int) -> None: - context = McpContext(project_dir=project_dir) +def mcp_impl(project_layout: ProjectLayout, transport: str, host: str, port: int) -> None: + context = McpContext(project_layout=project_layout) run_server(context, transport=transport, host=host, port=port) diff --git a/src/databao_cli/features/mcp/tools/databao_ask/agent_factory.py b/src/databao_cli/features/mcp/tools/databao_ask/agent_factory.py index 8e59ba97..cad1a265 100644 --- a/src/databao_cli/features/mcp/tools/databao_ask/agent_factory.py +++ b/src/databao_cli/features/mcp/tools/databao_ask/agent_factory.py @@ -1,7 +1,5 @@ """Agent creation logic for the databao_ask tool.""" -from pathlib import Path - from databao.agent import Agent from databao.agent import domain as create_domain from databao.agent.api import agent as create_agent @@ -13,7 +11,7 @@ def create_agent_for_tool( - project_dir: Path, + project: ProjectLayout, model: str | None = None, temperature: float = 0.0, executor: str = "claude_code", @@ -23,8 +21,6 @@ def create_agent_for_tool( Raises ValueError if the project is not ready (no datasources, no build). """ - project = ProjectLayout(project_dir) - status = databao_project_status(project) if status == DatabaoProjectStatus.NOT_INITIALIZED: raise ValueError("Databao project is not initialized. 
Run 'databao init' first.") diff --git a/src/databao_cli/features/mcp/tools/databao_ask/tool.py b/src/databao_cli/features/mcp/tools/databao_ask/tool.py index 8d76c496..e9f4d5b5 100644 --- a/src/databao_cli/features/mcp/tools/databao_ask/tool.py +++ b/src/databao_cli/features/mcp/tools/databao_ask/tool.py @@ -99,7 +99,7 @@ def databao_ask( try: agent = create_agent_for_tool( - project_dir=context.project_dir, + project=context.project_layout, model=model, temperature=temperature, executor=executor, From 6ef5a5107e259aeca113822e3aea83fc9ba13d0c Mon Sep 17 00:00:00 2001 From: Julien Arzul Date: Mon, 30 Mar 2026 12:18:57 +0200 Subject: [PATCH 2/6] Add MCP tools to explore context generated for a database in the Databao project --- pyproject.toml | 2 +- src/databao_cli/features/mcp/server.py | 2 + .../mcp/tools/database_context/__init__.py | 3 + .../mcp/tools/database_context/tools.py | 98 +++++++++++++++++++ uv.lock | 9 +- 5 files changed, 108 insertions(+), 6 deletions(-) create mode 100644 src/databao_cli/features/mcp/tools/database_context/__init__.py create mode 100644 src/databao_cli/features/mcp/tools/database_context/tools.py diff --git a/pyproject.toml b/pyproject.toml index 8c968be8..7572dfe7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = ">=3.11" dependencies = [ "click>=8.2.1,<9.0.0", - "databao-context-engine[snowflake]~=0.7.0", + "databao-context-engine[snowflake]~=0.7.1.dev1", "prettytable>=3.10.0", "databao-agent~=0.2.1.dev1", "streamlit[snowflake]>=1.53.0", diff --git a/src/databao_cli/features/mcp/server.py b/src/databao_cli/features/mcp/server.py index 8d0133c0..b811c252 100644 --- a/src/databao_cli/features/mcp/server.py +++ b/src/databao_cli/features/mcp/server.py @@ -5,6 +5,7 @@ from fastmcp import FastMCP from databao_cli.features.mcp.tools import databao_ask +from databao_cli.features.mcp.tools import database_context as database_context_tools from databao_cli.shared.project.layout import 
ProjectLayout @@ -19,6 +20,7 @@ def create_server(context: McpContext) -> FastMCP: """Create a FastMCP server with all registered tools.""" mcp = FastMCP("databao") databao_ask.register(mcp, context) + database_context_tools.register(mcp, context) return mcp diff --git a/src/databao_cli/features/mcp/tools/database_context/__init__.py b/src/databao_cli/features/mcp/tools/database_context/__init__.py new file mode 100644 index 00000000..e61110a6 --- /dev/null +++ b/src/databao_cli/features/mcp/tools/database_context/__init__.py @@ -0,0 +1,3 @@ +from databao_cli.features.mcp.tools.database_context.tools import register + +__all__ = ["register"] diff --git a/src/databao_cli/features/mcp/tools/database_context/tools.py b/src/databao_cli/features/mcp/tools/database_context/tools.py new file mode 100644 index 00000000..5f93dde9 --- /dev/null +++ b/src/databao_cli/features/mcp/tools/database_context/tools.py @@ -0,0 +1,98 @@ +from typing import TYPE_CHECKING + +from databao_context_engine import DatabaoContextEngine, DatabaseSchemaLite, DatabaseTableDetails, DatasourceId +from mcp.types import ToolAnnotations +from pydantic import BaseModel, TypeAdapter + +if TYPE_CHECKING: + from fastmcp import FastMCP + + from databao_cli.features.mcp.server import McpContext + + +class DatasourceResult(BaseModel): + id: str + name: str + type: str + + +class ListDatasourceResult(BaseModel): + datasources: list[DatasourceResult] + + +class ListDatabaseSchemaResult(BaseModel): + schemas: list[DatabaseSchemaLite] + + +def register(mcp: "FastMCP", context: "McpContext") -> None: + """Register the database context tools with the MCP server.""" + + @mcp.tool( + annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False), + ) + def list_database_datasources() -> ListDatasourceResult: + """List all configured datasources that support database metadata tools and SQL execution. 
+ + Use this to narrow datasource selection before browsing schemas, inspecting table metadata, + or running SQL. + """ + context_engine = DatabaoContextEngine(domain_dir=context.project_layout.root_domain_dir) + + datasources = context_engine.list_database_datasources() + + return ListDatasourceResult( + datasources=[ + DatasourceResult( + id=str(ds.id), + name=ds.id.name, + type=ds.type.full_type, + ) + for ds in datasources + ] + ) + + @mcp.tool( + annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False), + ) + def list_database_schemas(datasource_id: str) -> ListDatabaseSchemaResult: + """List all catalogs, schemas and tables for a database-capable datasource. + + The returned list will only contain the name and description of the schemas and tables. + This lets you find tables related to your query and then fetch their full details + using the "get_database_table_details" tool. + """ + context_engine = DatabaoContextEngine(domain_dir=context.project_layout.root_domain_dir) + + ds = DatasourceId.from_string_repr(datasource_id) + return ListDatabaseSchemaResult( + schemas=context_engine.list_database_schemas_and_tables(ds), + ) + + @mcp.tool( + annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False), + ) + def get_database_table_details(datasource_id: str, catalog: str, schema: str, table: str) -> DatabaseTableDetails: + """Get the full built metadata for one specific table in a database-capable datasource. + + Requires an exact datasource_id, catalog, schema, and table name. + + Use this when you already know which table you want and need detailed schema information + such as columns, types, keys, indexes, samples, or profiling data to help write or validate SQL.
+ """ + context_engine = DatabaoContextEngine(domain_dir=context.project_layout.root_domain_dir) + + ds = DatasourceId.from_string_repr(datasource_id) + # We return a dumped python object so that we can exclude None values in the resulting JSON + # Without this manual dump, FastMCP includes them by default and we can't configure it + # The type hint of the function still declares DatabaseTableDetails because a JSON Schema is automatically + # created from it by FastMCP + # See https://github.com/PrefectHQ/fastmcp/issues/1090 + return TypeAdapter(DatabaseTableDetails).dump_python( # type: ignore[no-any-return] + context_engine.get_database_table_details( + datasource_id=ds, + catalog_name=catalog, + schema_name=schema, + table_name=table, + ), + exclude_none=True, + ) diff --git a/uv.lock b/uv.lock index 7876c79c..4812d625 100644 --- a/uv.lock +++ b/uv.lock @@ -1165,7 +1165,7 @@ requires-dist = [ { name = "databao-context-engine", extras = ["clickhouse"], marker = "extra == 'clickhouse'" }, { name = "databao-context-engine", extras = ["mssql"], marker = "extra == 'mssql'" }, { name = "databao-context-engine", extras = ["pdf"], marker = "extra == 'pdf'" }, - { name = "databao-context-engine", extras = ["snowflake"], specifier = "~=0.7.0" }, + { name = "databao-context-engine", extras = ["snowflake"], specifier = "~=0.7.1.dev1" }, { name = "fastmcp", specifier = ">=2.0.0" }, { name = "nh3", specifier = ">=0.2.15" }, { name = "prettytable", specifier = ">=3.10.0" }, @@ -1232,10 +1232,9 @@ wheels = [ [[package]] name = "databao-context-engine" -version = "0.7.0" +version = "0.7.1.dev1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "click" }, { name = "duckdb" }, { name = "jinja2" }, { name = "mcp" }, @@ -1246,9 +1245,9 @@ dependencies = [ { name = "sqlparse" }, { name = "xxhash" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/c4/72/040aa0c0c1fa2302bc350ae0dc18cd82dadad9bcab1b23d2bb2b0491b6a8/databao_context_engine-0.7.0.tar.gz", hash = "sha256:447e7c1cf6bbe899a125296e2362838cc7e5bd4770cb1d9bf682c9c12bac20aa", size = 124182, upload-time = "2026-03-18T09:54:43.204Z" } +sdist = { url = "https://files.pythonhosted.org/packages/52/9c/6e11b09afbad26edf9326a53ffd4d384e86825a3f7bfc5eab36b252a61c0/databao_context_engine-0.7.1.dev1.tar.gz", hash = "sha256:dd58cbd284990f334c7c768b5ecb25ac63bd8cafae1022082c7e5c4d4847e771", size = 132709, upload-time = "2026-03-30T07:43:43.413Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/27/fd/a271eac8b1486f41a4d40b89aa7212f8f1581b9c97b00e9b60b35ca5a32c/databao_context_engine-0.7.0-py3-none-any.whl", hash = "sha256:d4521c63980e87a9906e890424244e672a0bdc953d13c9e484152dd2a8612fd5", size = 195832, upload-time = "2026-03-18T09:54:42.059Z" }, + { url = "https://files.pythonhosted.org/packages/8f/07/429773157c29576b347672a9433d3fa35482bd201827c6c6d764e9d25c87/databao_context_engine-0.7.1.dev1-py3-none-any.whl", hash = "sha256:cbe2ded07c733c2da509a23810f27e9535ee9763d1be059354e77a4f8df24bd5", size = 210150, upload-time = "2026-03-30T07:43:41.92Z" }, ] [package.optional-dependencies] From c3170c80de5dcbc7261c036f72b5247164097385 Mon Sep 17 00:00:00 2001 From: Julien Arzul Date: Mon, 30 Mar 2026 12:20:15 +0200 Subject: [PATCH 3/6] Do not show the banner asking to upgrade the FastMCP version when running the MCP server --- src/databao_cli/features/mcp/server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/databao_cli/features/mcp/server.py b/src/databao_cli/features/mcp/server.py index b811c252..8d5e94b2 100644 --- a/src/databao_cli/features/mcp/server.py +++ b/src/databao_cli/features/mcp/server.py @@ -35,9 +35,9 @@ def run_server( match transport: case "stdio": - mcp.run(transport="stdio") + mcp.run(transport="stdio", show_banner=False) case "sse": - mcp.run(transport="sse", 
host=host, port=port) + mcp.run(transport="sse", host=host, port=port, show_banner=False) case _: raise ValueError(f"Unknown transport: {transport!r}. Supported: stdio, sse") From ef8532f14b57bcbd7a13bbbd0f9bf1972d7052d6 Mon Sep 17 00:00:00 2001 From: Julien Arzul Date: Mon, 30 Mar 2026 14:11:38 +0200 Subject: [PATCH 4/6] Remove mentions of running SQL from the tools' descriptions since it isn't available in this MCP server --- src/databao_cli/features/mcp/tools/database_context/tools.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/databao_cli/features/mcp/tools/database_context/tools.py b/src/databao_cli/features/mcp/tools/database_context/tools.py index 5f93dde9..9a8ed736 100644 --- a/src/databao_cli/features/mcp/tools/database_context/tools.py +++ b/src/databao_cli/features/mcp/tools/database_context/tools.py @@ -31,10 +31,9 @@ def register(mcp: "FastMCP", context: "McpContext") -> None: annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False), ) def list_database_datasources() -> ListDatasourceResult: - """List all configured datasources that support database metadata tools and SQL execution. + """List all configured datasources that support database metadata tools. - Use this to narrow datasource selection before browsing schemas, inspecting table metadata, - or running SQL. + Use this to narrow datasource selection before browsing schemas or inspecting table metadata. 
""" context_engine = DatabaoContextEngine(domain_dir=context.project_layout.root_domain_dir) From 18705db375af21a4c8de1497c016f09a917815df Mon Sep 17 00:00:00 2001 From: Julien Arzul Date: Tue, 31 Mar 2026 09:21:21 +0200 Subject: [PATCH 5/6] Update end2end expected output contexts --- .../resources/bigquery_introspections.yaml | 383 --------------- .../resources/duckdb_introspections.yaml | 148 +----- .../tests/resources/mysql_introspections.yaml | 176 +------ .../resources/postgres_introspections.yaml | 157 +----- ...res_partitioned_tables_introspections.yaml | 220 +-------- .../resources/snowflake_introspections.yaml | 456 +----------------- .../resources/sqlite_introspections.yaml | 89 +--- 7 files changed, 31 insertions(+), 1598 deletions(-) diff --git a/e2e-tests/tests/resources/bigquery_introspections.yaml b/e2e-tests/tests/resources/bigquery_introspections.yaml index 98434580..6665e00b 100644 --- a/e2e-tests/tests/resources/bigquery_introspections.yaml +++ b/e2e-tests/tests/resources/bigquery_introspections.yaml @@ -1,7 +1,6 @@ # ===== my_test_bigquery.yaml ===== datasource_id: my_test_bigquery.yaml datasource_type: bigquery -context_built_at: 2026-03-06 16:30:20.504412 context: catalogs: - name: datalore-internal @@ -13,115 +12,59 @@ context: - name: Region type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Country type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Item_Type type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Sales_Channel type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Order_Priority type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Order_Date type: STRING 
nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Order_ID type: INT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Ship_Date type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Units_Sold type: INT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Unit_Price type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Unit_Cost type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Total_Revenue type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Total_Cost type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Total_Profit type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null samples: - Region: Asia Country: Singapore @@ -193,291 +136,119 @@ context: Total_Revenue: 1183401.45 Total_Cost: 1022955.45 Total_Profit: 160446.0 - partition_info: null - description: null kind: table - primary_key: null - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - stats: null - name: dl13444 columns: - name: Region type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Country type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Item_Type type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Sales_Channel type: STRING 
nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Order_Priority type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Order_Date type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Order_ID type: INT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Ship_Date type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Units_Sold type: INT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Unit_Price type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Unit_Cost type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Total_Revenue type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Total_Cost type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Total_Profit type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null samples: - Region: Asia Country: Uzbekistan - Item_Type: null - Sales_Channel: null - Order_Priority: null - Order_Date: null - Order_ID: null - Ship_Date: null - Units_Sold: null - Unit_Price: null - Unit_Cost: null - Total_Revenue: null - Total_Cost: null - Total_Profit: null - Region: Asia Country: Bangladesh - Item_Type: null - Sales_Channel: null - Order_Priority: null - Order_Date: null - Order_ID: null - Ship_Date: null - Units_Sold: null - Unit_Price: null - Unit_Cost: null - 
Total_Revenue: null - Total_Cost: null - Total_Profit: null - Region: Asia Country: Myanmar - Item_Type: null - Sales_Channel: null - Order_Priority: null - Order_Date: null - Order_ID: null - Ship_Date: null - Units_Sold: null - Unit_Price: null - Unit_Cost: null - Total_Revenue: null - Total_Cost: null - Total_Profit: null - Region: Asia Country: South Korea - Item_Type: null - Sales_Channel: null - Order_Priority: null - Order_Date: null - Order_ID: null - Ship_Date: null - Units_Sold: null - Unit_Price: null - Unit_Cost: null - Total_Revenue: null - Total_Cost: null - Total_Profit: null - Region: Asia Country: Bangladesh - Item_Type: null - Sales_Channel: null - Order_Priority: null - Order_Date: null - Order_ID: null - Ship_Date: null - Units_Sold: null - Unit_Price: null - Unit_Cost: null - Total_Revenue: null - Total_Cost: null - Total_Profit: null - partition_info: null - description: null kind: table - primary_key: null - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - stats: null - name: effects_of_covid_on_trade columns: - name: Direction type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Year type: INT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Date type: DATE nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Weekday type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Country type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Commodity type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Transport_Mode type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - 
checks: [] - stats: null - name: Measure type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Value type: INT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: Cumulative type: INT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null samples: - Direction: Exports Year: 2015 @@ -529,41 +300,21 @@ context: Measure: $ Value: 125000000 Cumulative: 3423000000 - partition_info: null - description: null kind: table - primary_key: null - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - stats: null - name: new_table columns: - name: column1 type: INT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: column2 type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: column3 type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null samples: - column1: 61 column2: A @@ -580,41 +331,21 @@ context: - column1: 73 column2: A column3: 0.3975720210875223 - partition_info: null - description: null kind: table - primary_key: null - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - stats: null - name: stupin_DL_13554 columns: - name: column1 type: INT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: column2 type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: column3 type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null samples: - column1: 15 column2: D @@ -631,121 +362,61 @@ context: - column1: 15 column2: D column3: 0.18182496720710062 - 
partition_info: null - description: null kind: table - primary_key: null - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - stats: null - name: stupin_corrupted_df_export columns: - name: code type: INT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: customer_type type: STRING(100) nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: country type: STRING(100) nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: via_reseller type: BOOL nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: subscription_type type: STRING(100) nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: email_campaign type: STRING(1000) nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: program_name type: STRING(1000) nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: recipient_action_datetime type: DATETIME nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: placed_date type: DATE nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: purchased_product type: STRING(100) nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: license_type type: STRING(100) nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: license_id type: STRING(100) nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: price type: FLOAT64 nullable: true - 
description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null samples: - code: 18636715 customer_type: Personal @@ -812,41 +483,21 @@ context: license_type: New license_id: YYSQU9RKJY price: 100.0 - partition_info: null - description: null kind: table - primary_key: null - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - stats: null - name: test_stgn columns: - name: column1 type: INT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: column2 type: STRING nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: column3 type: FLOAT64 nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null samples: - column1: 20 column2: A @@ -863,49 +514,25 @@ context: - column1: 56 column2: A column3: 0.06234136251124145 - partition_info: null - description: null kind: table - primary_key: null - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - stats: null - name: timestamp_table columns: - name: d type: DATE nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: t type: TIME nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: dt type: DATETIME nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null - name: ts type: TIMESTAMP nullable: true - description: null default_expression: 'NULL' - generated: null - checks: [] - stats: null samples: - d: 2024-01-01 t: 00:00:00 @@ -927,14 +554,4 @@ context: t: 06:00:00 dt: 1600-02-29 06:00:00 ts: 1600-02-29 05:06:32+00:00 - partition_info: null - description: null kind: table - primary_key: null - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - stats: null - description: null - description: null diff --git 
a/e2e-tests/tests/resources/duckdb_introspections.yaml b/e2e-tests/tests/resources/duckdb_introspections.yaml index 23a95bdf..553d6c3d 100644 --- a/e2e-tests/tests/resources/duckdb_introspections.yaml +++ b/e2e-tests/tests/resources/duckdb_introspections.yaml @@ -1,7 +1,6 @@ # ===== test_duckdb_conn.yaml ===== datasource_id: test_duckdb_conn.yaml datasource_type: duckdb -context_built_at: 2026-02-27 16:38:36.999625 context: catalogs: - name: test_db @@ -13,206 +12,90 @@ context: - name: id type: INTEGER nullable: false - description: null - default_expression: null - generated: null - checks: [] - name: tinyint_col type: TINYINT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: smallint_col type: SMALLINT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: int_col type: INTEGER nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: bigint_col type: BIGINT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: hugeint_col type: HUGEINT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: utinyint_col type: UTINYINT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: usmallint_col type: USMALLINT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: uinteger_col type: UINTEGER nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: ubigint_col type: UBIGINT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: real_col type: FLOAT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: double_col type: DOUBLE nullable: true - description: null - default_expression: null - generated: null - checks: [] - 
name: decimal_col type: DECIMAL(10,2) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: boolean_col type: BOOLEAN nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: char_col type: VARCHAR nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: varchar_col type: VARCHAR nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: text_col type: VARCHAR nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: blob_col type: BLOB nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: date_col type: DATE nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: time_col type: TIME nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: timestamp_col type: TIMESTAMP nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: timestamptz_col type: TIMESTAMP WITH TIME ZONE nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: interval_col type: INTERVAL nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: uuid_col type: UUID nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: json_col type: JSON nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: int_array_col type: INTEGER[] nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: varchar_array_col type: VARCHAR[] nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: struct_col type: STRUCT("name" VARCHAR, age INTEGER) nullable: true - 
description: null - default_expression: null - generated: null - checks: [] - name: map_col type: MAP(VARCHAR, INTEGER) nullable: true - description: null - default_expression: null - generated: null - checks: [] samples: - id: 1 tinyint_col: 10 @@ -231,40 +114,21 @@ context: char_col: ABCDE varchar_col: Hello DuckDB text_col: This is a text column - blob_col: !!binary | - REVBREJFRUY= + blob_col: date_col: 2025-01-01 time_col: '12:30:00' timestamp_col: 2025-01-01 12:30:00 - timestamptz_col: 2025-01-01 11:30:00+01:00 + timestamptz_col: 2025-01-01 10:30:00+00:00 interval_col: 2 days, 0:00:00 uuid_col: 550e8400-e29b-41d4-a716-446655440000 json_col: '{"key": "value"}' - int_array_col: - - 1 - - 2 - - 3 - varchar_array_col: - - a - - b - - c - struct_col: - name: Alice - age: 30 - map_col: - a: 1 - b: 2 - partition_info: null - description: null + int_array_col: '[1, 2, 3]' + varchar_array_col: '["a", "b", "c"]' + struct_col: '{"name": "Alice", "age": 30}' + map_col: '{"a": 1, "b": 2}' kind: table primary_key: name: all_types_demo_id_pkey columns: - id validated: true - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - description: null - description: null diff --git a/e2e-tests/tests/resources/mysql_introspections.yaml b/e2e-tests/tests/resources/mysql_introspections.yaml index 161c3e88..7344e5bf 100644 --- a/e2e-tests/tests/resources/mysql_introspections.yaml +++ b/e2e-tests/tests/resources/mysql_introspections.yaml @@ -1,7 +1,6 @@ # ===== test_mysql_conn.yaml ===== datasource_id: test_mysql_conn.yaml datasource_type: mysql -context_built_at: 2026-02-26 16:12:59.000053 context: catalogs: - name: test @@ -13,241 +12,106 @@ context: - name: id type: int nullable: false - description: null - default_expression: null generated: identity - checks: [] - name: tinyint_col type: tinyint nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: smallint_col type: smallint nullable: true - description: null - 
default_expression: null - generated: null - checks: [] - name: mediumint_col type: mediumint nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: int_col type: int nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: bigint_col type: bigint nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: decimal_col type: decimal(10,2) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: float_col type: float nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: double_col type: double nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: bit_col type: bit(4) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: boolean_col type: tinyint(1) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: date_col type: date nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: time_col type: time nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: datetime_col type: datetime nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: timestamp_col type: timestamp nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: year_col type: year nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: char_col type: char(5) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: varchar_col type: varchar(50) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: binary_col type: binary(4) nullable: true - 
description: null - default_expression: null - generated: null - checks: [] - name: varbinary_col type: varbinary(10) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: tinytext_col type: tinytext nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: text_col type: text nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: mediumtext_col type: mediumtext nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: longtext_col type: longtext nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: tinyblob_col type: tinyblob nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: blob_col type: blob nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: mediumblob_col type: mediumblob nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: longblob_col type: longblob nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: enum_col type: enum('active','inactive','pending') nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: set_col type: set('a','b','c') nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: json_col type: json nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: point_col type: point nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: linestring_col type: linestring nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: polygon_col type: polygon nullable: true - description: null - default_expression: null - 
generated: null - checks: [] samples: - id: 1 tinyint_col: 1 @@ -258,8 +122,7 @@ context: decimal_col: '12345.67' float_col: 3.14 double_col: 3.1415926535 - bit_col: !!binary | - Cg== + bit_col: boolean_col: 1 date_col: 2025-01-01 time_col: '12:30:00' @@ -268,44 +131,25 @@ context: year_col: 2025 char_col: ABCDE varchar_col: Hello MySQL - binary_col: !!binary | - QUJDRA== - varbinary_col: !!binary | - dmFyYmlu + binary_col: + varbinary_col: tinytext_col: tiny text text_col: regular text column mediumtext_col: medium text column longtext_col: long text column - tinyblob_col: !!binary | - qg== - blob_col: !!binary | - 3q2+7w== - mediumblob_col: !!binary | - 3q2+796tvu8= - longblob_col: !!binary | - 3q2+796tvu/erb7v + tinyblob_col: + blob_col: + mediumblob_col: + longblob_col: enum_col: active set_col: a,b json_col: '{"key": "value"}' - point_col: !!binary | - AAAAAAEBAAAAAAAAAAAA8D8AAAAAAAAAQA== - linestring_col: !!binary | - AAAAAAECAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADwPwAAAAAAAPA/AAAAAAAAAEAAAAAA - AAAAQA== - polygon_col: !!binary | - AAAAAAEDAAAAAQAAAAUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAUQAAAAAAAABRA - AAAAAAAAFEAAAAAAAAAUQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA== - partition_info: null - description: null + point_col: + linestring_col: + polygon_col: kind: table primary_key: name: PRIMARY columns: - id validated: true - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - description: null - description: null diff --git a/e2e-tests/tests/resources/postgres_introspections.yaml b/e2e-tests/tests/resources/postgres_introspections.yaml index dc6f9873..eddce15d 100644 --- a/e2e-tests/tests/resources/postgres_introspections.yaml +++ b/e2e-tests/tests/resources/postgres_introspections.yaml @@ -1,7 +1,6 @@ # ===== my_postgres.yaml ===== datasource_id: my_postgres.yaml datasource_type: postgres -context_built_at: 2026-02-26 13:56:39.140577 context: catalogs: - name: test @@ -13,227 +12,100 @@ context: - name: id type: integer nullable: false - 
description: null default_expression: nextval('all_types_demo_id_seq'::regclass) - generated: null - checks: [] - name: small_int_col type: smallint nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: int_col type: integer nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: big_int_col type: bigint nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: numeric_col type: numeric(10,2) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: real_col type: real nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: double_col type: double precision nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: money_col type: money nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: char_col type: character(5) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: varchar_col type: character varying(50) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: text_col type: text nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: boolean_col type: boolean nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: date_col type: date nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: time_col type: time without time zone nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: timetz_col type: time with time zone nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: timestamp_col type: timestamp without time zone nullable: true - 
description: null - default_expression: null - generated: null - checks: [] - name: timestamptz_col type: timestamp with time zone nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: interval_col type: interval nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: json_col type: json nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: jsonb_col type: jsonb nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: int_array_col type: integer[] nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: text_array_col type: text[] nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: bytea_col type: bytea nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: status_col type: text nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: inet_col type: inet nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: cidr_col type: cidr nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: macaddr_col type: macaddr nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: point_col type: point nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: circle_col type: circle nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: bit_col type: bit(4) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: bitvarying_col type: bit varying(8) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: xml_col 
type: xml nullable: true - description: null - default_expression: null - generated: null - checks: [] samples: - id: 1 small_int_col: 10 @@ -255,42 +127,25 @@ context: interval_col: 2 days, 3:00:00 json_col: '{"key": "value"}' jsonb_col: '{"key": "value"}' - int_array_col: - - 1 - - 2 - - 3 - text_array_col: - - a - - b - - c - bytea_col: !!binary | - 3q2+7w== + int_array_col: '[1, 2, 3]' + text_array_col: '["a", "b", "c"]' + bytea_col: status_col: active inet_col: 192.168.1.1 - cidr_col: - netmask: 255.255.255.0 - network_address: 192.168.0.0 + cidr_col: '"192.168.0.0/24"' macaddr_col: 08:00:2b:01:02:03 - point_col: asyncpg.pgproto.types.Point((1.0, 2.0)) - circle_col: ((1.0, 2.0), 5.0) + point_col: '[1.0, 2.0]' + circle_col: '[[1.0, 2.0], 5.0]' bit_col: bitvarying_col: xml_col: value - partition_info: null - description: null kind: table primary_key: name: all_types_demo_pkey columns: - id validated: true - unique_constraints: [] checks: - name: all_types_demo_status_col_check expression: (status_col = ANY (ARRAY['active'::text, 'inactive'::text, 'pending'::text])) validated: true - indexes: [] - foreign_keys: [] - description: null - description: null - diff --git a/e2e-tests/tests/resources/postgres_partitioned_tables_introspections.yaml b/e2e-tests/tests/resources/postgres_partitioned_tables_introspections.yaml index a729c795..b76f6095 100644 --- a/e2e-tests/tests/resources/postgres_partitioned_tables_introspections.yaml +++ b/e2e-tests/tests/resources/postgres_partitioned_tables_introspections.yaml @@ -1,6 +1,5 @@ datasource_id: my_postgres.yaml datasource_type: postgres -context_built_at: 2026-03-12 17:37:19.271780 context: catalogs: - name: test @@ -12,259 +11,100 @@ context: - name: id type: integer nullable: false - description: null default_expression: nextval('all_types_demo_id_seq'::regclass) - generated: null - checks: [] - stats: null - name: small_int_col type: smallint nullable: true - description: null - default_expression: null - generated: null 
- checks: [] - stats: null - name: int_col type: integer nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: big_int_col type: bigint nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: numeric_col type: numeric(10,2) nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: real_col type: real nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: double_col type: double precision nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: money_col type: money nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: char_col type: character(5) nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: varchar_col type: character varying(50) nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: text_col type: text nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: boolean_col type: boolean nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: date_col type: date nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: time_col type: time without time zone nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: timetz_col type: time with time zone nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: timestamp_col type: timestamp without time zone nullable: true - description: null - default_expression: 
null - generated: null - checks: [] - stats: null - name: timestamptz_col type: timestamp with time zone nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: interval_col type: interval nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: json_col type: json nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: jsonb_col type: jsonb nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: int_array_col type: integer[] nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: text_array_col type: text[] nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: bytea_col type: bytea nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: status_col type: text nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: inet_col type: inet nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: cidr_col type: cidr nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: macaddr_col type: macaddr nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: point_col type: point nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: circle_col type: circle nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: bit_col type: bit(4) nullable: true - description: null - default_expression: null - generated: null - checks: [] - 
stats: null - name: bitvarying_col type: bit varying(8) nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: xml_col type: xml nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null samples: - id: 1 small_int_col: 10 @@ -286,85 +126,46 @@ context: interval_col: 2 days, 3:00:00 json_col: '{"key": "value"}' jsonb_col: '{"key": "value"}' - int_array_col: - - 1 - - 2 - - 3 - text_array_col: - - a - - b - - c - bytea_col: !!binary | - 3q2+7w== + int_array_col: '[1, 2, 3]' + text_array_col: '["a", "b", "c"]' + bytea_col: status_col: active inet_col: 192.168.1.1 - cidr_col: - netmask: 255.255.255.0 - network_address: 192.168.0.0 + cidr_col: '"192.168.0.0/24"' macaddr_col: 08:00:2b:01:02:03 - point_col: asyncpg.pgproto.types.Point((1.0, 2.0)) - circle_col: ((1.0, 2.0), 5.0) + point_col: '[1.0, 2.0]' + circle_col: '[[1.0, 2.0], 5.0]' bit_col: bitvarying_col: xml_col: value - partition_info: null - description: null kind: table primary_key: name: all_types_demo_pkey columns: - id validated: true - unique_constraints: [] checks: - name: all_types_demo_status_col_check expression: (status_col = ANY (ARRAY['active'::text, 'inactive'::text, 'pending'::text])) validated: true - indexes: [] - foreign_keys: [] - stats: null - name: sales columns: - name: id type: integer nullable: false - description: null default_expression: nextval('sales_id_seq'::regclass) - generated: null - checks: [] - stats: null - name: sale_date type: date nullable: false - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: amount type: numeric(10,2) nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: product_name type: character varying(100) nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: region type: character 
varying(50) nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null samples: - id: 1 sale_date: 2024-01-15 @@ -390,13 +191,4 @@ context: - sales_2024_q1 - sales_2024_q2 - sales_2024_q3 - description: null kind: table - primary_key: null - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - stats: null - description: null - description: null diff --git a/e2e-tests/tests/resources/snowflake_introspections.yaml b/e2e-tests/tests/resources/snowflake_introspections.yaml index 11702cfc..7dc150cd 100644 --- a/e2e-tests/tests/resources/snowflake_introspections.yaml +++ b/e2e-tests/tests/resources/snowflake_introspections.yaml @@ -1,6 +1,5 @@ datasource_id: my_test_snowflake.yaml datasource_type: snowflake -context_built_at: 2026-03-13 17:38:22.254348 context: catalogs: - name: E2ETESTDB @@ -12,187 +11,72 @@ context: - name: ID type: NUMBER nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: INT_COL type: NUMBER nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: BIGINT_COL type: NUMBER nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: SMALLINT_COL type: NUMBER nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: DECIMAL_COL type: NUMBER nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: FLOAT_COL type: FLOAT nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: DOUBLE_COL type: FLOAT nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: BOOL_COL type: BOOLEAN nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: 
VARCHAR_COL type: TEXT nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: CHAR_COL type: TEXT nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: TEXT_COL type: TEXT nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: BINARY_COL type: BINARY nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: DATE_COL type: DATE nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: TIME_COL type: TIME nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: TIMESTAMP_COL type: TIMESTAMP_NTZ nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: TIMESTAMP_LTZ_COL type: TIMESTAMP_LTZ nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: TIMESTAMP_NTZ_COL type: TIMESTAMP_NTZ nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: TIMESTAMP_TZ_COL type: TIMESTAMP_TZ nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: VARIANT_COL type: VARIANT nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: OBJECT_COL type: OBJECT nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: ARRAY_COL type: ARRAY nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: GEOGRAPHY_COL type: GEOGRAPHY nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null - name: GEOMETRY_COL 
type: GEOMETRY nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: null samples: - id: 1 int_col: 10 @@ -205,7 +89,7 @@ context: varchar_col: hello char_col: char text_col: long text example - binary_col: bytearray(b'\xff') + binary_col: date_col: 2026-03-13 time_col: '12:34:56' timestamp_col: '2026-03-13T03:54:40.856000' @@ -230,7 +114,7 @@ context: varchar_col: hello char_col: char text_col: long text example - binary_col: bytearray(b'\xff') + binary_col: date_col: 2026-03-13 time_col: '12:34:56' timestamp_col: '2026-03-13T03:55:00.230000' @@ -244,375 +128,51 @@ context: \ \"type\": \"Point\"\n}" geometry_col: "{\n \"coordinates\": [\n 1.000000000000000e+00,\n \ \ 2.000000000000000e+00\n ],\n \"type\": \"Point\"\n}" - partition_info: null - description: null kind: table - primary_key: null - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - stats: - row_count: 2 - approximate: true - name: SALESRECORDSSMALL columns: - name: REGION type: TEXT nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: 7 - cardinality_kind: 5-9 - min_value: Asia - max_value: Sub-Saharan Africa - top_values: - - - Europe - - 5138 - - - Sub-Saharan Africa - - 5070 - - - Asia - - 2936 - - - Middle East and North Africa - - 2435 - - - Central America and the Caribbean - - 2050 - total_row_count: 1974887 - name: COUNTRY type: TEXT nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: null - cardinality_kind: 100-999 - min_value: Afghanistan - max_value: Zimbabwe - top_values: - - - Cape Verde - - 154 - - - Portugal - - 150 - - - Chad - - 150 - - - Barbados - - 146 - - - Cambodia - - 145 - total_row_count: 1974887 - name: ITEMTYPE type: TEXT nullable: true - description: null - default_expression: null - 
generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: 12 - cardinality_kind: 10-19 - min_value: Baby Food - max_value: Vegetables - top_values: - - - Vegetables - - 1711 - - - Snacks - - 1704 - - - Cereal - - 1700 - - - Fruits - - 1648 - - - Office Supplies - - 1646 - total_row_count: 1974887 - name: SALESCHANNEL type: TEXT nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: 2 - cardinality_kind: 2-4 - min_value: Offline - max_value: Online - top_values: - - - Offline - - 9945 - - - Online - - 9761 - total_row_count: 1974887 - name: ORDERPRIORITY type: TEXT nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: 4 - cardinality_kind: 2-4 - min_value: C - max_value: M - top_values: - - - M - - 5032 - - - L - - 4938 - - - C - - 4883 - - - H - - 4853 - total_row_count: 1974887 - name: ORDER_DATE type: DATE nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: null - cardinality_kind: 1000+ - min_value: '2010-01-01' - max_value: '2020-09-10' - top_values: - - - '2017-01-31' - - 50 - - - '2015-12-04' - - 49 - - - '2015-10-09' - - 49 - - - '2016-03-25' - - 48 - - - '2010-10-04' - - 48 - total_row_count: 1974887 - name: ORDERID type: NUMBER nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: null - cardinality_kind: 1000+ - min_value: '100005042' - max_value: '999994313' - top_values: - - - 572139489 - - 26 - - - 654742515 - - 26 - - - 162716376 - - 26 - - - 354081475 - - 26 - - - 831967866 - - 26 - total_row_count: 1974887 - name: SHIPDATE type: DATE nullable: true - description: null - 
default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: null - cardinality_kind: 1000+ - min_value: '2010-01-02' - max_value: '2020-10-25' - top_values: - - - '2016-05-23' - - 62 - - - '2016-11-21' - - 56 - - - '2013-01-11' - - 56 - - - '2020-06-02' - - 50 - - - '2011-07-25' - - 48 - total_row_count: 1974887 - name: UNITSSOLD type: NUMBER nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: null - cardinality_kind: 1000+ - min_value: '1' - max_value: '10000' - top_values: - - - 4325 - - 49 - - - 9051 - - 49 - - - 7358 - - 48 - - - 3442 - - 43 - - - 4173 - - 43 - total_row_count: 1974887 - name: UNITPRICE type: NUMBER nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: 12 - cardinality_kind: 10-19 - min_value: '9.33' - max_value: '668.27' - top_values: - - - 154.06 - - 1711 - - - 152.58 - - 1704 - - - 205.7 - - 1700 - - - 9.33 - - 1648 - - - 651.21 - - 1646 - total_row_count: 1974887 - name: UNITCOST type: NUMBER nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: 12 - cardinality_kind: 10-19 - min_value: '6.92' - max_value: '524.96' - top_values: - - - 90.93 - - 1711 - - - 97.44 - - 1704 - - - 117.11 - - 1700 - - - 6.92 - - 1648 - - - 524.96 - - 1646 - total_row_count: 1974887 - name: TOTALREVENUE type: NUMBER nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: null - cardinality_kind: 1000+ - min_value: '27.99' - max_value: '6678022.11' - top_values: - - - 71318.52 - - 26 - - - 1146083.05 - - 26 - - - 26992.16 - - 26 - - - 2407800.96 - - 26 - - - 
314830.75 - - 26 - total_row_count: 1974887 - name: TOTALCOST type: NUMBER nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: null - cardinality_kind: 1000+ - min_value: '20.76' - max_value: '5249600.00' - top_values: - - - 52896.48 - - 26 - - - 861856.1 - - 26 - - - 8852.48 - - 26 - - - 1503649.44 - - 26 - - - 210926.65 - - 26 - total_row_count: 1974887 - name: TOTALPROFIT type: FLOAT nullable: true - description: null - default_expression: null - generated: null - checks: [] - stats: - null_count: 4287 - non_null_count: 1970600 - distinct_count: null - cardinality_kind: 1000+ - min_value: '7.23' - max_value: '1738526.13' - top_values: - - - 18422.04 - - 26 - - - 284226.95 - - 26 - - - 18139.68 - - 26 - - - 904151.52 - - 26 - - - 103904.1 - - 26 - total_row_count: 1974887 samples: - region: Australia and Oceania country: Palau @@ -684,16 +244,4 @@ context: totalrevenue: '1445453.90' totalcost: '822931.97' totalprofit: 622521.93 - partition_info: null - description: null kind: table - primary_key: null - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - stats: - row_count: 1974887 - approximate: true - description: null - description: null diff --git a/e2e-tests/tests/resources/sqlite_introspections.yaml b/e2e-tests/tests/resources/sqlite_introspections.yaml index 6c5da4db..1e1722d4 100644 --- a/e2e-tests/tests/resources/sqlite_introspections.yaml +++ b/e2e-tests/tests/resources/sqlite_introspections.yaml @@ -1,7 +1,6 @@ # ===== test_sqlite_conn.yaml ===== datasource_id: test_sqlite_conn.yaml datasource_type: sqlite -context_built_at: 2026-02-27 16:15:31.010532 context: catalogs: - name: default @@ -13,136 +12,60 @@ context: - name: id type: INTEGER nullable: false - description: null - default_expression: null - generated: null - checks: [] - name: int_col type: INTEGER nullable: true - description: null - default_expression: null 
- generated: null - checks: [] - name: bigint_col type: BIGINT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: smallint_col type: SMALLINT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: tinyint_col type: TINYINT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: boolean_col type: BOOLEAN nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: real_col type: REAL nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: double_col type: DOUBLE nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: float_col type: FLOAT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: numeric_col type: NUMERIC nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: decimal_col type: DECIMAL(10, 2) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: char_col type: CHAR(5) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: varchar_col type: VARCHAR(50) nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: text_col type: TEXT nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: clob_col type: CLOB nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: date_col type: DATE nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: datetime_col type: DATETIME nullable: true - description: null - default_expression: null - generated: null - checks: [] - name: blob_col type: BLOB nullable: true - description: null - 
default_expression: null - generated: null - checks: [] - name: no_type_col type: '' nullable: true - description: null - default_expression: null - generated: null - checks: [] samples: - id: 1 int_col: 42 @@ -161,21 +84,11 @@ context: clob_col: This is a CLOB column date_col: '2025-01-01' datetime_col: '2025-01-01 12:30:00' - blob_col: !!binary | - 3q2+7w== + blob_col: no_type_col: No declared type value - partition_info: null - description: null kind: table primary_key: name: pk_all_types_demo columns: - id validated: true - unique_constraints: [] - checks: [] - indexes: [] - foreign_keys: [] - description: null - description: null - From 1615b3569ad773d99b3a668c853d5139178d0215 Mon Sep 17 00:00:00 2001 From: Julien Arzul Date: Tue, 31 Mar 2026 10:10:37 +0200 Subject: [PATCH 6/6] Upgrade databao-context-engine to a stable version --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7572dfe7..5c11b3c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = ">=3.11" dependencies = [ "click>=8.2.1,<9.0.0", - "databao-context-engine[snowflake]~=0.7.1.dev1", + "databao-context-engine[snowflake]~=0.7.1", "prettytable>=3.10.0", "databao-agent~=0.2.1.dev1", "streamlit[snowflake]>=1.53.0", diff --git a/uv.lock b/uv.lock index 4812d625..e3ecfc58 100644 --- a/uv.lock +++ b/uv.lock @@ -1165,7 +1165,7 @@ requires-dist = [ { name = "databao-context-engine", extras = ["clickhouse"], marker = "extra == 'clickhouse'" }, { name = "databao-context-engine", extras = ["mssql"], marker = "extra == 'mssql'" }, { name = "databao-context-engine", extras = ["pdf"], marker = "extra == 'pdf'" }, - { name = "databao-context-engine", extras = ["snowflake"], specifier = "~=0.7.1.dev1" }, + { name = "databao-context-engine", extras = ["snowflake"], specifier = "~=0.7.1" }, { name = "fastmcp", specifier = ">=2.0.0" }, { name = "nh3", specifier = 
">=0.2.15" }, { name = "prettytable", specifier = ">=3.10.0" }, @@ -1232,7 +1232,7 @@ wheels = [ [[package]] name = "databao-context-engine" -version = "0.7.1.dev1" +version = "0.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "duckdb" }, @@ -1245,9 +1245,9 @@ dependencies = [ { name = "sqlparse" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/52/9c/6e11b09afbad26edf9326a53ffd4d384e86825a3f7bfc5eab36b252a61c0/databao_context_engine-0.7.1.dev1.tar.gz", hash = "sha256:dd58cbd284990f334c7c768b5ecb25ac63bd8cafae1022082c7e5c4d4847e771", size = 132709, upload-time = "2026-03-30T07:43:43.413Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/44/e42e4b0d3939313fa183569a3494c1d6e24c6c5c66eb678f464014920281/databao_context_engine-0.7.1.tar.gz", hash = "sha256:449d6e292a496b357f18f0e654bf751c52a1c5a380ea0c6029c5ea4eb7f2ed75", size = 132049, upload-time = "2026-03-31T08:07:22.477Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/07/429773157c29576b347672a9433d3fa35482bd201827c6c6d764e9d25c87/databao_context_engine-0.7.1.dev1-py3-none-any.whl", hash = "sha256:cbe2ded07c733c2da509a23810f27e9535ee9763d1be059354e77a4f8df24bd5", size = 210150, upload-time = "2026-03-30T07:43:41.92Z" }, + { url = "https://files.pythonhosted.org/packages/40/45/f0991598680baa26b852b0c11b0c19f224492885b0768a6f705e0667530f/databao_context_engine-0.7.1-py3-none-any.whl", hash = "sha256:c6527b16c2e74d00f1182bfd8ea18ae2a81e76df68c98e523f399745013cb64a", size = 210892, upload-time = "2026-03-31T08:07:21.261Z" }, ] [package.optional-dependencies]