}}
\preformatted{QueryChat$generate_greeting(echo = c("none", "output"))}
@@ -547,8 +536,7 @@ qc2 <- QueryChat$new(mtcars, greeting = "mtcars_greeting.md")
\subsection{Arguments}{
\if{html}{\out{
}}
\describe{
- \item{\code{echo}}{Whether to print the greeting to the console. Options are
-\code{"none"} (default, no output) or \code{"output"} (print to console).}
+ \item{\code{echo}}{Whether to print the greeting to the console.}
}
\if{html}{\out{
}}
}
@@ -562,13 +550,6 @@ qc2 <- QueryChat$new(mtcars, greeting = "mtcars_greeting.md")
\if{latex}{\out{\hypertarget{method-QueryChat-cleanup}{}}}
\subsection{\code{QueryChat$cleanup()}}{
Clean up resources associated with the data source.
-
-This method releases any resources (e.g., database connections)
-associated with the data source. Call this when you are done using the
-QueryChat object to avoid resource leaks.
-
-Note: If \code{auto_cleanup} was set to \code{TRUE} in the constructor, this will
-be called automatically when the Shiny app stops.
\subsection{Usage}{
\if{html}{\out{
}}
\preformatted{QueryChat$cleanup()}
diff --git a/pkg-r/man/TableAccessor.Rd b/pkg-r/man/TableAccessor.Rd
new file mode 100644
index 000000000..9b8557bdd
--- /dev/null
+++ b/pkg-r/man/TableAccessor.Rd
@@ -0,0 +1,106 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/TableAccessor.R
+\name{TableAccessor}
+\alias{TableAccessor}
+\title{Table Accessor}
+\description{
+Accessor for a specific table's data source and per-table reactive state.
+Returned by the \verb{$table("name")} method of the server's return value.
+}
+\keyword{internal}
+\section{Active bindings}{
+ \if{html}{\out{
}}
+ \describe{
+ \item{\code{table_name}}{The name of this table.}
+
+ \item{\code{data_source}}{The DataSource for this table.}
+ }
+ \if{html}{\out{
}}
+}
+\section{Methods}{
+\subsection{Public methods}{
+ \itemize{
+ \item \href{#method-TableAccessor-initialize}{\code{TableAccessor$new()}}
+ \item \href{#method-TableAccessor-df}{\code{TableAccessor$df()}}
+ \item \href{#method-TableAccessor-sql}{\code{TableAccessor$sql()}}
+ \item \href{#method-TableAccessor-title}{\code{TableAccessor$title()}}
+ \item \href{#method-TableAccessor-clone}{\code{TableAccessor$clone()}}
+ }
+}
+\if{html}{\out{
}}
+\if{html}{\out{
}}
+\if{latex}{\out{\hypertarget{method-TableAccessor-initialize}{}}}
+\subsection{\code{TableAccessor$new()}}{
+ Create a new TableAccessor.
+ \subsection{Usage}{
+ \if{html}{\out{
}}
+ \preformatted{TableAccessor$new(table_name, data_source, state)}
+ \if{html}{\out{
}}
+ }
+ \subsection{Arguments}{
+ \if{html}{\out{
}}
+ \describe{
+ \item{\code{table_name}}{The name of the table.}
+ \item{\code{data_source}}{The DataSource for this table.}
+ \item{\code{state}}{List of per-table reactive state (\code{sql}, \code{title}, \code{df}).}
+ }
+ \if{html}{\out{
}}
+ }
+}
+
+\if{html}{\out{
}}
+\if{html}{\out{
}}
+\if{latex}{\out{\hypertarget{method-TableAccessor-df}{}}}
+\subsection{\code{TableAccessor$df()}}{
+ Return the current filtered data for this table.
+ \subsection{Usage}{
+ \if{html}{\out{
}}
+ \preformatted{TableAccessor$df()}
+ \if{html}{\out{
}}
+ }
+}
+
+\if{html}{\out{
}}
+\if{html}{\out{
}}
+\if{latex}{\out{\hypertarget{method-TableAccessor-sql}{}}}
+\subsection{\code{TableAccessor$sql()}}{
+ Return the current SQL filter for this table.
+ \subsection{Usage}{
+ \if{html}{\out{
}}
+ \preformatted{TableAccessor$sql()}
+ \if{html}{\out{
}}
+ }
+}
+
+\if{html}{\out{
}}
+\if{html}{\out{
}}
+\if{latex}{\out{\hypertarget{method-TableAccessor-title}{}}}
+\subsection{\code{TableAccessor$title()}}{
+ Return the current filter title for this table.
+ \subsection{Usage}{
+ \if{html}{\out{
}}
+ \preformatted{TableAccessor$title()}
+ \if{html}{\out{
}}
+ }
+}
+
+\if{html}{\out{
}}
+\if{html}{\out{
}}
+\if{latex}{\out{\hypertarget{method-TableAccessor-clone}{}}}
+\subsection{\code{TableAccessor$clone()}}{
+ The objects of this class are cloneable with this method.
+ \subsection{Usage}{
+ \if{html}{\out{
}}
+ \preformatted{TableAccessor$clone(deep = FALSE)}
+ \if{html}{\out{
}}
+ }
+ \subsection{Arguments}{
+ \if{html}{\out{
}}
+ \describe{
+ \item{\code{deep}}{Whether to make a deep clone.}
+ }
+ \if{html}{\out{
}}
+ }
+}
+
+}
diff --git a/pkg-r/man/TblSqlSource.Rd b/pkg-r/man/TblSqlSource.Rd
index af4bb42be..f15a1cccc 100644
--- a/pkg-r/man/TblSqlSource.Rd
+++ b/pkg-r/man/TblSqlSource.Rd
@@ -46,6 +46,7 @@ mtcars_source$cleanup()
\item \href{#method-TblSqlSource-initialize}{\code{TblSqlSource$new()}}
\item \href{#method-TblSqlSource-get_db_type}{\code{TblSqlSource$get_db_type()}}
\item \href{#method-TblSqlSource-get_schema}{\code{TblSqlSource$get_schema()}}
+ \item \href{#method-TblSqlSource-get_schema_result}{\code{TblSqlSource$get_schema_result()}}
\item \href{#method-TblSqlSource-execute_query}{\code{TblSqlSource$execute_query()}}
\item \href{#method-TblSqlSource-test_query}{\code{TblSqlSource$test_query()}}
\item \href{#method-TblSqlSource-prep_query}{\code{TblSqlSource$prep_query()}}
@@ -106,7 +107,7 @@ string, or will be inferred from the \code{tbl} argument, if possible.}
Get schema information about the table
\subsection{Usage}{
\if{html}{\out{
}}
- \preformatted{TblSqlSource$get_schema(categorical_threshold = 20)}
+ \preformatted{TblSqlSource$get_schema(categorical_threshold = 20, table_spec = NULL)}
\if{html}{\out{
}}
}
\subsection{Arguments}{
@@ -122,6 +123,17 @@ column to be considered categorical}
}
}
+\if{html}{\out{
}}
+\if{html}{\out{
}}
+\if{latex}{\out{\hypertarget{method-TblSqlSource-get_schema_result}{}}}
+\subsection{\code{TblSqlSource$get_schema_result()}}{
+ \subsection{Usage}{
+ \if{html}{\out{
}}
+ \preformatted{TblSqlSource$get_schema_result(categorical_threshold = 20, table_spec = NULL)}
+ \if{html}{\out{
}}
+ }
+}
+
\if{html}{\out{
}}
\if{html}{\out{
}}
\if{latex}{\out{\hypertarget{method-TblSqlSource-execute_query}{}}}
diff --git a/pkg-r/man/execute_ggsql.Rd b/pkg-r/man/execute_ggsql.Rd
index 3713aab02..532af74ae 100644
--- a/pkg-r/man/execute_ggsql.Rd
+++ b/pkg-r/man/execute_ggsql.Rd
@@ -2,12 +2,12 @@
% Please edit documentation in R/querychat_viz.R
\name{execute_ggsql}
\alias{execute_ggsql}
-\title{Execute a pre-validated ggsql query against a DataSource}
+\title{Execute a pre-validated ggsql query against an executor}
\usage{
-execute_ggsql(data_source, validated)
+execute_ggsql(executor, validated)
}
\arguments{
-\item{data_source}{A querychat DataSource R6 object.}
+\item{executor}{A querychat QueryExecutor R6 object.}
\item{validated}{A pre-validated ggsql query (from \code{ggsql::ggsql_validate()}).
Must be a list with \verb{$sql} and \verb{$visual} fields.}
@@ -16,7 +16,7 @@ Must be a list with \verb{$sql} and \verb{$visual} fields.}
A \code{ggsql::Spec} R6 object (the writer-independent plot specification).
}
\description{
-Executes the SQL portion through a DataSource (preserving database pushdown),
+Executes the SQL portion through an executor (preserving database pushdown),
then feeds the result into a ggsql DuckDB reader to produce a Spec.
}
\keyword{internal}
diff --git a/pkg-r/man/querychat-convenience.Rd b/pkg-r/man/querychat-convenience.Rd
index 9a0a63ddd..5be1320fb 100644
--- a/pkg-r/man/querychat-convenience.Rd
+++ b/pkg-r/man/querychat-convenience.Rd
@@ -17,6 +17,7 @@ querychat(
categorical_threshold = 20,
extra_instructions = NULL,
prompt_template = NULL,
+ data_dict = NULL,
cleanup = NA
)
@@ -32,6 +33,7 @@ querychat_app(
categorical_threshold = 20,
extra_instructions = NULL,
prompt_template = NULL,
+ data_dict = NULL,
cleanup = NA,
bookmark_store = "url"
)
@@ -40,66 +42,33 @@ querychat_app(
\item{data_source}{Either a data.frame or a database connection (e.g., DBI
connection).}
-\item{table_name}{A string specifying the table name to use in SQL queries.
-If \code{data_source} is a data.frame, this is the name to refer to it by in
-queries (typically the variable name). If not provided, will be inferred
-from the variable name for data.frame inputs. For database connections,
-this parameter is required.}
+\item{table_name}{A string specifying the table name to use in SQL queries.}
\item{...}{Additional arguments (currently unused).}
-\item{id}{Optional module ID for the QueryChat instance. If not provided,
-will be auto-generated from \code{table_name}. The ID is used to namespace
-the Shiny module.}
+\item{id}{Optional module ID for the QueryChat instance.}
-\item{greeting}{Optional initial message to display to users. Can be a
-character string (in Markdown format) or a file path. If not provided,
-a greeting will be generated at the start of each conversation using the
-LLM, which adds latency and cost. Use \verb{$generate_greeting()} to create
-a greeting to save and reuse.}
+\item{greeting}{Optional initial message to display to users.}
-\item{client}{Optional chat client. Can be:
-\itemize{
-\item An \link[ellmer:Chat]{ellmer::Chat} object
-\item A string to pass to \code{\link[ellmer:chat]{ellmer::chat()}} (e.g., \code{"openai/gpt-4o"})
-\item \code{NULL} (default): Uses the \code{querychat.client} option, the
-\code{QUERYCHAT_CLIENT} environment variable, or defaults to
-\code{\link[ellmer:chat_openai]{ellmer::chat_openai()}}
-}}
+\item{client}{Optional chat client.}
-\item{tools}{Which querychat tools to include in the chat client, by
-default. \code{"filter"} includes the tools for filtering and resetting the
-dashboard and \code{"query"} includes the tool for executing SQL queries.
-Use \code{tools = "filter"} when you only want the dashboard filtering tools,
-or when you want to disable the querying tool entirely to prevent the
-LLM from seeing any of the data in your dataset. The legacy name
-\code{"update"} is still accepted as an alias for \code{"filter"}.}
+\item{tools}{Which querychat tools to include in the chat client.}
-\item{data_description}{Optional description of the data in plain text or
-Markdown. Can be a string or a file path. This provides context to the
-LLM about what the data represents.}
+\item{data_description}{Optional description of the data.}
\item{categorical_threshold}{For text columns, the maximum number of unique
values to consider as a categorical variable. Default is 20.}
-\item{extra_instructions}{Optional additional instructions for the chat
-model in plain text or Markdown. Can be a string or a file path.}
+\item{extra_instructions}{Optional additional instructions for the chat model.}
-\item{prompt_template}{Optional path to or string of a custom prompt
-template file. If not provided, the default querychat template will be
-used. See the package prompts directory for the default template format.}
+\item{prompt_template}{Optional path to or string of a custom prompt template.}
-\item{cleanup}{Whether or not to automatically run \verb{$cleanup()} when the
-Shiny session/app stops. By default, cleanup only occurs if \code{QueryChat}
-is created within a Shiny app. Set to \code{TRUE} to always clean up, or
-\code{FALSE} to never clean up automatically.
+\item{data_dict}{Optional data dictionary. A path to a YAML file or a list of paths.}
-In \code{querychat_app()}, in-memory databases created for data frames are
-always cleaned up.}
+\item{cleanup}{Whether or not to automatically run \verb{$cleanup()} when the
+Shiny session/app stops.}
-\item{bookmark_store}{The bookmarking storage method. Passed to
-\code{\link[shiny:enableBookmarking]{shiny::enableBookmarking()}}. If \code{"url"} or \code{"server"}, the chat state
-(including current query) will be bookmarked. Default is \code{"url"}.}
+\item{bookmark_store}{The bookmarking storage method. Default is \code{"url"}.}
}
\value{
A \code{QueryChat} object. See \link{QueryChat} for available methods.
@@ -114,23 +83,5 @@ and app launching (i.e., \code{querychat_app()}).
\dontshow{if (rlang::is_interactive() && rlang::is_installed("RSQLite")) withAutoprint(\{ # examplesIf}
# Quick start - chat with mtcars dataset in one line
querychat_app(mtcars)
-
-# Add options
-querychat_app(
- mtcars,
- greeting = "Welcome to the mtcars explorer!",
- client = "openai/gpt-4o"
-)
-
-# Chat with a database table (table_name required)
-con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
-DBI::dbWriteTable(con, "mtcars", mtcars)
-querychat_app(con, "mtcars")
-
-# Create QueryChat class object
-qc <- querychat(mtcars, greeting = "Welcome to the mtcars explorer!")
-
-# Run the app later
-qc$app()
\dontshow{\}) # examplesIf}
}
diff --git a/pkg-r/man/read_data_dict.Rd b/pkg-r/man/read_data_dict.Rd
new file mode 100644
index 000000000..25c43271d
--- /dev/null
+++ b/pkg-r/man/read_data_dict.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/DataDict.R
+\name{read_data_dict}
+\alias{read_data_dict}
+\title{Read a Data Dictionary from YAML}
+\usage{
+read_data_dict(path)
+}
+\arguments{
+\item{path}{Path to the YAML file.}
+}
+\value{
+A named list with the structure of the YAML file.
+}
+\description{
+Loads a data dictionary from a YAML file conforming to the
+\href{https://data-dict.tidyverse.org/}{data-dict spec}. The dictionary is
+returned as a plain list and can be passed directly to \link{QueryChat} via the
+\code{data_dict} argument.
+
+If \code{name} is absent from the YAML file, it defaults to the file stem (the file's base name without its extension).
+}
diff --git a/pkg-r/pkgdown/_pkgdown.yml b/pkg-r/pkgdown/_pkgdown.yml
index 2312aa5ab..364231216 100644
--- a/pkg-r/pkgdown/_pkgdown.yml
+++ b/pkg-r/pkgdown/_pkgdown.yml
@@ -69,6 +69,8 @@ reference:
contents:
- querychat
- QueryChat
+ - TableAccessor
+ - read_data_dict
- title: Data sources
contents:
- ends_with("Source")
diff --git a/pkg-r/tests/testthat/_snaps/querychat_tools.md b/pkg-r/tests/testthat/_snaps/querychat_tools.md
index eea808c30..a1a4c875d 100644
--- a/pkg-r/tests/testthat/_snaps/querychat_tools.md
+++ b/pkg-r/tests/testthat/_snaps/querychat_tools.md
@@ -1,44 +1,29 @@
-# tool_update_dashboard() checks inputs
+# tool_update_dashboard() checks update_fn inputs
Code
- tool_update_dashboard("foo")
- Condition
- Error in `tool_update_dashboard()`:
- ! `data_source` must be a
object, not a string.
-
----
-
- Code
- tool_update_dashboard(df_source, update_fn = NULL)
+ tool_update_dashboard(executor, "test_table", update_fn = NULL)
Condition
Error in `tool_update_dashboard()`:
! `update_fn` must be a function, not `NULL`.
Code
- tool_update_dashboard(df_source, update_fn = function(query) { })
+ tool_update_dashboard(executor, "test_table", update_fn = function(query) { })
Condition
Error in `tool_update_dashboard()`:
- ! `update_fn` must accept at least two named arguments: "query" and "title".
- x "title" argument was missing.
+ ! `update_fn` must accept at least three named arguments: "query", "title", and "table".
+ x "title" and "table" arguments were missing.
Code
- tool_update_dashboard(df_source, update_fn = function(title, extra) { })
+ tool_update_dashboard(executor, "test_table", update_fn = function(title, extra)
+ { })
Condition
Error in `tool_update_dashboard()`:
- ! `update_fn` must accept at least two named arguments: "query" and "title".
- x "query" argument was missing.
+ ! `update_fn` must accept at least three named arguments: "query", "title", and "table".
+ x "query" and "table" arguments were missing.
# tool_reset_dashboard() checks inputs
Code
- tool_reset_dashboard("not_a_function")
+ tool_reset_dashboard("not_a_function", table_names = "t")
Condition
Error in `tool_reset_dashboard()`:
! `reset_fn` must be a function, not the string "not_a_function".
-# tool_query() checks inputs
-
- Code
- tool_query("invalid_source")
- Condition
- Error in `tool_query()`:
- ! `data_source` must be a object, not a string.
-
diff --git a/pkg-r/tests/testthat/helper-fixtures.R b/pkg-r/tests/testthat/helper-fixtures.R
index d800b0106..3775b84ee 100644
--- a/pkg-r/tests/testthat/helper-fixtures.R
+++ b/pkg-r/tests/testthat/helper-fixtures.R
@@ -1,5 +1,10 @@
# Test fixture constructors for data source tests
+# Test-only helper: return the entry named `table_name` from the private `.data_sources` field of `qc`
+qc_data_source <- function(qc, table_name) {
+ qc$.__enclos_env__$private$.data_sources[[table_name]]
+}
+
# Simple data frame with id, name, and value columns
new_test_df <- function(rows = 5) {
data.frame(
diff --git a/pkg-r/tests/testthat/test-DBISource.R b/pkg-r/tests/testthat/test-DBISource.R
index 69ff7be26..487052ff4 100644
--- a/pkg-r/tests/testthat/test-DBISource.R
+++ b/pkg-r/tests/testthat/test-DBISource.R
@@ -82,9 +82,11 @@ describe("DBISource$test_query()", {
expect_error(dbi_source$test_query("SELECT * FROM non_existent_table"))
- expect_error(dbi_source$test_query(
- "SELECT non_existent_column FROM test_table"
- ))
+ expect_error(
+ dbi_source$test_query(
+ "SELECT non_existent_column FROM test_table"
+ )
+ )
})
it("works with different data types", {
diff --git a/pkg-r/tests/testthat/test-DataDict.R b/pkg-r/tests/testthat/test-DataDict.R
new file mode 100644
index 000000000..f10aab403
--- /dev/null
+++ b/pkg-r/tests/testthat/test-DataDict.R
@@ -0,0 +1,154 @@
+describe("read_data_dict()", {
+ it("reads a YAML file and returns a plain list", {
+ yaml_file <- withr::local_tempfile(fileext = ".yaml")
+ writeLines(
+ c(
+ "name: test",
+ "description: Test domain",
+ "tables:",
+ " orders:",
+ " description: Orders table",
+ " columns:",
+ " - name: id",
+ " type: integer",
+ " description: Primary key",
+ "relationships:",
+ " - join: orders.customer_id = customers.id",
+ " description: Order belongs to customer",
+ " cardinality: many-to-one",
+ "glossary:",
+ " ARR: Annual Recurring Revenue"
+ ),
+ yaml_file
+ )
+
+ dd <- read_data_dict(yaml_file)
+ expect_true(is.list(dd))
+ expect_equal(dd[["name"]], "test")
+ expect_equal(dd[["description"]], "Test domain")
+ expect_true(is.list(dd[["tables"]][["orders"]]))
+ expect_equal(dd[["tables"]][["orders"]][["description"]], "Orders table")
+ expect_length(dd[["tables"]][["orders"]][["columns"]], 1)
+ expect_equal(dd[["tables"]][["orders"]][["columns"]][[1]][["name"]], "id")
+ expect_length(dd[["relationships"]], 1)
+ expect_equal(
+ dd[["relationships"]][[1]][["join"]],
+ "orders.customer_id = customers.id"
+ )
+ expect_equal(dd[["glossary"]][["ARR"]], "Annual Recurring Revenue")
+ })
+
+ it("defaults name to file stem when not in YAML", {
+ yaml_file <- withr::local_tempfile(fileext = ".yaml")
+ writeLines("description: No name here", yaml_file)
+
+ dd <- read_data_dict(yaml_file)
+ expect_equal(dd[["name"]], tools::file_path_sans_ext(basename(yaml_file)))
+ })
+
+ it("reads a YAML with column range and values", {
+ yaml_file <- withr::local_tempfile(fileext = ".yaml")
+ writeLines(
+ c(
+ "tables:",
+ " products:",
+ " columns:",
+ " - name: price",
+ " range:",
+ " min: 0",
+ " max: 999",
+ " - name: category",
+ " values: [A, B, C]"
+ ),
+ yaml_file
+ )
+
+ dd <- read_data_dict(yaml_file)
+ cols <- dd[["tables"]][["products"]][["columns"]]
+ price_col <- cols[[1]]
+ cat_col <- cols[[2]]
+
+ expect_equal(price_col[["range"]][["min"]], 0)
+ expect_equal(price_col[["range"]][["max"]], 999)
+ expect_equal(cat_col[["values"]], c("A", "B", "C"))
+ })
+})
+
+describe("data_dict_to_prompt_list()", {
+ it("returns list with name and description", {
+ dd <- list(name = "sales", description = "Sales domain")
+ result <- data_dict_to_prompt_list(dd)
+ expect_equal(result[["name"]], "sales")
+ expect_equal(result[["description"]], "Sales domain")
+ })
+
+ it("omits NULL name and description", {
+ dd <- list()
+ result <- data_dict_to_prompt_list(dd)
+ expect_false("name" %in% names(result))
+ expect_false("description" %in% names(result))
+ })
+
+ it("includes table descriptions but strips column details", {
+ dd <- list(
+ tables = list(
+ orders = list(
+ description = "Orders table",
+ details = "Long details that should not appear",
+ columns = list(
+ list(name = "id", description = "PK", details = "Internal only")
+ )
+ )
+ )
+ )
+ result <- data_dict_to_prompt_list(dd)
+ expect_true("tables" %in% names(result))
+ expect_equal(
+ result[["tables"]][["orders"]][["description"]],
+ "Orders table"
+ )
+ expect_null(result[["tables"]][["orders"]][["columns"]])
+ expect_null(result[["tables"]][["orders"]][["details"]])
+ })
+
+ it("includes relationships as list of non-NULL fields", {
+ dd <- list(
+ relationships = list(
+ list(
+ join = "a.id = b.id",
+ description = "A to B",
+ cardinality = "one-to-many"
+ )
+ )
+ )
+ result <- data_dict_to_prompt_list(dd)
+ expect_true("relationships" %in% names(result))
+ expect_length(result[["relationships"]], 1)
+ rel <- result[["relationships"]][[1]]
+ expect_equal(rel[["join"]], "a.id = b.id")
+ expect_equal(rel[["description"]], "A to B")
+ expect_equal(rel[["cardinality"]], "one-to-many")
+ })
+
+ it("includes glossary", {
+ dd <- list(glossary = list(ARR = "Annual Recurring Revenue"))
+ result <- data_dict_to_prompt_list(dd)
+ expect_true("glossary" %in% names(result))
+ expect_equal(result[["glossary"]][["ARR"]], "Annual Recurring Revenue")
+ })
+
+ it("omits empty tables, relationships, glossary", {
+ dd <- list()
+ result <- data_dict_to_prompt_list(dd)
+ expect_false("tables" %in% names(result))
+ expect_false("relationships" %in% names(result))
+ expect_false("glossary" %in% names(result))
+ })
+
+ it("includes table entry as NULL when table has no description", {
+ dd <- list(tables = list(no_desc = list()))
+ result <- data_dict_to_prompt_list(dd)
+ expect_true("tables" %in% names(result))
+ expect_null(result[["tables"]][["no_desc"]])
+ })
+})
diff --git a/pkg-r/tests/testthat/test-DataSource.R b/pkg-r/tests/testthat/test-DataSource.R
index cf2fc0468..7defe8c21 100644
--- a/pkg-r/tests/testthat/test-DataSource.R
+++ b/pkg-r/tests/testthat/test-DataSource.R
@@ -29,7 +29,6 @@ describe("DataSource base class", {
})
})
-
describe("DataSource$get_schema()", {
it("returns proper schema for DataFrameSource", {
skip_if_no_dataframe_engine()
@@ -311,7 +310,6 @@ describe("DataSource$execute_query()", {
})
})
-
describe("test_query() column validation", {
skip_if_no_dataframe_engine()
diff --git a/pkg-r/tests/testthat/test-PinSource.R b/pkg-r/tests/testthat/test-PinSource.R
index 3c2f1e05e..2230dc31f 100644
--- a/pkg-r/tests/testthat/test-PinSource.R
+++ b/pkg-r/tests/testthat/test-PinSource.R
@@ -391,7 +391,7 @@ describe("QueryChat + PinSource integration", {
prompt_before <- qc$system_prompt
expect_match(prompt_before, "Motor Trend Cars")
- qc$data_source <- new_test_df()
+ qc$add_table(new_test_df(), "cars", replace = TRUE)
prompt_after <- qc$system_prompt
expect_no_match(prompt_after, "Motor Trend Cars")
diff --git a/pkg-r/tests/testthat/test-QueryChat.R b/pkg-r/tests/testthat/test-QueryChat.R
index cbbf20994..99313737d 100644
--- a/pkg-r/tests/testthat/test-QueryChat.R
+++ b/pkg-r/tests/testthat/test-QueryChat.R
@@ -9,8 +9,8 @@ describe("QueryChat$new()", {
)
withr::defer(qc$cleanup())
- expect_s3_class(qc$data_source, "DataSource")
- expect_s3_class(qc$data_source, "DataFrameSource")
+ expect_s3_class(qc_data_source(qc, "test_df"), "DataSource")
+ expect_s3_class(qc_data_source(qc, "test_df"), "DataFrameSource")
})
it("accepts DataFrameSource directly", {
@@ -22,8 +22,8 @@ describe("QueryChat$new()", {
greeting = "Test greeting"
)
- expect_s3_class(qc$data_source, "DataFrameSource")
- expect_equal(qc$data_source$table_name, "test_source")
+ expect_s3_class(qc_data_source(qc, "test_source"), "DataFrameSource")
+ expect_equal(qc_data_source(qc, "test_source")$table_name, "test_source")
})
it("accepts DBISource", {
@@ -36,8 +36,8 @@ describe("QueryChat$new()", {
greeting = "Test greeting"
)
- expect_s3_class(qc$data_source, "DBISource")
- expect_equal(qc$data_source$table_name, "test_table")
+ expect_s3_class(qc_data_source(qc, "test_table"), "DBISource")
+ expect_equal(qc_data_source(qc, "test_table")$table_name, "test_table")
})
it("infers table_name from data.frame variable name", {
@@ -45,7 +45,7 @@ describe("QueryChat$new()", {
qc <- QueryChat$new(my_data, greeting = "Test")
withr::defer(qc$cleanup())
- expect_equal(qc$data_source$table_name, "my_data")
+ expect_equal(qc_data_source(qc, "my_data")$table_name, "my_data")
expect_equal(qc$id, "querychat_my_data")
})
@@ -104,7 +104,7 @@ describe("QueryChat$new()", {
describe("QueryChat deferred client", {
it("accepts NULL data_source with table_name", {
qc <- QueryChat$new(NULL, "users", greeting = "Test")
- expect_null(qc$data_source)
+ expect_length(qc$table_names(), 0)
expect_equal(qc$id, "querychat_users")
})
@@ -119,41 +119,53 @@ describe("QueryChat deferred client", {
withr::local_envvar(OPENAI_API_KEY = NA)
withr::local_options(querychat.client = NULL)
qc <- QueryChat$new(NULL, "users", greeting = "Test")
- expect_null(qc$data_source)
+ expect_length(qc$table_names(), 0)
})
it("stores explicit client string as spec", {
withr::local_envvar(OPENAI_API_KEY = "boop")
qc <- QueryChat$new(NULL, "users", greeting = "Test", client = "openai")
- expect_null(qc$data_source)
+ expect_length(qc$table_names(), 0)
})
it("$client() errors when data_source is NULL", {
qc <- QueryChat$new(NULL, "users", greeting = "Test")
- expect_error(qc$client(), "data_source.*must be set")
+ expect_error(
+ qc$client(),
+ "data_source.*must be set|data_source.*set before"
+ )
})
it("$console() errors when data_source is NULL", {
qc <- QueryChat$new(NULL, "users", greeting = "Test")
- expect_error(qc$console(), "data_source.*must be set")
+ expect_error(
+ qc$console(),
+ "data_source.*must be set|data_source.*set before"
+ )
})
it("$generate_greeting() errors when data_source is NULL", {
qc <- QueryChat$new(NULL, "users", greeting = "Test")
- expect_error(qc$generate_greeting(), "data_source.*must be set")
+ expect_error(
+ qc$generate_greeting(),
+ "data_source.*must be set|data_source.*set before"
+ )
})
it("$system_prompt errors when data_source is NULL", {
qc <- QueryChat$new(NULL, "users", greeting = "Test")
- expect_error(qc$system_prompt, "data_source.*must be set")
+ expect_error(
+ qc$system_prompt,
+ "data_source.*must be set|data_source.*set before"
+ )
})
- it("works after setting data_source later", {
+ it("works after adding table via add_table()", {
skip_if_no_dataframe_engine()
qc <- QueryChat$new(NULL, "users", greeting = "Test")
- qc$data_source <- new_users_df()
+ qc$add_table(new_users_df(), "users")
- expect_s3_class(qc$data_source, "DataFrameSource")
+ expect_s3_class(qc_data_source(qc, "users"), "DataFrameSource")
prompt <- qc$system_prompt
expect_match(prompt, "users")
})
@@ -186,15 +198,16 @@ describe("QueryChat integration with DBISource", {
client = mock_client
)
- expect_s3_class(qc$data_source, "DBISource")
- expect_s3_class(qc$data_source, "DataSource")
+ iris_source <- qc_data_source(qc, "iris")
+ expect_s3_class(iris_source, "DBISource")
+ expect_s3_class(iris_source, "DataSource")
- result_data <- qc$data_source$execute_query(NULL)
+ result_data <- iris_source$execute_query(NULL)
expect_s3_class(result_data, "data.frame")
expect_equal(nrow(result_data), 150)
expect_equal(ncol(result_data), 5)
- query_result <- qc$data_source$execute_query(
+ query_result <- iris_source$execute_query(
"SELECT \"Sepal.Length\", \"Sepal.Width\" FROM iris WHERE \"Species\" = 'setosa'"
)
expect_s3_class(query_result, "data.frame")
@@ -293,16 +306,20 @@ describe("QueryChat$system_prompt", {
describe("QueryChat$data_source", {
skip_if_no_dataframe_engine()
- it("returns the data source object", {
+ it("errors when accessed (removed)", {
test_df <- new_test_df()
qc <- QueryChat$new(test_df, greeting = "Test")
withr::defer(qc$cleanup())
- ds <- qc$data_source
+ expect_error(qc$data_source, "removed")
+ })
- expect_s3_class(ds, "DataSource")
- expect_s3_class(ds, "DataFrameSource")
- expect_equal(ds$table_name, "test_df")
+ it("errors when set (removed)", {
+ test_df <- new_test_df()
+ qc <- QueryChat$new(test_df, greeting = "Test")
+ withr::defer(qc$cleanup())
+
+ expect_error(qc$data_source <- test_df, "removed")
})
})
@@ -334,7 +351,7 @@ describe("QueryChat$client()", {
client <- qc$client(tools = "query")
- # Should only have query tool
+ # Should only have query tool (plus get_schema)
tool_names <- sapply(client$get_tools(), function(t) t@name)
expect_contains(tool_names, "querychat_query")
expect_false("querychat_update_dashboard" %in% tool_names)
@@ -451,7 +468,7 @@ describe("QueryChat$client()", {
client <- qc$client(tools = "visualize")
tool_names <- sapply(client$get_tools(), function(t) t@name)
- expect_equal(unname(tool_names), "querychat_visualize")
+ expect_contains(tool_names, "querychat_visualize")
})
it("registers only visualize tool when tools = 'visualize'", {
@@ -473,7 +490,11 @@ describe("QueryChat$client()", {
client <- qc$client(tools = "visualize", session = session)
tool_names <- sapply(client$get_tools(), function(t) t@name)
- expect_equal(unname(tool_names), "querychat_visualize")
+ # get_schema is always registered when `tools` is not NULL
+ expect_contains(tool_names, "querychat_visualize")
+ expect_false("querychat_update_dashboard" %in% tool_names)
+ expect_false("querychat_reset_dashboard" %in% tool_names)
+ expect_false("querychat_query" %in% tool_names)
})
it("returns client with no tools when tools = NULL", {
@@ -520,8 +541,8 @@ describe("QueryChat$client()", {
update_calls <- list()
client <- qc$client(
tools = "update",
- update_dashboard = function(query, title) {
- update_calls <<- list(query = query, title = title)
+ update_dashboard = function(query, title, table) {
+ update_calls <<- list(query = query, title = title, table = table)
}
)
@@ -538,12 +559,14 @@ describe("QueryChat$client()", {
# Call the tool - it should execute the query and call the callback
result <- update_tool(
query = "SELECT * FROM test_df WHERE id = 1",
- title = "Test Filter"
+ title = "Test Filter",
+ table = "test_df"
)
expect_null(result@error)
expect_equal(update_calls$query, "SELECT * FROM test_df WHERE id = 1")
expect_equal(update_calls$title, "Test Filter")
+ expect_equal(update_calls$table, "test_df")
})
it("passes reset_dashboard callback to tool", {
@@ -553,11 +576,11 @@ describe("QueryChat$client()", {
)
withr::defer(qc$cleanup())
- reset_called <- FALSE
+ reset_called_with <- NULL
client <- qc$client(
tools = "update",
- reset_dashboard = function() {
- reset_called <<- TRUE
+ reset_dashboard = function(table) {
+ reset_called_with <<- table
}
)
@@ -572,9 +595,9 @@ describe("QueryChat$client()", {
]]
# Call the tool
- reset_tool()
+ reset_tool("test_df")
- expect_true(reset_called)
+ expect_equal(reset_called_with, "test_df")
})
it("returns independent client instances on each call", {
@@ -675,7 +698,7 @@ describe("querychat()", {
withr::defer(qc$cleanup())
expect_s3_class(qc, "QueryChat")
- expect_s3_class(qc$data_source, "DataFrameSource")
+ expect_s3_class(qc_data_source(qc, "test_df"), "DataFrameSource")
expect_equal(qc$greeting, "Test greeting")
})
@@ -684,7 +707,10 @@ describe("querychat()", {
qc <- querychat(my_test_data, greeting = "Test")
withr::defer(qc$cleanup())
- expect_equal(qc$data_source$table_name, "my_test_data")
+ expect_equal(
+ qc_data_source(qc, "my_test_data")$table_name,
+ "my_test_data"
+ )
})
it("passes all arguments to QueryChat$new()", {
@@ -702,7 +728,7 @@ describe("querychat()", {
expect_equal(qc$id, "custom_id")
expect_equal(qc$greeting, "Custom greeting")
- expect_equal(qc$data_source$table_name, "custom_name")
+ expect_equal(qc_data_source(qc, "custom_name")$table_name, "custom_name")
})
})
@@ -734,7 +760,8 @@ describe("QueryChat$console()", {
expect_s3_class(console_client, "Chat")
tools <- console_client$get_tools()
- expect_equal(names(tools), "querychat_query")
+ tool_names <- names(tools)
+ expect_contains(tool_names, "querychat_query")
})
it("persists console client across calls", {
@@ -782,7 +809,7 @@ describe("QueryChat$console()", {
expect_s3_class(console_client, "Chat")
tools <- console_client$get_tools()
- expect_setequal(
+ expect_contains(
names(tools),
c(
"querychat_query",
@@ -900,10 +927,9 @@ describe("QueryChat deferred client with $server()", {
)
})
- it("$server(data_source=...) sets the data_source", {
+ it("$server(data_source=...) errors without Shiny session", {
skip_if_no_dataframe_engine()
qc <- QueryChat$new(NULL, "users", greeting = "Test")
- expect_null(qc$data_source)
expect_error(
qc$server(data_source = new_users_df()),
@@ -911,3 +937,105 @@ describe("QueryChat deferred client with $server()", {
)
})
})
+
+describe("QueryChat$add_tables()", {
+  # Fixture: an in-memory SQLite connection holding two small tables,
+  # "orders" and "customers". Skips the calling test when RSQLite is not
+  # installed; the connection is closed when `env` (by default the caller's
+  # frame, i.e. the enclosing test) exits.
+  local_multi_table_conn <- function(env = parent.frame()) {
+    skip_if_not_installed("RSQLite")
+    conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+    withr::defer(DBI::dbDisconnect(conn), envir = env)
+    DBI::dbWriteTable(
+      conn,
+      "orders",
+      data.frame(id = 1:2, amount = c(9.99, 4.50))
+    )
+    DBI::dbWriteTable(
+      conn,
+      "customers",
+      data.frame(id = 1:2, name = c("Alice", "Bob"))
+    )
+    conn
+  }
+
+  it("auto-discovery registers all tables", {
+    conn <- local_multi_table_conn()
+    qc <- QueryChat$new(NULL, "placeholder", greeting = "Test")
+    # Warnings are silenced here; the warning raised when several tables are
+    # discovered is asserted separately in the last test of this block.
+    suppressWarnings(qc$add_tables(conn))
+    expect_setequal(qc$table_names(), c("orders", "customers"))
+  })
+
+  it("explicit tables registers only those", {
+    conn <- local_multi_table_conn()
+    qc <- QueryChat$new(NULL, "placeholder", greeting = "Test")
+    qc$add_tables(conn, tables = "orders")
+    expect_equal(qc$table_names(), "orders")
+  })
+
+  it("nonexistent table name raises error", {
+    conn <- local_multi_table_conn()
+    qc <- QueryChat$new(NULL, "placeholder", greeting = "Test")
+    expect_error(
+      qc$add_tables(conn, tables = "nonexistent"),
+      "not found"
+    )
+  })
+
+  it("duplicate without replace raises error", {
+    conn <- local_multi_table_conn()
+    qc <- QueryChat$new(NULL, "placeholder", greeting = "Test")
+    qc$add_tables(conn, tables = "orders")
+    # Registering the same table a second time must be rejected.
+    expect_error(
+      qc$add_tables(conn, tables = "orders"),
+      "already exists"
+    )
+  })
+
+  it("replace = TRUE on existing table succeeds", {
+    conn <- local_multi_table_conn()
+    qc <- QueryChat$new(NULL, "placeholder", greeting = "Test")
+    qc$add_tables(conn, tables = "orders")
+    expect_no_error(qc$add_tables(conn, tables = "orders", replace = TRUE))
+    # expect_contains() prints the actual table names on failure, which
+    # expect_true(x %in% y) cannot do.
+    expect_contains(qc$table_names(), "orders")
+  })
+
+  it("non-DBI argument raises error", {
+    # No connection fixture is needed: a data frame is passed where a DBI
+    # connection is expected.
+    qc <- QueryChat$new(NULL, "placeholder", greeting = "Test")
+    expect_error(
+      qc$add_tables(new_test_df()),
+      "DBIConnection"
+    )
+  })
+
+  it("empty tables vector raises error", {
+    conn <- local_multi_table_conn()
+    qc <- QueryChat$new(NULL, "placeholder", greeting = "Test")
+    expect_error(
+      qc$add_tables(conn, tables = character(0)),
+      "No tables found"
+    )
+  })
+
+  it("calling after server initialization raises error", {
+    conn <- local_multi_table_conn()
+    qc <- QueryChat$new(NULL, "placeholder", greeting = "Test")
+    # Set the private flag directly instead of calling $server().
+    qc$.__enclos_env__$private$.server_initialized <- TRUE
+    expect_error(
+      qc$add_tables(conn),
+      "after server initialization"
+    )
+  })
+
+  it("system prompt built exactly once for multiple tables", {
+    conn <- local_multi_table_conn()
+    qc <- QueryChat$new(NULL, "placeholder", greeting = "Test")
+    # Record the message of every warning raised by add_tables() and muffle it
+    # so the call runs to completion.
+    warns <- character(0)
+    withCallingHandlers(
+      qc$add_tables(conn),
+      warning = function(w) {
+        warns <<- c(warns, conditionMessage(w))
+        invokeRestart("muffleWarning")
+      }
+    )
+    # NOTE(review): the "Multiple tables" warning is presumably emitted once
+    # per prompt build, so its count stands in for the number of builds;
+    # confirm against the implementation.
+    multi_table_warns <- warns[grepl("Multiple tables", warns)]
+    expect_length(multi_table_warns, 1L)
+  })
+})
diff --git a/pkg-r/tests/testthat/test-QueryChatSystemPrompt.R b/pkg-r/tests/testthat/test-QueryChatSystemPrompt.R
index 5bf3e88e5..59894650f 100644
--- a/pkg-r/tests/testthat/test-QueryChatSystemPrompt.R
+++ b/pkg-r/tests/testthat/test-QueryChatSystemPrompt.R
@@ -8,12 +8,11 @@ describe("QueryChatSystemPrompt$new()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = "Template: {{schema}}",
- data_source = ds
+ data_sources = list(test_table = ds)
)
expect_type(sp$template, "character")
expect_true(grepl("Template:", sp$template))
- expect_type(sp$schema, "character")
expect_equal(sp$categorical_threshold, 10)
})
@@ -27,7 +26,7 @@ describe("QueryChatSystemPrompt$new()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = temp_file,
- data_source = ds
+ data_sources = list(test_table = ds)
)
expect_type(sp$template, "character")
@@ -41,7 +40,7 @@ describe("QueryChatSystemPrompt$new()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = "Template",
- data_source = ds,
+ data_sources = list(test_table = ds),
data_description = "Test data description"
)
@@ -58,7 +57,7 @@ describe("QueryChatSystemPrompt$new()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = "Template",
- data_source = ds,
+ data_sources = list(test_table = ds),
data_description = temp_file
)
@@ -72,7 +71,7 @@ describe("QueryChatSystemPrompt$new()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = "Template",
- data_source = ds,
+ data_sources = list(test_table = ds),
extra_instructions = "Extra instructions here"
)
@@ -89,7 +88,7 @@ describe("QueryChatSystemPrompt$new()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = "Template",
- data_source = ds,
+ data_sources = list(test_table = ds),
extra_instructions = temp_file
)
@@ -103,7 +102,7 @@ describe("QueryChatSystemPrompt$new()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = "Template",
- data_source = ds,
+ data_sources = list(test_table = ds),
categorical_threshold = 25
)
@@ -117,7 +116,7 @@ describe("QueryChatSystemPrompt$new()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = "Template",
- data_source = ds
+ data_sources = list(test_table = ds)
)
expect_null(sp$data_description)
@@ -140,7 +139,7 @@ describe("QueryChatSystemPrompt$render()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = template,
- data_source = ds
+ data_sources = list(test_table = ds)
)
result <- sp$render(c("update", "query"))
@@ -164,7 +163,7 @@ describe("QueryChatSystemPrompt$render()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = template,
- data_source = ds
+ data_sources = list(test_table = ds)
)
result <- sp$render("query")
@@ -188,7 +187,7 @@ describe("QueryChatSystemPrompt$render()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = template,
- data_source = ds
+ data_sources = list(test_table = ds)
)
result <- sp$render("update")
@@ -213,7 +212,7 @@ describe("QueryChatSystemPrompt$render()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = template,
- data_source = ds
+ data_sources = list(test_table = ds)
)
result <- sp$render(NULL)
@@ -231,7 +230,7 @@ describe("QueryChatSystemPrompt$render()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = "Schema: {{schema}}",
- data_source = ds
+ data_sources = list(test_table = ds)
)
result <- sp$render(NULL)
@@ -247,7 +246,7 @@ describe("QueryChatSystemPrompt$render()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = "Database: {{db_type}}",
- data_source = ds
+ data_sources = list(test_table = ds)
)
result <- sp$render(NULL)
@@ -268,7 +267,7 @@ describe("QueryChatSystemPrompt$render()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = template,
- data_source = ds,
+ data_sources = list(test_table = ds),
data_description = "My test data"
)
@@ -290,7 +289,7 @@ describe("QueryChatSystemPrompt$render()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = template,
- data_source = ds
+ data_sources = list(test_table = ds)
)
result <- sp$render(NULL)
@@ -311,7 +310,7 @@ describe("QueryChatSystemPrompt$render()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = template,
- data_source = ds,
+ data_sources = list(test_table = ds),
extra_instructions = "Be concise"
)
@@ -333,7 +332,7 @@ describe("QueryChatSystemPrompt$render()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = template,
- data_source = ds
+ data_sources = list(test_table = ds)
)
result <- sp$render(NULL)
@@ -351,7 +350,7 @@ describe("QueryChatSystemPrompt$render()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = template,
- data_source = ds
+ data_sources = list(test_table = ds)
)
result <- sp$render(NULL)
@@ -367,7 +366,7 @@ describe("QueryChatSystemPrompt$render()", {
sp <- QueryChatSystemPrompt$new(
prompt_template = "Simple template",
- data_source = ds
+ data_sources = list(test_table = ds)
)
result <- sp$render(NULL)
@@ -389,7 +388,7 @@ describe("QueryChatSystemPrompt with full prompt.md template", {
"prompt.md",
package = "querychat"
),
- data_source = ds,
+ data_sources = list(test_table = ds),
data_description = "A test dataframe"
)
prompt <- sp$render(NULL)
@@ -397,7 +396,7 @@ describe("QueryChatSystemPrompt with full prompt.md template", {
expect_type(prompt, "character")
expect_true(nchar(prompt) > 0)
expect_match(prompt, "A test dataframe")
- expect_match(prompt, "Table: test_table")
+ expect_match(prompt, "test_table")
})
it("includes DuckDB-specific content for DuckDB sources", {
@@ -411,7 +410,7 @@ describe("QueryChatSystemPrompt with full prompt.md template", {
"prompt.md",
package = "querychat"
),
- data_source = ds
+ data_sources = list(test_table = ds)
)
sys_prompt <- sp$render(NULL)
@@ -419,8 +418,7 @@ describe("QueryChatSystemPrompt with full prompt.md template", {
expect_true(grepl("DuckDB SQL Tips", sys_prompt, fixed = TRUE))
})
- it("handles categorical_threshold with full template", {
- # Create a source with categorical data
+ it("stores categorical_threshold for on-demand schema", {
df_with_categories <- data.frame(
id = 1:10,
category = rep(c("A", "B", "C", "D", "E"), each = 2)
@@ -428,31 +426,18 @@ describe("QueryChatSystemPrompt with full prompt.md template", {
cat_source <- DataFrameSource$new(df_with_categories, "test_table")
withr::defer(cat_source$cleanup())
- # With low threshold, categories should not be listed
- sp_low <- QueryChatSystemPrompt$new(
+ sp <- QueryChatSystemPrompt$new(
prompt_template = system.file(
"prompts",
"prompt.md",
package = "querychat"
),
- data_source = cat_source,
+ data_sources = list(test_table = cat_source),
categorical_threshold = 3
)
- prompt_low <- sp_low$render(NULL)
- expect_false(grepl("Categorical values:", prompt_low))
-
- # With high threshold, categories should be listed
- sp_high <- QueryChatSystemPrompt$new(
- prompt_template = system.file(
- "prompts",
- "prompt.md",
- package = "querychat"
- ),
- data_source = cat_source,
- categorical_threshold = 10
- )
- prompt_high <- sp_high$render(NULL)
- expect_match(prompt_high, "Categorical values:")
+ expect_equal(sp$categorical_threshold, 3)
+ prompt <- sp$render(NULL)
+ expect_match(prompt, "querychat_get_schema")
})
})
@@ -467,7 +452,7 @@ describe("viz prompt conditionals", {
ds <- local_data_frame_source(new_test_df())
sp <- QueryChatSystemPrompt$new(
prompt_template = default_prompt(),
- data_source = ds
+ data_sources = list(test_table = ds)
)
rendered <- sp$render(tools = c("query", "visualize"))
expect_match(rendered, "querychat_visualize")
@@ -479,7 +464,7 @@ describe("viz prompt conditionals", {
ds <- local_data_frame_source(new_test_df())
sp <- QueryChatSystemPrompt$new(
prompt_template = default_prompt(),
- data_source = ds
+ data_sources = list(test_table = ds)
)
rendered <- sp$render(tools = c("query"))
expect_no_match(rendered, "querychat_visualize")
@@ -489,7 +474,7 @@ describe("viz prompt conditionals", {
ds <- local_data_frame_source(new_test_df())
sp <- QueryChatSystemPrompt$new(
prompt_template = default_prompt(),
- data_source = ds
+ data_sources = list(test_table = ds)
)
rendered <- sp$render(tools = c("query"))
expect_match(rendered, "visualization is not currently enabled")
@@ -499,7 +484,7 @@ describe("viz prompt conditionals", {
ds <- local_data_frame_source(new_test_df())
sp <- QueryChatSystemPrompt$new(
prompt_template = default_prompt(),
- data_source = ds
+ data_sources = list(test_table = ds)
)
rendered <- sp$render(tools = c("visualize"))
expect_match(rendered, "ggsql-syntax-reference")
@@ -516,7 +501,7 @@ describe("viz prompt conditionals", {
ds <- local_data_frame_source(new_test_df())
sp <- QueryChatSystemPrompt$new(
prompt_template = default_prompt(),
- data_source = ds
+ data_sources = list(test_table = ds)
)
rendered <- sp$render(tools = c("query", "visualize"))
expect_match(rendered, "Avoid redundant expanded results")
@@ -526,66 +511,9 @@ describe("viz prompt conditionals", {
ds <- local_data_frame_source(new_test_df())
sp <- QueryChatSystemPrompt$new(
prompt_template = default_prompt(),
- data_source = ds
+ data_sources = list(test_table = ds)
)
rendered <- sp$render(tools = c("visualize"))
expect_no_match(rendered, "Avoid redundant expanded results")
})
})
-
-describe("Schema inference skip", {
- skip_if_no_dataframe_engine()
-
- it("skips schema when template doesn't reference {{schema}}", {
- df <- new_test_df()
- ds <- DataFrameSource$new(df, "test_table")
- withr::defer(ds$cleanup())
-
- sp <- QueryChatSystemPrompt$new(
- prompt_template = "No schema here: {{db_type}}",
- data_source = ds
- )
-
- expect_equal(sp$schema, "")
- })
-
- it("computes schema when template uses {{schema}}", {
- df <- new_test_df()
- ds <- DataFrameSource$new(df, "test_table")
- withr::defer(ds$cleanup())
-
- sp <- QueryChatSystemPrompt$new(
- prompt_template = "Schema: {{schema}}",
- data_source = ds
- )
-
- expect_true(nchar(sp$schema) > 0)
- expect_match(sp$schema, "test_table")
- })
-
- it("computes schema for {{{schema}}} triple braces", {
- df <- new_test_df()
- ds <- DataFrameSource$new(df, "test_table")
- withr::defer(ds$cleanup())
-
- sp <- QueryChatSystemPrompt$new(
- prompt_template = "Schema: {{{schema}}}",
- data_source = ds
- )
-
- expect_true(nchar(sp$schema) > 0)
- })
-
- it("computes schema for {{#schema}} conditional sections", {
- df <- new_test_df()
- ds <- DataFrameSource$new(df, "test_table")
- withr::defer(ds$cleanup())
-
- sp <- QueryChatSystemPrompt$new(
- prompt_template = "{{#schema}}Has schema{{/schema}}",
- data_source = ds
- )
-
- expect_true(nchar(sp$schema) > 0)
- })
-})
diff --git a/pkg-r/tests/testthat/test-QueryExecutor.R b/pkg-r/tests/testthat/test-QueryExecutor.R
new file mode 100644
index 000000000..ca922c74f
--- /dev/null
+++ b/pkg-r/tests/testthat/test-QueryExecutor.R
@@ -0,0 +1,188 @@
+describe("DataSourceExecutor", {
+  skip_if_not_installed("duckdb")
+
+  # One executor, wrapping a single "users" source, is shared by every test in
+  # this block.
+  users_src <- local_data_frame_source(new_users_df(), "users")
+  executor <- DataSourceExecutor$new(list(users = users_src))
+
+  it("delegates execute_query() to primary source", {
+    rows <- executor$execute_query("SELECT * FROM users WHERE age > 28")
+    expect_s3_class(rows, "data.frame")
+    expect_equal(nrow(rows), 3)
+  })
+
+  it("delegates test_query() to the named source", {
+    # The query has no WHERE clause, yet exactly one row is expected back from
+    # test_query().
+    probe <- executor$test_query("SELECT * FROM users", "users")
+    expect_s3_class(probe, "data.frame")
+    expect_equal(nrow(probe), 1)
+  })
+
+  it("returns correct get_db_type()", {
+    db_type <- executor$get_db_type()
+    expect_equal(db_type, "DuckDB")
+  })
+
+  it("gets schema for a named table", {
+    schema_text <- executor$get_schema("users", categorical_threshold = 20)
+    expect_type(schema_text, "character")
+    expect_match(schema_text, "Table: users")
+    # The schema text must mention each of these column names.
+    for (column in c("id", "name", "age")) {
+      expect_match(schema_text, column)
+    }
+  })
+})
+
+describe("DuckDBExecutor", {
+  skip_if_not_installed("duckdb")
+
+  # Each test builds its own executor from a named list of data frames and
+  # releases it with cleanup() when the test exits.
+
+  it("registers multiple data frames for cross-table JOINs", {
+    users <- new_users_df()
+    scores <- data.frame(
+      id = 1:5,
+      score = c(90, 85, 92, 78, 88),
+      stringsAsFactors = FALSE
+    )
+    dataframes <- list(users = users, scores = scores)
+    executor <- DuckDBExecutor$new(dataframes)
+    withr::defer(executor$cleanup())
+
+    # A single query that joins both registered tables on `id`.
+    result <- executor$execute_query(
+      "SELECT u.name, s.score FROM users u JOIN scores s ON u.id = s.id"
+    )
+    expect_s3_class(result, "data.frame")
+    expect_equal(nrow(result), 5)
+    # expect_contains() lists the actual column names when a column is
+    # missing, unlike expect_true(x %in% names(result)).
+    expect_contains(names(result), c("name", "score"))
+  })
+
+  it("enforces require_all_columns per table in test_query()", {
+    users <- new_users_df()
+    dataframes <- list(users = users)
+    executor <- DuckDBExecutor$new(dataframes)
+    withr::defer(executor$cleanup())
+
+    # Query that drops a column should fail with require_all_columns = TRUE
+    expect_error(
+      executor$test_query(
+        "SELECT id, name FROM users",
+        "users",
+        require_all_columns = TRUE
+      ),
+      class = "querychat_missing_columns_error"
+    )
+
+    # Full select should pass
+    expect_no_error(
+      executor$test_query(
+        "SELECT * FROM users",
+        "users",
+        require_all_columns = TRUE
+      )
+    )
+  })
+
+  it("locks down the connection (DDL like CREATE TABLE should fail)", {
+    users <- new_users_df()
+    dataframes <- list(users = users)
+    executor <- DuckDBExecutor$new(dataframes)
+    withr::defer(executor$cleanup())
+
+    # NOTE(review): any error satisfies this expectation; consider pinning a
+    # condition class or message once the lock-down error is stable.
+    expect_error(
+      executor$execute_query("CREATE TABLE new_table (id INTEGER)")
+    )
+  })
+
+  it("returns correct get_db_type()", {
+    executor <- DuckDBExecutor$new(list(users = new_users_df()))
+    withr::defer(executor$cleanup())
+
+    expect_equal(executor$get_db_type(), "DuckDB")
+  })
+
+  it("gets schema for a named table", {
+    executor <- DuckDBExecutor$new(list(users = new_users_df()))
+    withr::defer(executor$cleanup())
+
+    # The schema is returned as text that names the table and its columns.
+    schema <- executor$get_schema("users", categorical_threshold = 20)
+    expect_type(schema, "character")
+    expect_match(schema, "Table: users")
+    expect_match(schema, "id")
+    expect_match(schema, "name")
+    expect_match(schema, "age")
+  })
+})
+
+describe("build_query_executor()", {
+  skip_if_not_installed("duckdb")
+
+  it("returns DataSourceExecutor for a single table", {
+    # One data-frame-backed source registered under "users".
+    users_src <- local_data_frame_source(new_users_df(), "users")
+    built <- build_query_executor(list(users = users_src))
+
+    expect_s3_class(built, "DataSourceExecutor")
+    expect_s3_class(built, "QueryExecutor")
+  })
+
+  it("returns DuckDBExecutor for multiple DataFrameSources", {
+    # Two data-frame-backed sources; the resulting executor is cleaned up
+    # when the test exits.
+    users_src <- local_data_frame_source(new_users_df(), "users")
+    test_src <- local_data_frame_source(new_test_df(), "test")
+    built <- build_query_executor(list(users = users_src, test = test_src))
+    withr::defer(built$cleanup())
+
+    expect_s3_class(built, "DuckDBExecutor")
+    expect_s3_class(built, "QueryExecutor")
+  })
+
+  it("returns DataSourceExecutor for multiple DBISources sharing same connection", {
+    skip_if_not_installed("RSQLite")
+
+    # Both tables live in one in-memory SQLite database, closed on exit.
+    sqlite_conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+    withr::defer(DBI::dbDisconnect(sqlite_conn))
+
+    DBI::dbWriteTable(sqlite_conn, "users", new_users_df())
+    DBI::dbWriteTable(sqlite_conn, "test_table", new_test_df())
+
+    users_src <- DBISource$new(sqlite_conn, "users")
+    test_src <- DBISource$new(sqlite_conn, "test_table")
+    built <- build_query_executor(
+      list(users = users_src, test_table = test_src)
+    )
+
+    expect_s3_class(built, "DataSourceExecutor")
+    expect_s3_class(built, "QueryExecutor")
+  })
+})
+
+describe("check_source_compatibility()", {
+  skip_if_not_installed("duckdb")
+
+  it("accepts compatible DataFrameSources", {
+    # An already-registered data frame source plus a second one of the same
+    # kind.
+    users_src <- local_data_frame_source(new_users_df(), "users")
+    test_src <- local_data_frame_source(new_test_df(), "test")
+
+    expect_no_error(
+      check_source_compatibility(list(users = users_src), test_src, "test")
+    )
+  })
+
+  it("accepts an empty existing list (first table)", {
+    users_src <- local_data_frame_source(new_users_df(), "users")
+    nothing_registered <- list()
+    expect_no_error(
+      check_source_compatibility(nothing_registered, users_src, "users")
+    )
+  })
+
+  it("rejects mixed source types (DataFrameSource + DBISource)", {
+    skip_if_not_installed("RSQLite")
+
+    users_src <- local_data_frame_source(new_users_df(), "users")
+
+    # A DBI-backed source on an in-memory SQLite database, closed on exit.
+    sqlite_conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+    withr::defer(DBI::dbDisconnect(sqlite_conn))
+    DBI::dbWriteTable(sqlite_conn, "test_table", new_test_df())
+    sqlite_src <- DBISource$new(sqlite_conn, "test_table")
+
+    registered <- list(users = users_src)
+    expect_error(
+      check_source_compatibility(registered, sqlite_src, "test_table")
+    )
+  })
+})
diff --git a/pkg-r/tests/testthat/test-querychat_module.R b/pkg-r/tests/testthat/test-querychat_module.R
index 99ae477f9..040db90f5 100644
--- a/pkg-r/tests/testthat/test-querychat_module.R
+++ b/pkg-r/tests/testthat/test-querychat_module.R
@@ -17,10 +17,125 @@ test_that("Shiny app example loads without errors", {
})
})
+# Runs mod_server() with one registered table and checks two things: the
+# module-internal helpers (table_names_fn(), table_fn(), tables) and the same
+# accessors as exposed on the module's return value.
+test_that("mod_server() return includes table() and table_names() for single-table", {
+  skip_if_no_dataframe_engine()
+
+  ds <- local_data_frame_source(new_test_df())
+  executor <- build_query_executor(list(test_table = ds))
+  withr::defer(executor$cleanup())
+
+  # Stand-in chat client: an empty list tagged with class "MockChat".
+  client_factory <- function(...) structure(list(), class = "MockChat")
+
+  shiny::testServer(
+    mod_server,
+    args = list(
+      id = "test",
+      data_sources = list(test_table = ds),
+      executor = executor,
+      greeting = "Hello",
+      client = client_factory,
+      tools = "query",
+      enable_bookmarking = FALSE
+    ),
+    {
+      # table_names_fn() returns the table name vector
+      expect_equal(table_names_fn(), "test_table")
+
+      # table_fn() returns a TableAccessor backed by reactive state
+      acc <- table_fn("test_table")
+      # expect_s3_class() matches the check used on the returned accessor
+      # below and reports the actual class on failure.
+      expect_s3_class(acc, "TableAccessor")
+      expect_equal(acc$table_name, "test_table")
+
+      # TableAccessor$df() works (returns the full data frame when no filter set)
+      df_result <- shiny::isolate(acc$df())
+      expect_equal(nrow(df_result), 5L)
+
+      # Single-table backward compat: first$df/sql/title are still in the return
+      first_state <- tables[[1]]
+      expect_true(is.function(first_state$df))
+      expect_true(is.function(first_state$sql))
+      expect_true(is.function(first_state$title))
+
+      # Verify the returned list exposes table() and table_names()
+      expect_true(is.function(session$returned$table))
+      expect_true(is.function(session$returned$table_names))
+      acc <- session$returned$table("test_table")
+      expect_s3_class(acc, "TableAccessor")
+      expect_equal(session$returned$table_names(), "test_table")
+
+      # Verify backward-compat reactive accessors on the returned list
+      expect_true(is.function(session$returned$df))
+      expect_true(is.function(session$returned$sql))
+      expect_true(is.function(session$returned$title))
+    }
+  )
+})
+
+# Runs mod_server() with two registered tables and checks the per-table
+# accessors, the error raised for unknown table names, and the error raised by
+# the single-table accessors when several tables are registered.
+test_that("mod_server() return includes table() and table_names() for multi-table", {
+  skip_if_no_dataframe_engine()
+
+  ds1 <- local_data_frame_source(new_test_df(), table_name = "tbl_a")
+  ds2 <- local_data_frame_source(new_test_df(), table_name = "tbl_b")
+  data_sources <- list(tbl_a = ds1, tbl_b = ds2)
+  executor <- build_query_executor(data_sources)
+  withr::defer(executor$cleanup())
+
+  # Stand-in chat client: an empty list tagged with class "MockChat". The
+  # previous version also recorded a flag that no assertion ever read.
+  client_factory <- function(...) structure(list(), class = "MockChat")
+
+  shiny::testServer(
+    mod_server,
+    args = list(
+      id = "test",
+      data_sources = data_sources,
+      executor = executor,
+      greeting = "Hello",
+      client = client_factory,
+      tools = "query",
+      enable_bookmarking = FALSE
+    ),
+    {
+      # table_names_fn() returns all registered table names
+      expect_equal(table_names_fn(), c("tbl_a", "tbl_b"))
+
+      # table_fn() returns a TableAccessor for each table
+      acc_a <- table_fn("tbl_a")
+      expect_s3_class(acc_a, "TableAccessor")
+      expect_equal(acc_a$table_name, "tbl_a")
+
+      acc_b <- table_fn("tbl_b")
+      expect_s3_class(acc_b, "TableAccessor")
+      expect_equal(acc_b$table_name, "tbl_b")
+
+      # table_fn() errors for unknown names
+      expect_error(table_fn("nonexistent"), class = "rlang_error")
+
+      # Multi-table: single_table_error functions mention qc_vals$table()
+      single_err <- single_table_error("sql")
+      expect_error(single_err(), regexp = "qc_vals\\$table")
+
+      # Verify the returned list exposes table() and table_names()
+      expect_true(is.function(session$returned$table))
+      expect_true(is.function(session$returned$table_names))
+      acc <- session$returned$table("tbl_a")
+      expect_s3_class(acc, "TableAccessor")
+      expect_equal(sort(session$returned$table_names()), c("tbl_a", "tbl_b"))
+
+      # Verify error is surfaced through the public API
+      expect_error(session$returned$table("nonexistent"), "not found")
+    }
+  )
+})
+
test_that("mod_server() passes visualize callback and tools to client factory", {
skip_if_no_dataframe_engine()
ds <- local_data_frame_source(new_test_df())
+ executor <- build_query_executor(list(test_table = ds))
+ withr::defer(executor$cleanup())
captured <- NULL
client_factory <- function(...) {
@@ -32,7 +147,8 @@ test_that("mod_server() passes visualize callback and tools to client factory",
mod_server,
args = list(
id = "test",
- data_source = ds,
+ data_sources = list(test_table = ds),
+ executor = executor,
greeting = "Hello",
client = client_factory,
tools = c("query", "visualize"),
@@ -48,6 +164,80 @@ test_that("mod_server() passes visualize callback and tools to client factory",
)
})
+test_that("mod_server() exposes current_table() starting as NULL", {
+  skip_if_no_dataframe_engine()
+
+  # A single data source registered as "test_table"; the same named list
+  # feeds both the executor and the module.
+  src <- local_data_frame_source(new_test_df())
+  sources <- list(test_table = src)
+  executor <- build_query_executor(sources)
+  withr::defer(executor$cleanup())
+
+  # Stand-in chat client: an empty list tagged with class "MockChat".
+  mock_client <- function(...) {
+    structure(list(), class = "MockChat")
+  }
+
+  module_args <- list(
+    id = "test",
+    data_sources = sources,
+    executor = executor,
+    greeting = "Hello",
+    client = mock_client,
+    tools = "query",
+    enable_bookmarking = FALSE
+  )
+
+  shiny::testServer(mod_server, args = module_args, {
+    # current_table is exposed as a function whose initial value is NULL.
+    expect_true(is.function(session$returned$current_table))
+    expect_null(shiny::isolate(session$returned$current_table()))
+  })
+})
+
+# Captures the callbacks that mod_server() hands to the client factory, then
+# invokes update_dashboard and reset_dashboard directly and checks that
+# current_table() reports the table each callback was called with.
+test_that("mod_server() current_table() updates on update_dashboard and reset_dashboard", {
+  skip_if_no_dataframe_engine()
+
+  ds1 <- local_data_frame_source(new_test_df(), table_name = "tbl_a")
+  ds2 <- local_data_frame_source(new_test_df(), table_name = "tbl_b")
+  data_sources <- list(tbl_a = ds1, tbl_b = ds2)
+  executor <- build_query_executor(data_sources)
+  withr::defer(executor$cleanup())
+
+  # The factory records every argument it receives so the test can call the
+  # callbacks itself; it returns an empty "MockChat" object.
+  captured_callbacks <- NULL
+  client_factory <- function(...) {
+    captured_callbacks <<- list(...)
+    structure(list(), class = "MockChat")
+  }
+
+  shiny::testServer(
+    mod_server,
+    args = list(
+      id = "test",
+      data_sources = data_sources,
+      executor = executor,
+      greeting = "Hello",
+      client = client_factory,
+      tools = "query",
+      enable_bookmarking = FALSE
+    ),
+    {
+      # Initially NULL
+      expect_null(shiny::isolate(session$returned$current_table()))
+
+      # update_dashboard sets it
+      shiny::isolate(
+        captured_callbacks$update_dashboard(
+          query = "SELECT * FROM tbl_a",
+          title = "All of tbl_a",
+          table = "tbl_a"
+        )
+      )
+      expect_equal(shiny::isolate(session$returned$current_table()), "tbl_a")
+
+      # reset_dashboard also sets it
+      shiny::isolate(captured_callbacks$reset_dashboard("tbl_b"))
+      expect_equal(shiny::isolate(session$returned$current_table()), "tbl_b")
+    }
+  )
+})
+
test_that("mod_ui() passes allow_attachments = TRUE to shinychat by default", {
captured <- NULL
local_mocked_bindings(
@@ -78,6 +268,8 @@ test_that("restored viz widgets survive a second bookmark cycle", {
skip_if_no_dataframe_engine()
ds <- local_data_frame_source(new_test_df())
+ executor <- build_query_executor(list(test_table = ds))
+ withr::defer(executor$cleanup())
callbacks <- NULL
bookmark_fn <- NULL
restore_fn <- NULL
@@ -89,7 +281,7 @@ test_that("restored viz widgets survive a second bookmark cycle", {
}
local_mocked_bindings(
- chat_restore = function(id, chat, session) {},
+ chat_restore = function(id, chat, ..., session) {},
.package = "shinychat"
)
local_mocked_bindings(
@@ -102,9 +294,9 @@ test_that("restored viz widgets survive a second bookmark cycle", {
.package = "shiny"
)
local_mocked_bindings(
- restore_viz_widgets = function(data_source, saved_widgets, session) {
+ restore_viz_widgets = function(executor, saved_widgets, session) {
restored_args <<- list(
- data_source = data_source,
+ executor = executor,
saved_widgets = saved_widgets,
session = session
)
@@ -117,7 +309,8 @@ test_that("restored viz widgets survive a second bookmark cycle", {
mod_server,
args = list(
id = "test",
- data_source = ds,
+ data_sources = list(test_table = ds),
+ executor = executor,
greeting = "Hello",
client = client_factory,
tools = c("query", "visualize"),
@@ -145,7 +338,7 @@ test_that("restored viz widgets survive a second bookmark cycle", {
restore_state <- new.env(parent = emptyenv())
restore_state$values <- first_state$values
shiny::isolate(restore_fn(restore_state))
- expect_identical(restored_args$data_source, ds)
+ expect_true(inherits(restored_args$executor, "QueryExecutor"))
expect_equal(restored_args$saved_widgets, saved)
second_state <- new.env(parent = emptyenv())
diff --git a/pkg-r/tests/testthat/test-querychat_tools.R b/pkg-r/tests/testthat/test-querychat_tools.R
index 46bb2b22d..893c08dac 100644
--- a/pkg-r/tests/testthat/test-querychat_tools.R
+++ b/pkg-r/tests/testthat/test-querychat_tools.R
@@ -1,22 +1,35 @@
-test_that("tool_update_dashboard() checks inputs", {
- skip_if_no_dataframe_engine()
+# Test helper: builds a query executor for `df_source`, registered under the
+# name "test_table", and schedules its cleanup() for when `env` (by default
+# the caller's frame) exits. Returns the executor.
+local_executor <- function(df_source, env = parent.frame()) {
+  sources <- list(test_table = df_source)
+  built <- build_query_executor(sources)
+  withr::defer(built$cleanup(), envir = env)
+  built
+}
- expect_snapshot(error = TRUE, tool_update_dashboard("foo"))
+test_that("tool_update_dashboard() checks update_fn inputs", {
+ skip_if_no_dataframe_engine()
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
+
expect_snapshot(error = TRUE, {
- tool_update_dashboard(df_source, update_fn = NULL)
- tool_update_dashboard(df_source, update_fn = function(query) {})
- tool_update_dashboard(df_source, update_fn = function(title, extra) {})
+ tool_update_dashboard(executor, "test_table", update_fn = NULL)
+ tool_update_dashboard(
+ executor,
+ "test_table",
+ update_fn = function(query) {}
+ )
+ tool_update_dashboard(
+ executor,
+ "test_table",
+ update_fn = function(title, extra) {}
+ )
})
})
test_that("tool_reset_dashboard() checks inputs", {
- expect_snapshot(error = TRUE, tool_reset_dashboard("not_a_function"))
-})
-
-test_that("tool_query() checks inputs", {
- expect_snapshot(error = TRUE, tool_query("invalid_source"))
+ expect_snapshot(
+ error = TRUE,
+ tool_reset_dashboard("not_a_function", table_names = "t")
+ )
})
describe("querychat_tool_starts_open()", {
@@ -94,9 +107,10 @@ describe("querychat_tool_result()", {
it("returns successful result for valid query action", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
result <- querychat_tool_result(
- df_source,
+ executor,
query = "SELECT * FROM test_table WHERE id = 1",
action = "query"
)
@@ -109,12 +123,14 @@ describe("querychat_tool_result()", {
it("returns successful result for valid update action", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
result <- querychat_tool_result(
- df_source,
+ executor,
query = "SELECT * FROM test_table WHERE value > 20",
title = "High values",
- action = "update"
+ action = "update",
+ table_name = "test_table"
)
expect_s7_class(result, ellmer::ContentToolResult)
@@ -127,9 +143,10 @@ describe("querychat_tool_result()", {
it("returns successful result for reset action", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
result <- querychat_tool_result(
- df_source,
+ executor,
query = NULL,
action = "reset"
)
@@ -141,9 +158,10 @@ describe("querychat_tool_result()", {
it("handles query errors appropriately", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
result <- querychat_tool_result(
- df_source,
+ executor,
query = "SELECT * FROM nonexistent_table",
action = "query"
)
@@ -155,11 +173,13 @@ describe("querychat_tool_result()", {
it("handles update errors appropriately", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
result <- querychat_tool_result(
- df_source,
+ executor,
query = "INVALID SQL",
- action = "update"
+ action = "update",
+ table_name = "test_table"
)
expect_s7_class(result, ellmer::ContentToolResult)
@@ -169,9 +189,10 @@ describe("querychat_tool_result()", {
it("formats query results with details block", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
result <- querychat_tool_result(
- df_source,
+ executor,
query = "SELECT * FROM test_table LIMIT 1",
action = "query"
)
@@ -185,12 +206,14 @@ describe("querychat_tool_result()", {
it("formats update results with button HTML", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
result <- querychat_tool_result(
- df_source,
+ executor,
query = "SELECT * FROM test_table",
title = "Test Filter",
- action = "update"
+ action = "update",
+ table_name = "test_table"
)
markdown <- result@extra$display$markdown
@@ -204,9 +227,10 @@ describe("querychat_tool_result()", {
it("formats reset results with button HTML", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
result <- querychat_tool_result(
- df_source,
+ executor,
query = NULL,
action = "reset"
)
@@ -218,12 +242,14 @@ describe("querychat_tool_result()", {
it("includes title in extra display metadata for update action", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
result <- querychat_tool_result(
- df_source,
+ executor,
query = "SELECT * FROM test_table",
title = "Custom Title",
- action = "update"
+ action = "update",
+ table_name = "test_table"
)
expect_equal(result@extra$display$title, "Custom Title")
@@ -231,9 +257,10 @@ describe("querychat_tool_result()", {
it("does not include title for query action", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
result <- querychat_tool_result(
- df_source,
+ executor,
query = "SELECT * FROM test_table",
title = "Should be ignored",
action = "query"
@@ -244,17 +271,18 @@ describe("querychat_tool_result()", {
it("sets open state based on action and tool details option", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
withr::local_options(querychat.tool_details = NULL)
query_result <- querychat_tool_result(
- df_source,
+ executor,
query = "SELECT * FROM test_table",
action = "query"
)
expect_false(query_result@extra$display$open)
reset_result <- querychat_tool_result(
- df_source,
+ executor,
query = NULL,
action = "reset"
)
@@ -263,9 +291,10 @@ describe("querychat_tool_result()", {
it("shows request on error", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
result <- querychat_tool_result(
- df_source,
+ executor,
query = "INVALID SQL",
action = "query"
)
@@ -275,9 +304,10 @@ describe("querychat_tool_result()", {
it("hides request on success", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
result <- querychat_tool_result(
- df_source,
+ executor,
query = "SELECT * FROM test_table",
action = "query"
)
@@ -291,7 +321,8 @@ describe("tool_query()", {
it("returns an ellmer tool object", {
df_source <- local_data_frame_source(new_test_df())
- tool <- tool_query(df_source)
+ executor <- local_executor(df_source)
+ tool <- tool_query(executor)
expect_s3_class(tool, "ellmer::ToolDef")
expect_equal(tool@name, "querychat_query")
@@ -299,7 +330,8 @@ describe("tool_query()", {
it("includes database type in description", {
df_source <- local_data_frame_source(new_test_df())
- tool <- tool_query(df_source)
+ executor <- local_executor(df_source)
+ tool <- tool_query(executor)
# DataFrameSource uses DuckDB
expect_match(tool@description, "DuckDB|duckdb", ignore.case = TRUE)
@@ -307,7 +339,8 @@ describe("tool_query()", {
it("creates a working tool function", {
df_source <- local_data_frame_source(new_test_df())
- tool <- tool_query(df_source)
+ executor <- local_executor(df_source)
+ tool <- tool_query(executor)
# Execute the tool function
result <- tool(query = "SELECT * FROM test_table LIMIT 1")
@@ -322,8 +355,13 @@ describe("tool_update_dashboard()", {
it("returns an ellmer tool object", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
- tool <- tool_update_dashboard(df_source)
+ tool <- tool_update_dashboard(
+ executor,
+ table_names = "test_table",
+ update_fn = function(query, title, table) {}
+ )
expect_s3_class(tool, "ellmer::ToolDef")
expect_equal(tool@name, "querychat_update_dashboard")
@@ -331,8 +369,13 @@ describe("tool_update_dashboard()", {
it("includes database type in description", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
- tool <- tool_update_dashboard(df_source)
+ tool <- tool_update_dashboard(
+ executor,
+ table_names = "test_table",
+ update_fn = function(query, title, table) {}
+ )
# DataFrameSource uses DuckDB
expect_match(tool@description, "DuckDB|duckdb", ignore.case = TRUE)
@@ -340,40 +383,47 @@ describe("tool_update_dashboard()", {
it("creates a working tool function", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
res_update <- NULL
- tool <- tool_update_dashboard(df_source, function(query, title) {
- res_update <<- list(query = query, title = title)
- })
+ tool <- tool_update_dashboard(
+ executor,
+ table_names = "test_table",
+ update_fn = function(query, title, table) {
+ res_update <<- list(query = query, title = title, table = table)
+ }
+ )
res_tool <- tool(
query = "SELECT * FROM test_table WHERE id > 2",
- title = "Filtered View"
+ title = "Filtered View",
+ table = "test_table"
)
expect_s7_class(res_tool, ellmer::ContentToolResult)
expect_equal(res_update$query, "SELECT * FROM test_table WHERE id > 2")
expect_equal(res_update$title, "Filtered View")
+ expect_equal(res_update$table, "test_table")
})
})
describe("tool_reset_dashboard()", {
- reset_fn <- function() {
- "Reset executed"
- }
-
it("returns an ellmer tool object", {
- tool <- tool_reset_dashboard(reset_fn)
+ tool <- tool_reset_dashboard(
+ reset_fn = function(table) {},
+ table_names = c("test_table")
+ )
expect_s3_class(tool, "ellmer::ToolDef")
expect_equal(tool@name, "querychat_reset_dashboard")
})
it("uses the provided reset function", {
- tool <- tool_reset_dashboard(reset_fn)
+ reset_fn <- function(table) paste("Reset executed for", table)
+ tool <- tool_reset_dashboard(reset_fn, table_names = c("test_table"))
expect_s3_class(tool, "ellmer::ToolDef")
- expect_equal(tool(), "Reset executed")
+ expect_equal(tool("test_table"), "Reset executed for test_table")
})
})
@@ -382,43 +432,59 @@ describe("tool_update_dashboard_impl()", {
it("returns a function", {
df_source <- local_data_frame_source(new_test_df())
- current_query <- shiny::reactiveVal("SELECT * FROM test_table")
- current_title <- shiny::reactiveVal("All Data")
+ executor <- local_executor(df_source)
- impl_fn <- tool_update_dashboard_impl(df_source)
+ impl_fn <- tool_update_dashboard_impl(
+ executor,
+ table_names = "test_table",
+ update_fn = function(query, title, table) {}
+ )
expect_type(impl_fn, "closure")
})
it("updates reactive values on successful query", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
res_update <- NULL
- impl_fn <- tool_update_dashboard_impl(df_source, function(query, title) {
- res_update <<- list(query = query, title = title)
- })
+ impl_fn <- tool_update_dashboard_impl(
+ executor,
+ table_names = "test_table",
+ update_fn = function(query, title, table) {
+ res_update <<- list(query = query, title = title, table = table)
+ }
+ )
res_tool <- impl_fn(
query = "SELECT * FROM test_table WHERE id < 3",
- title = "First Two"
+ title = "First Two",
+ table = "test_table"
)
expect_equal(res_update$query, "SELECT * FROM test_table WHERE id < 3")
expect_equal(res_update$title, "First Two")
+ expect_equal(res_update$table, "test_table")
expect_null(res_tool@error)
})
it("does not update reactive values on query error", {
df_source <- local_data_frame_source(new_test_df())
+ executor <- local_executor(df_source)
res_update <- NULL
- impl_fn <- tool_update_dashboard_impl(df_source, function(query, title) {
- res_update <<- list(query = query, title = title)
- })
+ impl_fn <- tool_update_dashboard_impl(
+ executor,
+ table_names = "test_table",
+ update_fn = function(query, title, table) {
+ res_update <<- list(query = query, title = title, table = table)
+ }
+ )
res_tool <- impl_fn(
query = "INVALID SQL",
- title = "Should Not Update"
+ title = "Should Not Update",
+ table = "test_table"
)
# `update_fn` was not called
@@ -427,3 +493,39 @@ describe("tool_update_dashboard_impl()", {
expect_s3_class(res_tool@error, class = "error")
})
})
+
+describe("get_schema_result_display()", {
+ it("returns a sentinel span with data-table attribute", {
+ result <- GetSchemaResult(
+ value = "Table: orders\nColumns:\n- id (INTEGER)",
+ table_name = "orders"
+ )
+ html <- get_schema_result_display(result)
+ html_str <- as.character(html)
+ expect_true(grepl("qc-schema-collector", html_str))
+ expect_true(grepl('data-table="orders"', html_str))
+ expect_true(
+ grepl("display:none", html_str) || grepl("display: none", html_str)
+ )
+ })
+
+ it("embeds schema text in data-schema attribute", {
+ schema <- "Table: orders\nColumns:\n- id (INTEGER)"
+ result <- GetSchemaResult(value = schema, table_name = "orders")
+ html <- get_schema_result_display(result)
+ html_str <- as.character(html)
+ expect_true(grepl("data-schema", html_str))
+ expect_true(grepl("orders", html_str))
+ })
+
+ it("includes querychat-schema-display HTML dependency", {
+ result <- GetSchemaResult(
+ value = "Table: t\nColumns:\n- x (TEXT)",
+ table_name = "t"
+ )
+ html <- get_schema_result_display(result)
+ deps <- htmltools::findDependencies(html)
+ dep_names <- vapply(deps, function(d) d$name, character(1))
+ expect_true("querychat-schema-display" %in% dep_names)
+ })
+})
diff --git a/pkg-r/tests/testthat/test-viz-tool.R b/pkg-r/tests/testthat/test-viz-tool.R
index ed5104541..89964ff0c 100644
--- a/pkg-r/tests/testthat/test-viz-tool.R
+++ b/pkg-r/tests/testthat/test-viz-tool.R
@@ -134,10 +134,12 @@ describe("tool_visualize_dashboard()", {
callback_data <<- data
}
)
- suppressWarnings(tool(
- ggsql = "SELECT * FROM test_table VISUALISE value AS x DRAW histogram",
- title = "Test"
- ))
+ suppressWarnings(
+ tool(
+ ggsql = "SELECT * FROM test_table VISUALISE value AS x DRAW histogram",
+ title = "Test"
+ )
+ )
expect_type(callback_data, "list")
expect_true(all(c("ggsql", "title", "widget_id") %in% names(callback_data)))
expect_identical(footer_data$dom_widget_id, footer_data[[3]])
@@ -365,10 +367,12 @@ describe("tool_visualize_dashboard()", {
update_fn = function(data) {}
)
- suppressWarnings(tool(
- ggsql = "SELECT * FROM test_table VISUALISE value AS x DRAW histogram",
- title = "Test"
- ))
+ suppressWarnings(
+ tool(
+ ggsql = "SELECT * FROM test_table VISUALISE value AS x DRAW histogram",
+ title = "Test"
+ )
+ )
expect_identical(
footer_data$dom_widget_id,
diff --git a/pkg-r/vignettes/build.Rmd b/pkg-r/vignettes/build.Rmd
index 860cea0f7..54ed3f5fa 100644
--- a/pkg-r/vignettes/build.Rmd
+++ b/pkg-r/vignettes/build.Rmd
@@ -15,23 +15,16 @@ knitr::opts_chunk$set(
)
```
-While `querychat_app()` provides a quick way to start exploring data, building bespoke Shiny apps with querychat unlocks the full power of integrating natural language data exploration with custom visualizations, layouts, and interactivity. This guide shows you how to integrate querychat into your own Shiny applications and leverage its reactive data outputs to create rich, interactive dashboards.
+While `querychat_app()` provides a quick way to start exploring data, building bespoke Shiny apps with querychat unlocks the full power of integrating natural language data exploration with custom visualizations, layouts, and interactivity. This guide shows you how to integrate querychat into your own Shiny applications and leverage its reactive data outputs to create rich, interactive experiences.
-querychat is a particularly good fit for Shiny apps that have:
-
-1. **A single data source** (or a set of related tables that can be joined)
-2. **Multiple filters** that let users slice and explore the data in different ways
-3. **Several visualizations and outputs** that all depend on the same filtered data
-
-In these apps, querychat can replace or augment your filtering UI by allowing users to describe what they want to see in natural language. Instead of building complex filter controls, users can simply ask questions like "show me customers from California who spent over $1000 last quarter" and querychat will generate the appropriate SQL query.
+querychat lets users ask questions of their data in plain language — filtering, sorting, summarizing, joining across tables, and creating visualizations — all without needing to write SQL or navigate complex filter UIs. You can use it as the primary exploration interface in a standalone app, or embed it alongside curated views in an existing dashboard to let users go deeper than the views you designed.
This is especially valuable when:
- Your data has many columns and building a UI for all possible filters would be overwhelming
- Users want to explore ad-hoc combinations of filters that you didn't anticipate
-- You want to make data exploration more accessible to users who aren't comfortable with traditional filtering UIs
-
-If you have an existing app with a reactive data frame that flows through multiple outputs, querychat can be a natural addition to provide an alternative way to filter that data.
+- You have multiple related tables that users may want to query and join
+- You want to make data exploration more accessible to non-technical users
## Starter template
@@ -419,18 +412,75 @@ This is equivalent to the user asking the LLM to "reset" or "show all data".
## Multiple tables
-Currently, you have two options for exploring multiple tables in querychat:
+querychat can work with multiple related tables in a single chat interface, letting users query across tables, join data, and filter any table independently. Register additional tables with `$add_table()` after creating the `QueryChat` instance, then access per-table state through the `$table()` method.
-1. Join the tables into a single table before passing to querychat
-2. Use multiple querychat instances in the same app
+### Registering tables
-The first option makes it possible to chat with multiple tables inside a single chat interface, whereas the second option requires a separate chat interface for each table.
+Pass the first table when creating `QueryChat`, then call `$add_table()` for each additional table:
-::: {.alert .alert-info}
-### Multiple filtered tables
+```{r}
+library(querychat)
-We do intend on supporting multiple filtered tables in a future release -- if you're interested in this feature, please upvote [the relevant issue](https://github.com/posit-dev/querychat/issues/6)
-:::
+qc <- QueryChat$new(orders, "orders")
+qc$add_table(customers, "customers")
+qc$add_table(products, "products")
+```
+
+The LLM can query any registered table and write joins across them. You can inspect which tables are registered with `qc$table_names()`.
+
+### Per-table reactive access
+
+When working with multiple tables, access filtered data and SQL for each table individually using `$table()`:
+
+```{r}
+server <- function(input, output, session) {
+ qc_vals <- qc$server()
+
+ output$orders_table <- renderDataTable({
+ qc_vals$table("orders")$df()
+ })
+
+ output$orders_sql <- renderText({
+ qc_vals$table("orders")$sql()
+ })
+
+ output$customers_table <- renderDataTable({
+ qc_vals$table("customers")$df()
+ })
+}
+```
+
+Each table has its own `$df()`, `$sql()`, and `$title()` reactives that update independently when the user filters that specific table.
+
+### Tracking the active table
+
+Use `$current_table()` to find out which table the LLM most recently queried. This is useful for auto-switching a tabbed UI to the relevant table:
+
+```{r}
+observe({
+ tbl <- qc_vals$current_table()
+ req(tbl)
+ nav_select("table_tabs", selected = tbl)
+})
+```
+
+### Data dictionary
+
+When working with multiple related tables, providing a [data dictionary](context.html#data-dictionary) is strongly recommended. It tells the LLM how tables relate to each other, which columns are keys, and what domain terms mean — all of which help it write accurate joins and queries.
+
+```{r}
+qc <- QueryChat$new(
+ orders, "orders",
+ data_dict = "data-dict.yaml"
+)
+qc$add_table(customers, "customers")
+```
+
+See [Provide context](context.html#data-dictionary) for the full data dictionary format.
+
+### Separate chat interfaces
+
+If your tables are truly independent (not related), you may prefer separate `QueryChat` instances, each with its own chat interface:
app.R
diff --git a/pkg-r/vignettes/context.Rmd b/pkg-r/vignettes/context.Rmd
index aac5efe8b..e66413cf5 100644
--- a/pkg-r/vignettes/context.Rmd
+++ b/pkg-r/vignettes/context.Rmd
@@ -15,70 +15,135 @@ knitr::opts_chunk$set(
)
```
-querychat automatically gathers information about your table to help the LLM write accurate SQL queries. This includes column names and types, numerical ranges, and categorical value examples. (All of this information is provided to the LLM as part of the **system prompt** -- a string of text containing instructions and context for the LLM to consider when responding to user queries.)
+querychat automatically gathers schema information about your tables — column names, types, numerical ranges, and categorical values — and makes it available to the LLM on demand via the `querychat_get_schema` [tool](tools.html#schema-retrieval). The LLM calls this tool before writing SQL to understand the structure of the tables it's querying.
Importantly, we are **not** sending your raw data to the LLM and asking it to do complicated math. The LLM only needs to understand the structure and schema of your data in order to write SQL queries.
-You can get even better results by customizing the system prompt in three ways:
+You can get even better results by providing additional context:
-1. Add a [data description](#data-description) to provide more context about what the data represents
-2. Add [custom instructions](#extra-instructions) to guide the LLM's behavior
-3. Use a fully [custom prompt template](#custom-template) if you want complete control (useful if you want to be certain the model cannot see any literal values from your data)
+1. Add a [data dictionary](#data-dictionary) to describe tables, columns, relationships, and domain terminology (recommended)
+2. Add a [data description](#data-description) for a simpler alternative when working with a single, straightforward table
+3. Add [custom instructions](#extra-instructions) to guide the LLM's behavior
+4. Use a fully [custom prompt template](#custom-template) if you want complete control
```{r}
library(querychat)
library(palmerpenguins)
```
-## Default prompt
+## Data dictionary {#data-dictionary}
-For full visibility into the system prompt that querychat generates for the LLM, you can inspect the `system_prompt` field. This is useful for debugging and understanding exactly what context the LLM is using:
+A **data dictionary** is a YAML file that describes your tables, columns, relationships, and domain-specific terminology. It's the recommended way to provide context, especially when working with [multiple tables](build.html#multiple-tables) or when your data has domain-specific meaning that isn't obvious from column names alone.
```{r}
-qc <- querychat(penguins)
-cat(qc$system_prompt)
+qc <- QueryChat$new(
+ orders, "orders",
+ data_dict = "data-dict.yaml"
+)
+qc$add_table(customers, "customers")
```
-By default, the system prompt contains the following components:
+### Format
+
+A data dictionary has three top-level sections: `tables`, `relationships`, and `glossary`.
+
+```yaml
+# data-dict.yaml
+version: "0.2.0"
+
+tables:
+ orders:
+ description: One row per customer order.
+ columns:
+ - name: order_id
+ type: number(id)
+ constraints: [primary_key]
+ description: Unique order identifier.
+ - name: customer_id
+ type: number(id)
+ constraints: [foreign_key]
+ description: References customers.id.
+ - name: total
+ type: number(quantity)
+ description: Order total in USD.
+ - name: status
+ type: enum
+ values: [pending, shipped, delivered, cancelled]
+ description: Current order status.
+
+ customers:
+ description: One row per customer.
+ columns:
+ - name: id
+ type: number(id)
+ constraints: [primary_key]
+ description: Unique customer identifier.
+ - name: name
+ type: string
+ description: Full name.
+ - name: region
+ type: string
+ description: Geographic sales region.
+
+relationships:
+ - description: Each order belongs to one customer.
+ cardinality: many-to-one
+ join: orders.customer_id = customers.id
+
+glossary:
+ AOV: Average order value — total revenue divided by number of orders.
+ churn: A customer who has not placed an order in the last 90 days.
+```
+
+#### Tables
+
+Each entry under `tables` describes one table. The key must match the table name you pass to `QueryChat$new()` or `$add_table()`.
+
+- **`description`**: What this table represents (one sentence is usually enough).
+- **`columns`**: A list of column annotations. Each column can have:
+ - `name`: Column name (must match the actual column)
+ - `type`: Semantic type hint — `string`, `number`, `number(id)`, `number(quantity)`, `date`, `enum`
+ - `constraints`: Optional list — `primary_key`, `foreign_key`
+ - `description`: What this column means in plain English
+ - `values`: For `enum` columns, the list of possible values
+
+Columns listed in the data dictionary are excluded from the auto-generated schema (since your description supersedes the auto-detected metadata). Columns not listed are still auto-detected as usual.
+
+#### Relationships
+
+The `relationships` section tells the LLM how to join tables. Each entry has:
+
+- `description`: A plain-English description of the relationship
+- `cardinality`: `one-to-one`, `one-to-many`, or `many-to-one`
+- `join`: The join condition (e.g., `orders.customer_id = customers.id`)
+
+#### Glossary
+
+The `glossary` section defines domain-specific terms that users might use in their questions. This helps the LLM translate business language into correct SQL.
-1. The basic set of behaviors and guidelines the LLM must follow in order for querychat to work properly, including how to use [tools](tools.html) to execute queries and update the app.
-2. The SQL schema of the data frame you provided. This includes:
- - Column names
- - Data types (integer, real, boolean, date/datetime, text)
- - For text columns with less than 10 unique values, we assume they are categorical variables and include the list of values
- - For integer and real columns, we include the range
-3. A [data description](#data-description) (if provided via `data_description`)
-4. [Additional instructions](#additional-instructions) you want to use to guide querychat's behavior (if provided via `extra_instructions`).
## Data description {#data-description}
-If your column names are descriptive, querychat may already work well without additional context. However, if your columns are named `x`, `V1`, `value`, etc., you should provide a data description. Use the `data_description` parameter for this:
+For simple single-table use cases where a full data dictionary would be overkill, you can provide a **data description** — a free-form markdown file or string that describes what the data represents. Use the `data_description` parameter:
```{r}
-qc <- querychat(
+qc <- QueryChat$new(
penguins,
data_description = "data_description.md"
)
-
-cat(qc$system_prompt)
```
-querychat doesn't need this information in any particular format -- just provide what a human would find helpful:
+querychat doesn't need this in any particular format — just provide what a human would find helpful:
```markdown
-This dataset contains information about Palmer Archipelago penguins,
-collected for studying penguin populations.
+This dataset contains information about Palmer Archipelago penguins.
- species: Penguin species (Adelie, Chinstrap, Gentoo)
- island: Island where observed (Torgersen, Biscoe, Dream)
- bill_length_mm: Bill length in millimeters
-- bill_depth_mm: Bill depth in millimeters
-- flipper_length_mm: Flipper length in millimeters
- body_mass_g: Body mass in grams
-- sex: Penguin sex (male, female)
-- year: Year of observation
```
## Additional instructions {#extra-instructions}
@@ -86,12 +151,10 @@ collected for studying penguin populations.
You can add custom instructions to guide the LLM's behavior using the `extra_instructions` parameter:
```{r}
-qc <- querychat(
+qc <- QueryChat$new(
penguins,
extra_instructions = "instructions.md"
)
-
-cat(qc$system_prompt)
```
Or as a string:
@@ -99,22 +162,29 @@ Or as a string:
```{r}
instructions <- "
- Use British spelling conventions
-- Stay on topic and only discuss the data dashboard
+- Stay on topic and only discuss the data
- Refuse to answer unrelated questions
"
-qc <- querychat(
+qc <- QueryChat$new(
penguins,
extra_instructions = instructions
)
-
-cat(qc$system_prompt)
```
::: {.alert .alert-warning}
LLMs may not always follow your instructions perfectly. Test extensively when changing instructions or models.
:::
+## Default prompt
+
+For full visibility into the system prompt that querychat generates for the LLM, you can inspect the `system_prompt` field. This is useful for debugging and understanding exactly what context the LLM is working with:
+
+```{r}
+qc <- QueryChat$new(penguins)
+cat(qc$system_prompt)
+```
+
## Custom template {#custom-template}
If you want more control over the system prompt, you can provide a custom prompt template using the `prompt_template` parameter. This is for more advanced users who want to fully customize the LLM's behavior. See the [QueryChat reference](../reference/QueryChat.html) for details on the available template variables.
diff --git a/pkg-r/vignettes/tools.Rmd b/pkg-r/vignettes/tools.Rmd
index 373bac17c..c1a735aa3 100644
--- a/pkg-r/vignettes/tools.Rmd
+++ b/pkg-r/vignettes/tools.Rmd
@@ -19,17 +19,26 @@ querychat combines [tool calling](https://ellmer.tidyverse.org/articles/tool-cal
One important thing to understand generally about querychat's tools is they are R functions, and that execution happens on _your machine_, not on the LLM provider's side. In other words, the SQL queries generated by the LLM are executed locally in the R process running the app.
-querychat provides the LLM access to three tool groups:
+querychat provides the LLM access to four tool groups:
-1. **Data updating** - Filter and sort data (without sending results to the LLM).
-2. **Data analysis** - Calculate summaries and return results for interpretation by the LLM.
-3. **Data visualization** - Create charts inline in the chat.
+1. **Schema retrieval** - Fetch table structure before writing SQL.
+2. **Data updating** - Filter and sort data (without sending results to the LLM).
+3. **Data analysis** - Calculate summaries and return results for interpretation by the LLM.
+4. **Data visualization** - Create charts inline in the chat.
```{r}
library(querychat)
library(palmerpenguins)
```
+## Schema retrieval {#schema-retrieval}
+
+Before writing any SQL query, the LLM calls the `querychat_get_schema` tool to retrieve column names, types, value ranges, and descriptions for a specific table. This on-demand approach means the LLM only fetches schema for the tables it actually needs, keeping the system prompt lean and startup fast — especially when working with [multiple tables](build.html#multiple-tables) or large databases.
+
+If you've provided a [data dictionary](context.html#data-dictionary), the schema response includes your column descriptions and relationship information. Columns annotated in the data dictionary are excluded from the auto-generated schema metadata (since your description supersedes it).
+
+This tool is always registered and cannot be disabled.
+
## Data updating {#data-updating}
When a user asks to "Show me..." or "Filter to..." or "Sort by...", the LLM requests a call to the `update_dashboard` tool with an appropriate SQL query as input. An important constraint is that the query must return all original schema columns (typically using `SELECT *`). When called, querychat will both set a reactive value holding [the current SQL query](build.html#sql-query) and execute the query to get the result. The result of query then used to set a reactive value holding the [filtered/sorted data frame](build.html#filtered-data). Thanks to reactivity, this will automatically update any views depending on this data frame, such as the data table displayed in the UI.
@@ -71,7 +80,7 @@ This tool:
2. Renders the `VISUALISE` clause as an interactive chart
3. Displays the chart inline in the chat
-Unlike the data updating tools, visualization queries don't affect the dashboard filter.
+Unlike the data updating tools, visualization queries don't affect the active data filter.
They query the full dataset independently, and each call produces a new inline chart message in the chat.
The inline chart includes controls for fullscreen viewing, saving as PNG/SVG, and a "Show Query" toggle that reveals the underlying ggsql code.
@@ -107,6 +116,7 @@ If you'd like to better understand how the tools work and how the LLM is prompte
**Prompts:**
+- [`prompts/tool-get-schema.md`](https://github.com/posit-dev/querychat/blob/main/pkg-r/inst/prompts/tool-get-schema.md)
- [`prompts/tool-update-dashboard.md`](https://github.com/posit-dev/querychat/blob/main/pkg-r/inst/prompts/tool-update-dashboard.md)
- [`prompts/tool-reset-dashboard.md`](https://github.com/posit-dev/querychat/blob/main/pkg-r/inst/prompts/tool-reset-dashboard.md)
- [`prompts/tool-query.md`](https://github.com/posit-dev/querychat/blob/main/pkg-r/inst/prompts/tool-query.md)
diff --git a/pyproject.toml b/pyproject.toml
index 373c9804d..bec6902f5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,6 +29,7 @@ dependencies = [
"chevron",
"sqlalchemy>=2.0.0", # Using 2.0+ for improved type hints and API
"great-tables>=0.16.0",
+ "pyyaml",
]
classifiers = [
"Programming Language :: Python",