diff --git a/pkg-py/CHANGELOG.md b/pkg-py/CHANGELOG.md index 7c730473..607da5f7 100644 --- a/pkg-py/CHANGELOG.md +++ b/pkg-py/CHANGELOG.md @@ -41,7 +41,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Improvements -* Chat greetings now use shinychat's greeting API (requires shinychat >= 0.4.0). A provided `greeting` renders instantly when the app loads, and when no `greeting` is given one is generated on demand without being added to the conversation history. Generated greetings are now preserved across bookmark/restore. (#249) +* Chat greetings now use shinychat's greeting API (requires shinychat >= 0.4.0). A provided `greeting` renders instantly when the app loads, and when no `greeting` is given one is generated on demand — now **schema-aware**, so it can describe the data it's about to help you explore — without being added to the conversation history. Generated greetings are preserved across bookmark/restore. Tables passed to `QueryChat()` are described in the greeting automatically; opt additional tables in with `include_in_greeting=True` on `add_table()`/`add_tables()`, or fine-tune which tables and which template the greeting uses via `qc.greeter`. (#249, #261) * The system prompt is now lighter: full schema is no longer embedded upfront. Instead the LLM fetches per-table schema on demand via the new `querychat_get_schema` tool — and only when it needs to. When a `DataDict` is provided, the tool skips columns that already have descriptions, so the LLM only pays for what isn't already documented. (#195) * The query tool result card now starts collapsed by default. Users can still expand it to see the SQL query and results. Set `QUERYCHAT_TOOL_DETAILS=expanded` to restore the previous behavior. 
(#239) diff --git a/pkg-py/docs/greet.qmd b/pkg-py/docs/greet.qmd index 65636af9..7b0d6811 100644 --- a/pkg-py/docs/greet.qmd +++ b/pkg-py/docs/greet.qmd @@ -74,3 +74,37 @@ qc = QueryChat( ) app = qc.app() ``` + +### Greetings with multiple tables + +The generated greeting is *schema-aware*: querychat shares the schema of the +relevant tables with the model so the opening message can describe the data +it's about to help you explore. Tables passed to `QueryChat()` are included in +the greeting automatically. + +Tables added later with `add_table()` or `add_tables()` are **not** included by +default — pass `include_in_greeting=True` to opt them in: + +```python +qc = QueryChat(orders, "orders") # included automatically +qc.add_table(customers, "customers") # not included by default +qc.add_table(products, "products", include_in_greeting=True) # opted in + +qc.greeter.tables +#> ['orders', 'products'] +``` + +For `add_tables()`, `include_in_greeting` can also be a list of names +selecting which of the added tables to include: + +```python +qc.add_tables(engine, include_in_greeting=["orders", "customers"]) +``` + +You can also set the included tables directly, or swap in a custom greeting +template, through `qc.greeter`: + +```python +qc.greeter.tables = ["orders", "customers"] +qc.greeter.prompt = "my-greeting-template.md" +``` diff --git a/pkg-py/src/querychat/_dash.py b/pkg-py/src/querychat/_dash.py index 5d4c4df1..024dfeaa 100644 --- a/pkg-py/src/querychat/_dash.py +++ b/pkg-py/src/querychat/_dash.py @@ -298,6 +298,7 @@ def ui( client_factory=self._client_factory, greeting=self.greeting, query_executor=self._require_query_executor("ui"), + greeting_client_factory=self._build_greeting_client, ) return html.Div( @@ -569,7 +570,9 @@ async def handle_chat( if not state.initialize_greeting_if_preset(): greeting = "" - async for chunk in stream_response_async(state.client, GREETING_PROMPT): + async for chunk in stream_response_async( + state.build_greeting_client(), 
GREETING_PROMPT + ): greeting += chunk state.set_greeting(greeting) diff --git a/pkg-py/src/querychat/_gradio.py b/pkg-py/src/querychat/_gradio.py index 18a87cb8..635802c0 100644 --- a/pkg-py/src/querychat/_gradio.py +++ b/pkg-py/src/querychat/_gradio.py @@ -288,7 +288,9 @@ def initialize_greeting(state_dict: AppStateDict): if not state.initialize_greeting_if_preset(): greeting = "" - for chunk in stream_response(state.client, GREETING_PROMPT): + for chunk in stream_response( + state.build_greeting_client(), GREETING_PROMPT + ): greeting += chunk state.set_greeting(greeting) diff --git a/pkg-py/src/querychat/_querychat_base.py b/pkg-py/src/querychat/_querychat_base.py index d3b4c612..6dabf558 100644 --- a/pkg-py/src/querychat/_querychat_base.py +++ b/pkg-py/src/querychat/_querychat_base.py @@ -32,12 +32,12 @@ validate_source_group_compatibility, ) from ._querychat_core import ( - GREETING_PROMPT, AppState, AppStateDict, create_app_state, warn_multi_table_flat_accessor, ) +from ._querychat_greeter import QueryChatGreeter from ._system_prompt import QueryChatSystemPrompt from ._utils import MISSING, MISSING_TYPE, is_ibis_backend, is_ibis_table from ._viz_utils import has_viz_deps, has_viz_tool @@ -114,6 +114,7 @@ def __init__( self._client_console = None self._system_prompt: QueryChatSystemPrompt | None = None + self._greeter: QueryChatGreeter | None = None if data_source is not None: if table_name is None: @@ -123,7 +124,7 @@ def __init__( raise ValueError( "table_name is required when data_source is provided" ) - self.add_table(data_source, table_name) + self.add_table(data_source, table_name, include_in_greeting=True) def _build_system_prompt( self, @@ -319,10 +320,50 @@ def client( def generate_greeting(self, *, echo: Literal["none", "output"] = "none") -> str: """Generate a welcome greeting for the chat.""" self._require_initialized("generate_greeting") - chat = create_client(self._client_spec) - if self._system_prompt is not None: - chat.system_prompt = 
self._system_prompt.render(self.tools) - return str(chat.chat(GREETING_PROMPT, echo=echo)) + return self.greeter.generate(echo=echo) + + @property + def greeter(self) -> QueryChatGreeter: + """Greeting configuration and generator for this QueryChat instance.""" + if self._greeter is None: + self._greeter = QueryChatGreeter(self) + return self._greeter + + def _build_greeting_client( + self, client_spec: str | chatlas.Chat | None = None + ) -> chatlas.Chat: + """ + Build a fresh chat client configured with the greeting system prompt. + + ``client_spec`` overrides the instance client spec so the greeting is + generated with the same provider/model as the session client (for + example, when ``server(client=...)`` overrides it). + """ + tbls = [n for n in self.greeter.tables if n in self._data_sources] + sources = {n: self._data_sources[n] for n in tbls} + # Keep a dict if it describes an included table, or if it is a global + # (table-less) dict carrying a dict-level description. Drop the + # cross-table global fields (relationships, glossary) so a curated + # greeting subset can't leak excluded-table prose; per-table entries are + # scoped to the included tables at render time. 
+ greeting_dicts = [ + dd.model_copy(update={"relationships": [], "glossary": {}}) + for dd in self._data_dicts + if any(n in tbls for n in dd.tables) + or (not dd.tables and dd.description) + ] + greeting_prompt_obj = QueryChatSystemPrompt( + prompt_template=self.greeter.prompt, + data_sources=sources, + data_description=self._data_description, + extra_instructions=None, + categorical_threshold=self._categorical_threshold, + data_dicts=greeting_dicts, + ) + chat = create_client(client_spec or self._client_spec) + chat.set_turns([]) + chat.system_prompt = greeting_prompt_obj.render(None) + return chat def console( self, @@ -381,6 +422,7 @@ def add_table( table_name: str, *, replace: bool = False, + include_in_greeting: bool = False, ) -> None: """ Add or replace a table in the QueryChat instance. @@ -394,9 +436,13 @@ def add_table( replace If True, replace an existing table with the same name. If False (default), raise ValueError if the table already exists. + include_in_greeting + If True, include this table's schema in the greeting system prompt. Raises ------ + TypeError + If include_in_greeting is not a bool. ValueError If table_name already exists (and replace=False) or is invalid. RuntimeError @@ -409,6 +455,12 @@ def add_table( "Add all tables before calling .server() or .app()." ) + if not isinstance(include_in_greeting, bool): + raise TypeError( + "include_in_greeting must be True or False, got " + f"{type(include_in_greeting).__name__}." 
+ ) + if not is_pins_board(data_source) and not re.match( r"^[a-zA-Z][a-zA-Z0-9_]*$", table_name ): @@ -445,12 +497,16 @@ def add_table( self._query_executor.cleanup() self._query_executor = None + if include_in_greeting and table_name not in self.greeter.tables: + self.greeter.tables = [*self.greeter.tables, table_name] + def add_tables( # noqa: PLR0912 self, data_source: sqlalchemy.Engine | SQLBackend, tables: list[str] | None = None, *, replace: bool = False, + include_in_greeting: bool | list[str] = False, ) -> None: """ Add multiple tables from a SQLAlchemy engine or Ibis backend in a single call. @@ -471,6 +527,10 @@ def add_tables( # noqa: PLR0912 If ``True``, replace any existing table whose name appears in ``tables``. If ``False`` (default), raise ``ValueError`` if any name already exists. + include_in_greeting + ``True`` to include all added tables in the greeting, ``False`` (default) + for none, or a list of table names to include. Any other type raises + ``TypeError``. Raises ------ @@ -537,6 +597,18 @@ def normalized_builder(name: str) -> DataSource: if table_name in self._data_sources and not replace: raise ValueError(f"Table '{table_name}' already exists") + if isinstance(include_in_greeting, bool): + greeting_names = list(tables) if include_in_greeting else [] + elif isinstance(include_in_greeting, list) and all( + isinstance(name, str) for name in include_in_greeting + ): + greeting_names = [name for name in include_in_greeting if name in tables] + else: + raise TypeError( + "include_in_greeting must be True, False, or a list of table " + f"names, got {type(include_in_greeting).__name__}." 
+ ) + normalized = {name: normalized_builder(name) for name in tables} staged: dict[str, DataSource] = {} @@ -559,6 +631,12 @@ def normalized_builder(name: str) -> DataSource: self._query_executor.cleanup() self._query_executor = None + new_greeting = list(self.greeter.tables) + for name in greeting_names: + if name not in new_greeting: + new_greeting.append(name) + self.greeter.tables = new_greeting + def remove_table(self, table_name: str) -> None: """ Remove a table from the QueryChat instance. @@ -597,6 +675,10 @@ def remove_table(self, table_name: str) -> None: self._build_system_prompt(data_sources=next_data_sources) self._data_sources = next_data_sources + if self._greeter is not None: + self._greeter.tables = [ + n for n in self._greeter.tables if n != table_name + ] if self._query_executor is not None: with contextlib.suppress(Exception): self._query_executor.cleanup() @@ -888,6 +970,7 @@ def _deserialize_state(self, state_data: AppStateDict | None) -> AppState: client_factory=self._client_factory, greeting=self.greeting, query_executor=self._require_query_executor("_deserialize_state"), + greeting_client_factory=self._build_greeting_client, ) if state_data: state.update_from_dict(state_data) diff --git a/pkg-py/src/querychat/_querychat_core.py b/pkg-py/src/querychat/_querychat_core.py index 87d110ab..59cae534 100644 --- a/pkg-py/src/querychat/_querychat_core.py +++ b/pkg-py/src/querychat/_querychat_core.py @@ -108,8 +108,6 @@ def format_tool_result(result: ContentToolResult) -> str: return "" - - def format_query_error(e: Exception) -> str: """Format a query error with helpful guidance.""" error_msg = str(e).lower() @@ -145,6 +143,7 @@ class AppState: client: Chat query_executor: QueryExecutor | None = None greeting: Optional[str] = None + greeting_client_factory: Optional[Callable[[], Chat]] = None active_table: str | None = None # sql, title, error are per-table properties backed by _table_states @@ -245,7 +244,10 @@ def get_display_messages(self) -> 
list[DisplayMessage]: if text_parts: text = "\n\n".join(text_parts) - # Skip the greeting prompt - it's an internal message + # Hide the synthetic greeting prompt that older releases injected + # as a user turn onto the shared client. New sessions generate + # greetings on a separate client and never create this turn, but + # state serialized by such releases still restores it verbatim. if turn.role == "user" and text == GREETING_PROMPT: continue messages.append({"role": turn.role, "content": text}) @@ -271,6 +273,15 @@ def initialize_greeting_if_preset(self) -> bool: return True return False + def build_greeting_client(self) -> Chat: + """Build a fresh chat client configured with the greeting system prompt.""" + if self.greeting_client_factory is None: + raise RuntimeError( + "greeting_client_factory is not set on this AppState. " + "Pass greeting_client_factory to create_app_state()." + ) + return self.greeting_client_factory() + def to_dict(self) -> AppStateDict: """Serialize state to dict for framework state stores.""" return { @@ -317,6 +328,7 @@ def create_app_state( client_factory: ClientFactory, greeting: Optional[str] = None, query_executor: QueryExecutor | None = None, + greeting_client_factory: Optional[Callable[[], Chat]] = None, ) -> AppState: """Create AppState with callbacks connected via holder pattern.""" state_holder: dict[str, AppState | None] = {"state": None} @@ -339,6 +351,7 @@ def reset_callback(_table: str) -> None: client=client, query_executor=query_executor, greeting=greeting, + greeting_client_factory=greeting_client_factory, ) state_holder["state"] = state return state diff --git a/pkg-py/src/querychat/_querychat_greeter.py b/pkg-py/src/querychat/_querychat_greeter.py new file mode 100644 index 00000000..af989e5a --- /dev/null +++ b/pkg-py/src/querychat/_querychat_greeter.py @@ -0,0 +1,57 @@ +"""Greeting generation for QueryChat instances.""" + +from __future__ import annotations + +from pathlib import Path +from typing import 
TYPE_CHECKING, Literal + +from ._querychat_core import GREETING_PROMPT + +if TYPE_CHECKING: + from ._querychat_base import QueryChatBase + + +class QueryChatGreeter: + """Controls greeting generation for a QueryChat instance. Access via ``qc.greeter``.""" + + def __init__(self, parent: QueryChatBase) -> None: + self._parent = parent + self._tables: list[str] = [] + self._prompt: str | Path = Path(__file__).parent / "prompts" / "greeting.md" + + @property + def tables(self) -> list[str]: + """Table names whose context to include in the greeting.""" + return self._tables + + @tables.setter + def tables(self, value: list[str]) -> None: + if isinstance(value, str): + raise TypeError( + "greeter.tables must be a list of table names, not a single " + f"string. Did you mean [{value!r}]?" + ) + if not isinstance(value, list) or not all( + isinstance(name, str) for name in value + ): + raise TypeError( + "greeter.tables must be a list of table names, got " + f"{type(value).__name__}." + ) + self._tables = value + + @property + def prompt(self) -> str | Path: + """The greeting template (string or file path).""" + return self._prompt + + @prompt.setter + def prompt(self, value: str | Path) -> None: + self._prompt = value + + def generate(self, *, echo: Literal["none", "output"] = "none") -> str: + """Generate a greeting using the greeting system prompt.""" + chat = self._parent._build_greeting_client() + txt = str(chat.chat(GREETING_PROMPT, echo=echo)) + self._parent.greeting = txt + return txt diff --git a/pkg-py/src/querychat/_shiny.py b/pkg-py/src/querychat/_shiny.py index f616671f..c68f563d 100644 --- a/pkg-py/src/querychat/_shiny.py +++ b/pkg-py/src/querychat/_shiny.py @@ -330,6 +330,7 @@ def app_server(input: Inputs, output: Outputs, session: Session): client=self._create_session_client, enable_bookmarking=enable_bookmarking, tools=self.tools, + greeting_client_fn=self._build_greeting_client, ) @reactive.calc @@ -439,7 +440,12 @@ def ui(self, *, id: Optional[str] = 
None, **kwargs): A UI component. """ - return mod_ui(id or self.id, preload_viz=has_viz_tool(self.tools), greeting=self.greeting, **kwargs) + return mod_ui( + id or self.id, + preload_viz=has_viz_tool(self.tools), + greeting=self.greeting, + **kwargs, + ) def server( self, @@ -525,13 +531,18 @@ def title(): ) self._require_initialized("server") - resolved_client_spec = self._client_spec if isinstance(client, MISSING_TYPE) else client + resolved_client_spec = ( + self._client_spec if isinstance(client, MISSING_TYPE) else client + ) def create_session_client(**kwargs) -> chatlas.Chat: return self._create_session_client( client_spec=resolved_client_spec, **kwargs ) + def create_greeting_client() -> chatlas.Chat: + return self._build_greeting_client(client_spec=resolved_client_spec) + self._mark_server_initialized() return mod_server( id or self.id, @@ -541,6 +552,7 @@ def create_session_client(**kwargs) -> chatlas.Chat: client=create_session_client, enable_bookmarking=enable_bookmarking, tools=self.tools, + greeting_client_fn=create_greeting_client, ) @@ -821,6 +833,7 @@ def _ensure_server_started(self) -> None: client=self._create_session_client, enable_bookmarking=self._enable_bookmarking, tools=self.tools, + greeting_client_fn=self._build_greeting_client, ) def sidebar( @@ -882,7 +895,12 @@ def ui(self, *, id: Optional[str] = None, **kwargs): A UI component. 
""" - result = mod_ui(id or self.id, preload_viz=has_viz_tool(self.tools), greeting=self.greeting, **kwargs) + result = mod_ui( + id or self.id, + preload_viz=has_viz_tool(self.tools), + greeting=self.greeting, + **kwargs, + ) self._ensure_server_started() return result @@ -998,10 +1016,12 @@ def table(self, name: str) -> TableAccessor: qc.add_table(customers, "customers") qc.sidebar() + @render.data_frame def orders_table(): return qc.table("orders").df() + @render.data_frame def customers_table(): return qc.table("customers").df() diff --git a/pkg-py/src/querychat/_shiny_module.py b/pkg-py/src/querychat/_shiny_module.py index f0b3e8d4..0747f693 100644 --- a/pkg-py/src/querychat/_shiny_module.py +++ b/pkg-py/src/querychat/_shiny_module.py @@ -67,7 +67,6 @@ class TableState(Generic[IntoFrameT]): df: Callable[[], IntoFrameT] - @module.ui def mod_ui(*, preload_viz: bool = False, greeting: str | None = None, **kwargs): css_path = Path(__file__).parent / "static" / "css" / "styles.css" @@ -215,6 +214,7 @@ def mod_server( client: Callable[..., chatlas.Chat], enable_bookmarking: bool, tools: set[str] | None = None, + greeting_client_fn: Callable[[], chatlas.Chat] | None = None, ) -> ServerValues[IntoFrameT]: # Holds a generated greeting so it can be saved and restored on bookmark. # Static greetings live in the UI (chat_ui(greeting=)) and persist already. 
@@ -347,7 +347,11 @@ async def _handle_greeting_requested(): GreetWarning, stacklevel=2, ) - greeting_client = client(tools=None) + greeting_client = ( + greeting_client_fn() + if greeting_client_fn is not None + else client(tools=None) + ) stream = await greeting_client.stream_async(GREETING_PROMPT, echo="none") await chat_ui.set_greeting( shinychat.chat_greeting(stream, dismissible=False) @@ -409,9 +413,7 @@ async def _on_restore(x: RestoreState) -> None: ) ) if "querychat_viz_widgets" in vals: - restored = restore_viz_widgets( - executor, vals["querychat_viz_widgets"] - ) + restored = restore_viz_widgets(executor, vals["querychat_viz_widgets"]) viz_widgets[:] = restored if len(table_states) == 1: @@ -443,7 +445,9 @@ def _multi_table_df() -> IntoFrameT: return ServerValues( df=_multi_table_df, sql=_MultiTableWarnReactive(primary_state.sql, "sql", primary_name, table_list), # type: ignore[arg-type] - title=_MultiTableWarnReactive(primary_state.title, "title", primary_name, table_list), # type: ignore[arg-type] + title=_MultiTableWarnReactive( + primary_state.title, "title", primary_name, table_list + ), # type: ignore[arg-type] tables=table_states, client=chat, data_sources=data_sources, diff --git a/pkg-py/src/querychat/_streamlit.py b/pkg-py/src/querychat/_streamlit.py index b8093b83..77b6f1b6 100644 --- a/pkg-py/src/querychat/_streamlit.py +++ b/pkg-py/src/querychat/_streamlit.py @@ -225,6 +225,7 @@ def _get_state(self) -> AppState: ), greeting=self.greeting, query_executor=self._require_query_executor("_get_state"), + greeting_client_factory=self._build_greeting_client, ) return st.session_state[self._state_key] @@ -291,7 +292,8 @@ def ui(self) -> None: with st.chat_message("assistant"): placeholder = st.empty() placeholder.markdown("*Preparing your data assistant...*") - for chunk in stream_response(state.client, GREETING_PROMPT): + greeting_client = state.build_greeting_client() + for chunk in stream_response(greeting_client, GREETING_PROMPT): greeting += 
chunk placeholder.markdown(greeting, unsafe_allow_html=True) state.set_greeting(greeting) diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py index a7e17d6c..1caafb6f 100644 --- a/pkg-py/src/querychat/_system_prompt.py +++ b/pkg-py/src/querychat/_system_prompt.py @@ -33,7 +33,7 @@ def __init__( elif data_source is not None: self._data_sources = {data_source.table_name: data_source} else: - raise ValueError("Either data_source or data_sources must be provided") + self._data_sources = {} self._data_dicts: list[DataDict] = data_dicts or [] @@ -102,8 +102,18 @@ def escape_attr(val: str) -> str: if dd.description: attrs += f' description="{escape_attr(dd.description)}"' - body = yaml.dump(d, default_flow_style=False, allow_unicode=True, sort_keys=False).rstrip() if d else "" - blocks.append(f"\n{body}\n" if body else f"") + body = ( + yaml.dump( + d, default_flow_style=False, allow_unicode=True, sort_keys=False + ).rstrip() + if d + else "" + ) + blocks.append( + f"\n{body}\n" + if body + else f"" + ) unclaimed = [n for n in self._data_sources if n not in all_claimed] if unclaimed: @@ -115,7 +125,12 @@ def escape_attr(val: str) -> str: else None ) tables[name] = {"description": desc} if desc else None - yaml_str = yaml.dump({"tables": tables}, default_flow_style=False, allow_unicode=True, sort_keys=False).rstrip() + yaml_str = yaml.dump( + {"tables": tables}, + default_flow_style=False, + allow_unicode=True, + sort_keys=False, + ).rstrip() blocks.append(f"\n{yaml_str}\n") return "\n\n".join(blocks) @@ -131,17 +146,31 @@ def render(self, tools: set[str] | None) -> str: Fully rendered system prompt string """ - first_source = next(iter(self._data_sources.values())) - db_type = first_source.get_db_type() + first_source = next(iter(self._data_sources.values()), None) + db_type = first_source.get_db_type() if first_source is not None else "SQL" + # Data dicts can carry global (table-less) descriptions, so they may + # render even when 
no tables are selected (e.g. a generic greeting). has_dicts = bool(self._data_dicts) + semantic_views = ( + first_source.get_semantic_views_description() + if first_source is not None + else "" + ) + tables_overview = ( + "" + if has_dicts + else (self._generate_tables_overview() if first_source is not None else "") + ) + data_dicts = self._generate_data_dicts_yaml() if has_dicts else "" context = { "db_type": db_type, "is_duck_db": db_type.lower() == "duckdb", - "semantic_views": first_source.get_semantic_views_description(), + "has_tables": first_source is not None, + "semantic_views": semantic_views, "has_data_dicts": has_dicts, - "data_dicts": self._generate_data_dicts_yaml() if has_dicts else "", - "tables_overview": "" if has_dicts else self._generate_tables_overview(), + "data_dicts": data_dicts, + "tables_overview": tables_overview, "data_description": self.data_description, "extra_instructions": self.extra_instructions, "has_tool_update": "update" in tools if tools else False, diff --git a/pkg-py/src/querychat/prompts/greeting.md b/pkg-py/src/querychat/prompts/greeting.md new file mode 100644 index 00000000..9ac394ff --- /dev/null +++ b/pkg-py/src/querychat/prompts/greeting.md @@ -0,0 +1,70 @@ +You are a friendly data assistant. Write a warm welcome greeting for a user who is about to explore their data. + +{{#has_tables}} +You have access to a {{db_type}} database with the following tables: + +{{/has_tables}} +{{^has_tables}} +You have access to a {{db_type}} database. + +{{/has_tables}} +{{#has_data_dicts}} +{{{data_dicts}}} + +{{/has_data_dicts}} +{{^has_data_dicts}} +{{#has_tables}} + +{{{tables_overview}}} + + +{{/has_tables}} +{{/has_data_dicts}} +{{#data_description}} + +{{{data_description}}} + + +{{/data_description}} +Your greeting should be brief, warm, and focused on what the user can do with this data. Mention 2–4 concrete things the user might want to explore or ask about. 
+ +### Providing Suggestions for Next Steps + +#### Suggestion Syntax + +Use `<span class="suggestion">` tags to create clickable suggestion buttons in the UI. The text inside should be a complete, actionable suggestion that users can click to continue the conversation. + +**List format (most common):** +``` +
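<ul> + <li><span class="suggestion">Show me examples of …</span></li> + <li><span class="suggestion">What are the key differences between …</span></li> + <li><span class="suggestion">Explain how …</span></li> +</ul>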
+``` + +Use explicit HTML `<ul>`/`<li>` tags instead of markdown list markers (`*`, `-`). Markdown lists work when formatted correctly, but omitting the space after the marker (e.g., `-` instead of `- `) silently breaks the list parse, so HTML tags are more reliable. + +**Grouped suggestions:** +``` +##### Explore the data +
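<ul> + <li><span class="suggestion">What tables are available?</span></li> + <li><span class="suggestion">What columns does … have?</span></li> +</ul> + +##### Analyze the data +<ul> + <li><span class="suggestion">What's the average …?</span></li> + <li><span class="suggestion">How many …?</span></li> +</ul>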
    +``` + +#### Suggestion Guidelines + +- Use list format with 2–4 concrete, actionable suggestions grouped under `#####` headings +- Write suggestions as complete, natural prompts (not fragments) +- Include at least one suggestion encouraging the user to explore what data and questions are available +- Never use nested lists for suggestions — group them under headings instead +- Never use generic phrases like "If you'd like to..." — provide concrete suggestions +- Never refer to suggestions as "prompts" — call them "suggestions" or "ideas" or similar diff --git a/pkg-py/tests/test_querychat.py b/pkg-py/tests/test_querychat.py index 3293e356..c7491abb 100644 --- a/pkg-py/tests/test_querychat.py +++ b/pkg-py/tests/test_querychat.py @@ -1,4 +1,6 @@ import os +import tempfile +from pathlib import Path from unittest.mock import patch import ibis @@ -7,6 +9,7 @@ import pytest from querychat import QueryChat from querychat._datasource import IbisSource, PolarsLazySource +from sqlalchemy import create_engine, text @pytest.fixture(autouse=True) @@ -93,8 +96,8 @@ def test_querychat_client_has_system_prompt(sample_df): assert "test_table" in qc.system_prompt -def test_generate_greeting_uses_querychat_system_prompt(sample_df): - """generate_greeting() should use the dataset-aware querychat system prompt.""" +def test_generate_greeting_uses_greeting_system_prompt(sample_df): + """generate_greeting() should use the lean greeting prompt, not the main query prompt.""" qc = QueryChat( data_source=sample_df, table_name="test_table", @@ -112,6 +115,7 @@ def fake_chat(self, *args, **kwargs): assert greeting == "Hello from querychat" assert seen["system_prompt"] is not None assert "test_table" in seen["system_prompt"] + assert "data dashboard chatbot" not in seen["system_prompt"] def test_generate_greeting_does_not_register_querychat_tools(sample_df): @@ -152,7 +156,9 @@ def test_querychat_with_polars_lazyframe(): assert isinstance(qc._data_sources["test_table"], PolarsLazySource) # 
Query should return a native polars LazyFrame - result = qc._data_sources["test_table"].execute_query("SELECT * FROM test_table WHERE id = 2") + result = qc._data_sources["test_table"].execute_query( + "SELECT * FROM test_table WHERE id = 2" + ) assert isinstance(result, pl.LazyFrame) # Collect to verify @@ -185,7 +191,9 @@ def test_querychat_with_ibis_table(): assert isinstance(qc._data_sources["test_table"], IbisSource) # Query should return an ibis.Table - result = qc._data_sources["test_table"].execute_query("SELECT * FROM test_table WHERE id = 2") + result = qc._data_sources["test_table"].execute_query( + "SELECT * FROM test_table WHERE id = 2" + ) assert isinstance(result, ibis.Table) # Execute to verify results @@ -194,3 +202,228 @@ def test_querychat_with_ibis_table(): assert executed["name"].iloc[0] == "Bob" finally: conn.disconnect() + + +@pytest.fixture +def sqlite_engine(): + """SQLite engine with two tables for add_tables greeting tests.""" + temp_db = tempfile.NamedTemporaryFile(delete=False, suffix=".db") # noqa: SIM115 + temp_db.close() + engine = create_engine(f"sqlite:///{temp_db.name}") + with engine.begin() as conn: + conn.execute(text("CREATE TABLE orders (id INTEGER, amount REAL)")) + conn.execute(text("CREATE TABLE customers (id INTEGER, name TEXT)")) + conn.execute(text("INSERT INTO orders VALUES (1, 100.0), (2, 200.0)")) + conn.execute(text("INSERT INTO customers VALUES (1, 'Alice'), (2, 'Bob')")) + yield engine + engine.dispose() + Path(temp_db.name).unlink() + + +def test_greeter_tables_contains_constructor_table(sample_df): + """Constructor table is always present in greeter.tables.""" + qc = QueryChat(data_source=sample_df, table_name="test_table") + assert "test_table" in qc.greeter.tables + + +def test_constructor_greeting_survives_greeter_mutations(sample_df): + """Setting greeter.tables or greeter.prompt must not clear qc.greeting.""" + qc = QueryChat(data_source=sample_df, table_name="test_table", greeting="Hello!") + assert 
qc.greeting == "Hello!" + + qc.greeter.tables = ["test_table"] + assert qc.greeting == "Hello!" + + qc.greeter.prompt = "Custom prompt" + assert qc.greeting == "Hello!" + + +def test_greeter_tables_setter_rejects_bare_string(sample_df): + """Assigning a bare string to greeter.tables raises instead of char-iterating.""" + qc = QueryChat(data_source=sample_df, table_name="test_table") + with pytest.raises(TypeError, match="list of table names"): + qc.greeter.tables = "test_table" # type: ignore[assignment] + + +def test_add_table_include_in_greeting(sample_df): + """add_table with include_in_greeting=True adds the name; default False does not.""" + qc = QueryChat() + qc.add_table(sample_df, "base_table", include_in_greeting=False) + + extra = pd.DataFrame({"x": [1, 2]}) + qc.add_table(extra, "included", include_in_greeting=True) + + extra2 = pd.DataFrame({"y": [3, 4]}) + qc.add_table(extra2, "excluded") + + assert "included" in qc.greeter.tables + assert "excluded" not in qc.greeter.tables + assert "base_table" not in qc.greeter.tables + + +def test_add_table_include_in_greeting_invalid_type(sample_df): + """add_table rejects non-bool include_in_greeting.""" + qc = QueryChat() + with pytest.raises(TypeError, match="include_in_greeting"): + qc.add_table(sample_df, "base_table", include_in_greeting="yes") # type: ignore[arg-type] + + +def test_add_tables_include_in_greeting_true(sqlite_engine): + """add_tables with include_in_greeting=True adds all tables to greeter.""" + qc = QueryChat() + qc.add_tables(sqlite_engine, include_in_greeting=True) + assert "orders" in qc.greeter.tables + assert "customers" in qc.greeter.tables + + +def test_add_tables_include_in_greeting_false(sqlite_engine): + """add_tables with include_in_greeting=False (default) adds no tables to greeter.""" + qc = QueryChat() + qc.add_tables(sqlite_engine, include_in_greeting=False) + assert "orders" not in qc.greeter.tables + assert "customers" not in qc.greeter.tables + + +def 
test_add_tables_include_in_greeting_list(sqlite_engine): + """add_tables with a list only adds the named subset to greeter.tables.""" + qc = QueryChat() + qc.add_tables(sqlite_engine, include_in_greeting=["orders"]) + assert "orders" in qc.greeter.tables + assert "customers" not in qc.greeter.tables + + +def test_add_tables_include_in_greeting_str_rejected(sqlite_engine): + """add_tables rejects a bare string; a list of names is required.""" + qc = QueryChat() + with pytest.raises(TypeError, match="include_in_greeting"): + qc.add_tables(sqlite_engine, include_in_greeting="orders") # type: ignore[arg-type] + + +def test_add_tables_include_in_greeting_invalid_type(sqlite_engine): + """add_tables rejects non-bool, non-list include_in_greeting.""" + qc = QueryChat() + with pytest.raises(TypeError, match="include_in_greeting"): + qc.add_tables(sqlite_engine, include_in_greeting=1) # type: ignore[arg-type] + + +def test_add_tables_invalid_greeting_leaves_state_unchanged(sqlite_engine): + """A rejected include_in_greeting must not register any tables.""" + qc = QueryChat() + with pytest.raises(TypeError, match="include_in_greeting"): + qc.add_tables(sqlite_engine, include_in_greeting=1) # type: ignore[arg-type] + assert qc._data_sources == {} + assert qc.greeter.tables == [] + # A subsequent valid call still succeeds (no half-registered tables). 
+ qc.add_tables(sqlite_engine, include_in_greeting=True) + assert set(qc.greeter.tables) == {"orders", "customers"} + + +def test_greeting_prompt_omits_dicts_for_excluded_tables(sqlite_engine, tmp_path): + """A dict describing only excluded tables is dropped from the greeting prompt.""" + orders_yaml = tmp_path / "orders.yaml" + orders_yaml.write_text( + "name: orders_dict\ndescription: ORDERS_DICT_DESC\n" + "tables:\n orders:\n description: Orders info\n" + ) + customers_yaml = tmp_path / "customers.yaml" + customers_yaml.write_text( + "name: customers_dict\ndescription: CUSTOMERS_DICT_DESC\n" + "tables:\n customers:\n description: Customers info\n" + ) + + qc = QueryChat(data_dict=[str(orders_yaml), str(customers_yaml)]) + qc.add_tables(sqlite_engine, include_in_greeting=["orders"]) + + prompt = qc._build_greeting_client().system_prompt + assert prompt is not None + assert "ORDERS_DICT_DESC" in prompt + assert "CUSTOMERS_DICT_DESC" not in prompt + + +def test_greeting_prompt_keeps_global_dict_desc_drops_global_fields( + sqlite_engine, tmp_path +): + """A table-less dict keeps its description; relationships/glossary are dropped.""" + global_yaml = tmp_path / "global.yaml" + global_yaml.write_text( + "name: domain\ndescription: GLOBAL_DOMAIN_DESC\nglossary:\n ARR: GLOSSARY_ARR_DEF\n" + ) + orders_yaml = tmp_path / "orders.yaml" + orders_yaml.write_text( + "name: orders_dict\ndescription: ORDERS_DICT_DESC\n" + "tables:\n orders:\n description: Orders info\n" + "relationships:\n - join: orders.id = customers.id\n description: REL_DESC\n" + ) + + qc = QueryChat(data_dict=[str(global_yaml), str(orders_yaml)]) + qc.add_tables(sqlite_engine, include_in_greeting=["orders"]) + + prompt = qc._build_greeting_client().system_prompt + assert prompt is not None + assert "GLOBAL_DOMAIN_DESC" in prompt + assert "ORDERS_DICT_DESC" in prompt + assert "GLOSSARY_ARR_DEF" not in prompt + assert "REL_DESC" not in prompt + + +def 
test_generate_greeting_sets_greeting_and_returns_text(sample_df): + """generate_greeting() returns the mocked text and sets qc.greeting.""" + qc = QueryChat(data_source=sample_df, table_name="test_table") + seen: dict[str, str | None] = {} + + def fake_chat(self, *args, **kwargs): + seen["system_prompt"] = self.system_prompt + return "Generated greeting" + + with patch("chatlas.Chat.chat", fake_chat): + result = qc.generate_greeting() + + assert result == "Generated greeting" + assert qc.greeting == "Generated greeting" + assert seen["system_prompt"] is not None + assert "test_table" in seen["system_prompt"] + assert "data dashboard chatbot" not in seen["system_prompt"] + + +def test_generate_greeting_with_empty_tables(sample_df): + """Clearing greeter.tables produces a generic greeting without raising.""" + qc = QueryChat(data_source=sample_df, table_name="test_table") + qc.greeter.tables = [] + + prompt = qc._build_greeting_client().system_prompt + assert prompt is not None + assert "following tables" not in prompt + assert "SQL SQL" not in prompt + + with patch("chatlas.Chat.chat", return_value="Generic greeting"): + result = qc.generate_greeting() + + assert result == "Generic greeting" + assert qc.greeting == "Generic greeting" + + +def test_greeting_prompt_keeps_global_dict_desc_with_no_tables(sample_df, tmp_path): + """A global dict description renders even in a table-less generic greeting.""" + global_yaml = tmp_path / "global.yaml" + global_yaml.write_text( + "name: domain\ndescription: GLOBAL_DOMAIN_DESC\nglossary:\n ARR: GLOSSARY_ARR_DEF\n" + ) + qc = QueryChat(data_source=sample_df, table_name="test_table", data_dict=str(global_yaml)) + qc.greeter.tables = [] + + prompt = qc._build_greeting_client().system_prompt + assert prompt is not None + assert "GLOBAL_DOMAIN_DESC" in prompt + assert "GLOSSARY_ARR_DEF" not in prompt + assert "following tables" not in prompt + + +def test_remove_table_prunes_greeter_tables(sqlite_engine): + """remove_table drops the 
removed name from greeter.tables.""" + qc = QueryChat() + qc.add_tables(sqlite_engine, include_in_greeting=True) + assert "orders" in qc.greeter.tables + + qc.remove_table("orders") + assert "orders" not in qc.greeter.tables + assert "customers" in qc.greeter.tables diff --git a/pkg-py/tests/test_state.py b/pkg-py/tests/test_state.py index d9fcfef2..ad7ab8d2 100644 --- a/pkg-py/tests/test_state.py +++ b/pkg-py/tests/test_state.py @@ -556,6 +556,23 @@ def test_multiple_messages(self, data_source, mock_client): assert messages[0] == {"role": "user", "content": "Question"} assert messages[1] == {"role": "assistant", "content": "Answer"} + def test_legacy_greeting_prompt_turn_is_hidden(self, data_source, mock_client): + """ + State serialized by older releases injected GREETING_PROMPT as a user + turn on the shared client; it must stay hidden after restore. + """ + from chatlas import Turn + from querychat._querychat_core import GREETING_PROMPT + + turns = [ + Turn(role="user", contents=GREETING_PROMPT), + Turn(role="assistant", contents="Welcome!"), + ] + mock_client.get_turns.return_value = turns + state = AppState(data_sources={"test_table": data_source}, client=mock_client) + messages = state.get_display_messages() + assert messages == [{"role": "assistant", "content": "Welcome!"}] + class TestTypedDicts: def test_app_state_dict_structure(self): diff --git a/pkg-r/NEWS.md b/pkg-r/NEWS.md index 7775a851..d2d35e22 100644 --- a/pkg-r/NEWS.md +++ b/pkg-r/NEWS.md @@ -33,7 +33,7 @@ ## Improvements -* Chat greetings now use shinychat's greeting API (requires shinychat >= 0.4.0). A provided `greeting` renders instantly when the app loads, and when no `greeting` is given one is generated on demand without being added to the conversation history. Generated greetings are now preserved across bookmark/restore. (#249) +* Chat greetings now use shinychat's greeting API (requires shinychat >= 0.4.0). 
A provided `greeting` renders instantly when the app loads, and when no `greeting` is given one is generated on demand — now **schema-aware**, so it can describe the data it's about to help you explore — without being added to the conversation history. Generated greetings are preserved across bookmark/restore. Tables passed to `QueryChat$new()` are described in the greeting automatically; opt additional tables in with `include_in_greeting = TRUE` on `$add_table()`/`$add_tables()`, or fine-tune which tables and which template the greeting uses via `qc$greeter`. (#249, #261) * The system prompt is now lighter: full schema is no longer embedded upfront. Instead the LLM fetches per-table schema on demand via the new `querychat_get_schema` tool — and only when it needs to. When a `data_dict` is provided, the tool skips columns that already have descriptions, so the LLM only pays for what isn't already documented. (#195) diff --git a/pkg-r/R/QueryChat.R b/pkg-r/R/QueryChat.R index 576d9abc..650a4a8b 100644 --- a/pkg-r/R/QueryChat.R +++ b/pkg-r/R/QueryChat.R @@ -103,6 +103,44 @@ QueryChat <- R6::R6Class( .extra_instructions = NULL, .categorical_threshold = NULL, .data_dicts = list(), + .greeter = NULL, + + build_greeting_client = function(client_spec = NULL) { + data_sources <- private$.data_sources + tbls <- intersect(self$greeter$tables, names(data_sources)) + sources <- data_sources[tbls] + # Keep a dict if it describes an included table, or if it is a global + # (table-less) dict carrying a dict-level description. Drop the + # cross-table global fields (relationships, glossary) so a curated greeting + # subset can't leak excluded-table prose; per-table entries are scoped to + # the included tables at render time. 
+ greeting_dicts <- Filter( + function(dd) { + length(intersect(names(dd$tables), tbls)) > 0 || + (length(dd$tables) == 0 && !is.null(dd$description)) + }, + private$.data_dicts + ) + greeting_dicts <- lapply(greeting_dicts, function(dd) { + dd$relationships <- NULL + dd$glossary <- NULL + dd + }) + greeting_prompt_obj <- QueryChatSystemPrompt$new( + prompt_template = self$greeter$prompt, + data_sources = sources, + data_description = private$.data_description, + extra_instructions = NULL, + categorical_threshold = private$.categorical_threshold, + data_dicts = greeting_dicts + ) + spec <- client_spec %||% private$.client_spec + chat <- as_querychat_client(spec) + chat <- chat$clone() + chat$set_turns(list()) + chat$set_system_prompt(greeting_prompt_obj$render(tools = NULL)) + chat + }, require_initialized = function(method_name) { if (length(private$.data_sources) == 0) { @@ -365,6 +403,7 @@ QueryChat <- R6::R6Class( private$.data_sources[[normalized$table_name]] <- normalized private$auto_fill_data_description() private$build_system_prompt() + self$greeter$tables <- c(self$greeter$tables, normalized$table_name) self$id <- id %||% sprintf("querychat_%s", normalized$table_name) } else { # Deferred pattern: data_source is NULL @@ -397,12 +436,20 @@ QueryChat <- R6::R6Class( #' @param table_name The SQL table name for this data source. #' @param replace Whether to replace an existing table with this name. #' Default is `FALSE`. + #' @param include_in_greeting Whether to include this table in the greeting + #' context. Default is `FALSE`. #' #' @return Invisibly returns `self` for chaining. 
- add_table = function(data_source, table_name, replace = FALSE) { + add_table = function( + data_source, + table_name, + replace = FALSE, + include_in_greeting = FALSE + ) { if (private$.server_initialized) { cli::cli_abort("Cannot add tables after server initialization.") } + check_bool(include_in_greeting) check_sql_table_name(table_name) if (table_name %in% names(private$.data_sources) && !replace) { cli::cli_abort( @@ -447,6 +494,10 @@ QueryChat <- R6::R6Class( self$id <- sprintf("querychat_%s", table_name) } + if (isTRUE(include_in_greeting)) { + self$greeter$tables <- c(self$greeter$tables, table_name) + } + invisible(self) }, @@ -463,9 +514,18 @@ QueryChat <- R6::R6Class( #' by `DBI::dbListTables(conn)` are used. #' @param replace Whether to replace existing tables with the same name. #' Default is `FALSE`. + #' @param include_in_greeting Whether to include added tables in the greeting + #' context. `TRUE` includes all tables; `FALSE` (default) includes none; + #' a character vector includes only those named tables (intersected with + #' the tables being added). Any other type raises an error. #' #' @return Invisibly returns `self` for chaining. - add_tables = function(conn, tables = NULL, replace = FALSE) { + add_tables = function( + conn, + tables = NULL, + replace = FALSE, + include_in_greeting = FALSE + ) { if (private$.server_initialized) { cli::cli_abort("Cannot add tables after server initialization.") } @@ -490,6 +550,22 @@ QueryChat <- R6::R6Class( } } + if ( + !rlang::is_bool(include_in_greeting) && + !is.character(include_in_greeting) + ) { + cli::cli_abort( + "{.arg include_in_greeting} must be {.code TRUE}, {.code FALSE}, or a character vector of table names." 
+ ) + } + greeting_tbls <- if (isTRUE(include_in_greeting)) { + tables + } else if (is.character(include_in_greeting)) { + intersect(include_in_greeting, tables) + } else { + character() + } + normalized <- stats::setNames( lapply(tables, function(tbl) normalize_data_source(conn, tbl)), tables @@ -532,6 +608,10 @@ QueryChat <- R6::R6Class( private$.query_executor <- NULL } + if (length(greeting_tbls) > 0) { + self$greeter$tables <- c(self$greeter$tables, greeting_tbls) + } + invisible(self) }, @@ -559,6 +639,12 @@ QueryChat <- R6::R6Class( ] private$build_system_prompt(data_sources = next_sources) private$.data_sources <- next_sources + if (!is.null(private$.greeter)) { + private$.greeter$tables <- setdiff( + private$.greeter$tables, + table_name + ) + } if (!is.null(private$.query_executor)) { tryCatch(private$.query_executor$cleanup(), error = function(e) NULL) private$.query_executor <- NULL @@ -873,7 +959,12 @@ QueryChat <- R6::R6Class( if (!is.null(data_source)) { tbl_name <- private$.deferred_table_name %||% names(private$.data_sources)[[1]] - self$add_table(data_source, tbl_name, replace = TRUE) + self$add_table( + data_source, + tbl_name, + replace = TRUE, + include_in_greeting = TRUE + ) } private$require_initialized("$server") @@ -893,6 +984,10 @@ QueryChat <- R6::R6Class( ) } + greeting_client_fn <- function() { + private$build_greeting_client(client_spec = resolved_client_spec) + } + result <- mod_server( id %||% self$id, data_sources = private$.data_sources, @@ -900,6 +995,7 @@ QueryChat <- R6::R6Class( greeting = self$greeting, client = create_session_client, tools = self$tools, + greeting_client_fn = greeting_client_fn, enable_bookmarking = enable_bookmarking ) result @@ -913,8 +1009,7 @@ QueryChat <- R6::R6Class( #' @return The greeting string in Markdown format. 
generate_greeting = function(echo = c("none", "output")) { private$require_initialized("$generate_greeting") - chat <- private$create_session_client() - as.character(chat$chat(GREETING_PROMPT, echo = echo)) + self$greeter$generate(echo = echo) }, #' @description @@ -932,6 +1027,20 @@ QueryChat <- R6::R6Class( } ), active = list( + #' @field greeter The QueryChatGreeter controlling greeting generation; + #' access its `$tables` and `$prompt`. + greeter = function(value) { + if (!missing(value)) { + # The greeter is read-only. Sub-field assignments like + # `qc$greeter$tables <- x` mutate the greeter by reference and + # trigger a write-back of the (unchanged) binding, which we ignore. + return(invisible(value)) + } + private$.greeter <- private$.greeter %||% + QueryChatGreeter$new(parent = self) + private$.greeter + }, + #' @field system_prompt Get the system prompt. system_prompt = function() { private$require_initialized("$system_prompt") diff --git a/pkg-r/R/QueryChatGreeter.R b/pkg-r/R/QueryChatGreeter.R new file mode 100644 index 00000000..105b6f05 --- /dev/null +++ b/pkg-r/R/QueryChatGreeter.R @@ -0,0 +1,58 @@ +#' QueryChatGreeter +#' +#' @description +#' Controls greeting generation for a [QueryChat] instance. Access via +#' `qc$greeter`. +#' +#' @noRd +QueryChatGreeter <- R6::R6Class( + "QueryChatGreeter", + private = list( + .parent = NULL, + .tables = NULL, + .prompt = NULL + ), + public = list( + #' @description Create a new QueryChatGreeter. + #' @param parent The owning QueryChat instance. + initialize = function(parent) { + private$.parent <- parent + private$.tables <- character() + private$.prompt <- system.file( + "prompts", + "greeting.md", + package = "querychat" + ) + }, + + #' @description Generate a greeting using the greeting system prompt. + #' @param echo Whether to echo the output (`"none"` or `"output"`). + #' @return The greeting string. 
+ generate = function(echo = c("none", "output")) { + echo <- rlang::arg_match(echo) + chat <- private$.parent$.__enclos_env__$private$build_greeting_client() + txt <- as.character(chat$chat(GREETING_PROMPT, echo = echo)) + private$.parent$greeting <- txt + txt + } + ), + active = list( + #' @field tables Character vector of table names whose context to include in + #' the greeting. Changes affect the next generated greeting. + tables = function(value) { + if (missing(value)) { + return(private$.tables) + } + private$.tables <- value + }, + + #' @field prompt The greeting template (string or file path). Changes affect + #' the next generated greeting. + prompt = function(value) { + if (missing(value)) { + return(private$.prompt) + } + private$.prompt <- value + } + ) +) diff --git a/pkg-r/R/QueryChatSystemPrompt.R b/pkg-r/R/QueryChatSystemPrompt.R index 4e530929..fbf40979 100644 --- a/pkg-r/R/QueryChatSystemPrompt.R +++ b/pkg-r/R/QueryChatSystemPrompt.R @@ -83,18 +83,22 @@ QueryChatSystemPrompt <- R6::R6Class( #' #' @return A character string containing the rendered system prompt. render = function(tools) { - first_source <- self$data_sources[[1]] - db_type <- first_source$get_db_type() + # data_sources may be empty for a greeting with no included tables. + has_sources <- length(self$data_sources) > 0 + first_source <- if (has_sources) self$data_sources[[1]] else NULL + db_type <- if (has_sources) first_source$get_db_type() else "SQL" + # Data dicts can carry global (table-less) descriptions, so they may + # render even when no tables are selected (e.g. a generic greeting). 
has_dicts <- length(self$data_dicts) > 0 semantic_views <- "" - if (inherits(first_source, "DBISource")) { + if (has_sources && inherits(first_source, "DBISource")) { semantic_views <- first_source$get_semantic_views_description() } # Compute schema for backward compat with templates using {{schema}} schema <- "" - if (grepl("\\{\\{[{#^/]?\\s*schema\\b", self$template)) { + if (has_sources && grepl("\\{\\{[{#^/]?\\s*schema\\b", self$template)) { schema <- first_source$get_schema( categorical_threshold = self$categorical_threshold ) @@ -103,6 +107,7 @@ QueryChatSystemPrompt <- R6::R6Class( context <- list( db_type = db_type, is_duck_db = tolower(db_type) == "duckdb", + has_tables = has_sources, semantic_views = semantic_views, schema = schema, has_data_dicts = has_dicts, diff --git a/pkg-r/R/querychat_module.R b/pkg-r/R/querychat_module.R index cb1c7ff8..592ebcad 100644 --- a/pkg-r/R/querychat_module.R +++ b/pkg-r/R/querychat_module.R @@ -43,6 +43,7 @@ mod_server <- function( greeting, client, tools, + greeting_client_fn = NULL, enable_bookmarking = FALSE ) { shiny::moduleServer(id, function(input, output, session) { @@ -155,7 +156,11 @@ mod_server <- function( "i" = "For faster startup, lower cost, and determinism, consider providing a {.arg greeting} to {.fn QueryChat}.", "i" = "You can use your {.help querychat::QueryChat} object's {.fn $generate_greeting} method to generate a greeting." )) - greeting_client <- client(tools = NULL) + greeting_client <- if (!is.null(greeting_client_fn)) { + greeting_client_fn() + } else { + client(tools = NULL) + } stream <- greeting_client$stream_async(GREETING_PROMPT) p <- shinychat::chat_set_greeting( "chat", diff --git a/pkg-r/inst/prompts/greeting.md b/pkg-r/inst/prompts/greeting.md new file mode 100644 index 00000000..9ac394ff --- /dev/null +++ b/pkg-r/inst/prompts/greeting.md @@ -0,0 +1,70 @@ +You are a friendly data assistant. Write a warm welcome greeting for a user who is about to explore their data. 
+
+{{#has_tables}}
+You have access to a {{db_type}} database with the following tables:
+
+{{/has_tables}}
+{{^has_tables}}
+You have access to a {{db_type}} database.
+
+{{/has_tables}}
+{{#has_data_dicts}}
+{{{data_dicts}}}
+
+{{/has_data_dicts}}
+{{^has_data_dicts}}
+{{#has_tables}}
+
+{{{tables_overview}}}
+
+
+{{/has_tables}}
+{{/has_data_dicts}}
+{{#data_description}}
+
+{{{data_description}}}
+
+
+{{/data_description}}
+Your greeting should be brief, warm, and focused on what the user can do with this data. Mention 2–4 concrete things the user might want to explore or ask about.
+
+### Providing Suggestions for Next Steps
+
+#### Suggestion Syntax
+
+Use `<span class="suggestion">` tags to create clickable suggestion buttons in the UI. The text inside should be a complete, actionable suggestion that users can click to continue the conversation.
+
+**List format (most common):**
+```
+<ul>
+  <li><span class="suggestion">Show me examples of …</span></li>
+  <li><span class="suggestion">What are the key differences between …</span></li>
+  <li><span class="suggestion">Explain how …</span></li>
+</ul>
+```
+
+Use explicit HTML `<ul>`/`<li>` tags instead of markdown list markers (`*`, `-`). Markdown lists work when formatted correctly, but omitting the space after the marker (e.g., `-` instead of `- `) silently breaks the list parse, so HTML tags are more reliable.
+
+**Grouped suggestions:**
+```
+##### Explore the data
+<ul>
+  <li><span class="suggestion">What tables are available?</span></li>
+  <li><span class="suggestion">What columns does … have?</span></li>
+</ul>
+
+##### Analyze the data
+<ul>
+  <li><span class="suggestion">What's the average …?</span></li>
+  <li><span class="suggestion">How many …?</span></li>
+</ul>
      +``` + +#### Suggestion Guidelines + +- Use list format with 2–4 concrete, actionable suggestions grouped under `#####` headings +- Write suggestions as complete, natural prompts (not fragments) +- Include at least one suggestion encouraging the user to explore what data and questions are available +- Never use nested lists for suggestions — group them under headings instead +- Never use generic phrases like "If you'd like to..." — provide concrete suggestions +- Never refer to suggestions as "prompts" — call them "suggestions" or "ideas" or similar diff --git a/pkg-r/man/QueryChat.Rd b/pkg-r/man/QueryChat.Rd index c6f759de..915b4f51 100644 --- a/pkg-r/man/QueryChat.Rd +++ b/pkg-r/man/QueryChat.Rd @@ -109,6 +109,9 @@ qc <- QueryChat$new(con, "mtcars") \section{Active bindings}{ \if{html}{\out{
      }} \describe{ + \item{\code{greeter}}{The QueryChatGreeter controlling greeting generation; +access its \verb{$tables} and \verb{$prompt}.} + \item{\code{system_prompt}}{Get the system prompt.} \item{\code{data_source}}{Removed. Use \verb{$add_table()} and \verb{$remove_table()} to manage tables.} @@ -227,7 +230,12 @@ or \code{FALSE} to never clean up automatically.} Add a table to this QueryChat instance. \subsection{Usage}{ \if{html}{\out{
      }} - \preformatted{QueryChat$add_table(data_source, table_name, replace = FALSE)} + \preformatted{QueryChat$add_table( + data_source, + table_name, + replace = FALSE, + include_in_greeting = FALSE +)} \if{html}{\out{
      }} } \subsection{Arguments}{ @@ -237,6 +245,8 @@ or \code{FALSE} to never clean up automatically.} \item{\code{table_name}}{The SQL table name for this data source.} \item{\code{replace}}{Whether to replace an existing table with this name. Default is \code{FALSE}.} + \item{\code{include_in_greeting}}{Whether to include this table in the greeting +context. Default is \code{FALSE}.} } \if{html}{\out{
      }} } @@ -256,7 +266,12 @@ system prompt exactly once after all tables have been staged, avoiding N-1 spurious intermediate rebuilds. \subsection{Usage}{ \if{html}{\out{
      }} - \preformatted{QueryChat$add_tables(conn, tables = NULL, replace = FALSE)} + \preformatted{QueryChat$add_tables( + conn, + tables = NULL, + replace = FALSE, + include_in_greeting = FALSE +)} \if{html}{\out{
      }} } \subsection{Arguments}{ @@ -268,6 +283,10 @@ individual data frames or other sources via \verb{$add_table()}.} by \code{DBI::dbListTables(conn)} are used.} \item{\code{replace}}{Whether to replace existing tables with the same name. Default is \code{FALSE}.} + \item{\code{include_in_greeting}}{Whether to include added tables in the greeting +context. \code{TRUE} includes all tables; \code{FALSE} (default) includes none; +a character vector includes only those named tables (intersected with +the tables being added). Any other type raises an error.} } \if{html}{\out{}} } diff --git a/pkg-r/tests/testthat/test-QueryChat.R b/pkg-r/tests/testthat/test-QueryChat.R index 99313737..ec6922df 100644 --- a/pkg-r/tests/testthat/test-QueryChat.R +++ b/pkg-r/tests/testthat/test-QueryChat.R @@ -1039,3 +1039,276 @@ describe("QueryChat$add_tables()", { expect_length(multi_table_warns, 1L) }) }) + +describe("QueryChatGreeter", { + skip_if_no_dataframe_engine() + + local_multi_table_conn_greeter <- function(env = parent.frame()) { + skip_if_not_installed("RSQLite") + conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + withr::defer(DBI::dbDisconnect(conn), envir = env) + DBI::dbWriteTable( + conn, + "orders", + data.frame(id = 1:2, amount = c(9.99, 4.50)) + ) + DBI::dbWriteTable( + conn, + "customers", + data.frame(id = 1:2, name = c("Alice", "Bob")) + ) + conn + } + + it("constructor table is always present in greeter$tables", { + qc <- local_querychat(new_test_df(), "test_table", greeting = "hi") + expect_true("test_table" %in% qc$greeter$tables) + }) + + it("user-supplied greeting survives construction and mutation of greeter fields", { + qc <- local_querychat( + new_test_df(), + "test_table", + greeting = "Preset greeting" + ) + expect_equal(qc$greeting, "Preset greeting") + + qc$greeter$tables <- c("test_table", "other") + expect_equal(qc$greeting, "Preset greeting") + + qc$greeter$prompt <- "New prompt text" + expect_equal(qc$greeting, "Preset greeting") + }) + + 
it("add_table with include_in_greeting = TRUE adds to greeter$tables", { + qc <- local_querychat(new_test_df(), "base_table", greeting = "hi") + extra <- new_test_df() + qc$add_table(extra, "extra_table", include_in_greeting = TRUE) + expect_true("extra_table" %in% qc$greeter$tables) + }) + + it("add_table with default include_in_greeting does NOT add to greeter$tables", { + qc <- local_querychat(new_test_df(), "base_table", greeting = "hi") + extra <- new_test_df() + qc$add_table(extra, "hidden_table") + expect_false("hidden_table" %in% qc$greeter$tables) + }) + + it("add_table with non-logical include_in_greeting errors", { + qc <- local_querychat(new_test_df(), "base_table", greeting = "hi") + extra <- new_test_df() + expect_error( + qc$add_table(extra, "extra_table", include_in_greeting = "yes"), + "include_in_greeting" + ) + }) + + it("add_tables with include_in_greeting = TRUE adds all tables", { + conn <- local_multi_table_conn_greeter() + qc <- QueryChat$new(NULL, "placeholder", greeting = "hi") + suppressWarnings(qc$add_tables(conn, include_in_greeting = TRUE)) + expect_true("orders" %in% qc$greeter$tables) + expect_true("customers" %in% qc$greeter$tables) + }) + + it("add_tables with include_in_greeting = FALSE adds no tables to greeter", { + conn <- local_multi_table_conn_greeter() + qc <- QueryChat$new(NULL, "placeholder", greeting = "hi") + suppressWarnings(qc$add_tables(conn, include_in_greeting = FALSE)) + expect_false("orders" %in% qc$greeter$tables) + expect_false("customers" %in% qc$greeter$tables) + }) + + it("add_tables with include_in_greeting as character includes only named subset", { + conn <- local_multi_table_conn_greeter() + qc <- QueryChat$new(NULL, "placeholder", greeting = "hi") + suppressWarnings(qc$add_tables(conn, include_in_greeting = "orders")) + expect_true("orders" %in% qc$greeter$tables) + expect_false("customers" %in% qc$greeter$tables) + }) + + it("add_tables with non-logical, non-character include_in_greeting errors", { + 
conn <- local_multi_table_conn_greeter() + qc <- QueryChat$new(NULL, "placeholder", greeting = "hi") + expect_error( + suppressWarnings(qc$add_tables(conn, include_in_greeting = 1)), + "include_in_greeting" + ) + }) + + it("add_tables leaves state unchanged when include_in_greeting is invalid", { + conn <- local_multi_table_conn_greeter() + qc <- QueryChat$new(NULL, "placeholder", greeting = "hi") + expect_error( + suppressWarnings(qc$add_tables(conn, include_in_greeting = 1)), + "include_in_greeting" + ) + expect_false("orders" %in% qc$greeter$tables) + expect_false("customers" %in% qc$greeter$tables) + suppressWarnings(qc$add_tables(conn, include_in_greeting = TRUE)) + expect_true("orders" %in% qc$greeter$tables) + expect_true("customers" %in% qc$greeter$tables) + }) + + it("greeting prompt omits dicts that describe only excluded tables", { + conn <- local_multi_table_conn_greeter() + orders_yaml <- withr::local_tempfile(fileext = ".yaml") + writeLines( + c( + "name: orders_dict", + "description: ORDERS_DICT_DESC", + "tables:", + " orders:", + " description: Orders info" + ), + orders_yaml + ) + customers_yaml <- withr::local_tempfile(fileext = ".yaml") + writeLines( + c( + "name: customers_dict", + "description: CUSTOMERS_DICT_DESC", + "tables:", + " customers:", + " description: Customers info" + ), + customers_yaml + ) + + qc <- QueryChat$new( + NULL, + "placeholder", + greeting = "hi", + data_dict = list(orders_yaml, customers_yaml) + ) + qc$add_tables(conn, include_in_greeting = "orders") + + prompt <- qc$.__enclos_env__$private$build_greeting_client()$get_system_prompt() + expect_true(grepl("ORDERS_DICT_DESC", prompt)) + expect_false(grepl("CUSTOMERS_DICT_DESC", prompt)) + }) + + it("greeting prompt keeps a global dict description but drops relationships/glossary", { + conn <- local_multi_table_conn_greeter() + global_yaml <- withr::local_tempfile(fileext = ".yaml") + writeLines( + c( + "name: domain", + "description: GLOBAL_DOMAIN_DESC", + "glossary:", + " 
ARR: GLOSSARY_ARR_DEF" + ), + global_yaml + ) + orders_yaml <- withr::local_tempfile(fileext = ".yaml") + writeLines( + c( + "name: orders_dict", + "description: ORDERS_DICT_DESC", + "tables:", + " orders:", + " description: Orders info", + "relationships:", + " - join: orders.id = customers.id", + " description: REL_DESC" + ), + orders_yaml + ) + + qc <- QueryChat$new( + NULL, + "placeholder", + greeting = "hi", + data_dict = list(global_yaml, orders_yaml) + ) + qc$add_tables(conn, include_in_greeting = "orders") + + prompt <- qc$.__enclos_env__$private$build_greeting_client()$get_system_prompt() + expect_true(grepl("GLOBAL_DOMAIN_DESC", prompt)) + expect_true(grepl("ORDERS_DICT_DESC", prompt)) + expect_false(grepl("GLOSSARY_ARR_DEF", prompt)) + expect_false(grepl("REL_DESC", prompt)) + }) + + it("generate_greeting() uses greeting system prompt, writes to qc$greeting, returns text", { + client <- mock_ellmer_chat_client( + public = list( + chat = function(message, ...) { + expect_equal(message, GREETING_PROMPT) + "Hello from greeting mock!" + } + ) + ) + + qc <- QueryChat$new(new_test_df(), "test_table", client = client) + withr::defer(qc$cleanup()) + + greeting_client <- qc$.__enclos_env__$private$build_greeting_client() + greeting_system_prompt <- greeting_client$get_system_prompt() + expect_false(grepl("querychat_get_schema", greeting_system_prompt)) + expect_true(grepl("test_table", greeting_system_prompt)) + + result <- qc$generate_greeting() + + expect_equal(result, "Hello from greeting mock!") + expect_equal(qc$greeting, "Hello from greeting mock!") + }) + + it("generate_greeting() with empty greeter$tables succeeds without error", { + client <- mock_ellmer_chat_client( + public = list( + chat = function(message, ...) "Generic greeting with no tables." 
+ ) + ) + + qc <- QueryChat$new(new_test_df(), "test_table", client = client) + withr::defer(qc$cleanup()) + + qc$greeter$tables <- character() + + prompt <- qc$.__enclos_env__$private$build_greeting_client()$get_system_prompt() + expect_false(grepl("following tables", prompt)) + expect_false(grepl("SQL SQL", prompt)) + + expect_no_error(qc$generate_greeting()) + expect_equal(qc$greeting, "Generic greeting with no tables.") + }) + + it("greeting prompt keeps a global dict description with no tables included", { + global_yaml <- withr::local_tempfile(fileext = ".yaml") + writeLines( + c( + "name: domain", + "description: GLOBAL_DOMAIN_DESC", + "glossary:", + " ARR: GLOSSARY_ARR_DEF" + ), + global_yaml + ) + + qc <- QueryChat$new( + new_test_df(), + "test_table", + greeting = "hi", + data_dict = list(global_yaml) + ) + withr::defer(qc$cleanup()) + qc$greeter$tables <- character() + + prompt <- qc$.__enclos_env__$private$build_greeting_client()$get_system_prompt() + expect_true(grepl("GLOBAL_DOMAIN_DESC", prompt)) + expect_false(grepl("GLOSSARY_ARR_DEF", prompt)) + expect_false(grepl("following tables", prompt)) + }) + + it("remove_table prunes the table from greeter$tables", { + conn <- local_multi_table_conn_greeter() + qc <- QueryChat$new(NULL, "placeholder", greeting = "hi") + suppressWarnings(qc$add_tables(conn, include_in_greeting = TRUE)) + expect_true("orders" %in% qc$greeter$tables) + + qc$remove_table("orders") + expect_false("orders" %in% qc$greeter$tables) + expect_true("customers" %in% qc$greeter$tables) + }) +}) diff --git a/pkg-r/vignettes/greet.Rmd b/pkg-r/vignettes/greet.Rmd index 35649dc5..42a7f575 100644 --- a/pkg-r/vignettes/greet.Rmd +++ b/pkg-r/vignettes/greet.Rmd @@ -77,3 +77,37 @@ querychat_app( greeting = "penguins_greeting.md" ) ``` + +## Greetings with multiple tables + +The generated greeting is *schema-aware*: querychat shares the schema of the +relevant tables with the model so the opening message can describe the data +it's about to help 
you explore. Tables passed to `QueryChat$new()` are included +in the greeting automatically. + +Tables added later with `$add_table()` or `$add_tables()` are **not** included +by default — pass `include_in_greeting = TRUE` to opt them in: + +```{r} +qc <- QueryChat$new(orders, "orders") # included automatically +qc$add_table(customers, "customers") # not included by default +qc$add_table(products, "products", include_in_greeting = TRUE) # opted in + +qc$greeter$tables +#> [1] "orders" "products" +``` + +For `$add_tables()`, `include_in_greeting` can also be a character vector +naming which of the added tables to include: + +```{r} +qc$add_tables(con, include_in_greeting = c("orders", "customers")) +``` + +You can also set the included tables directly, or swap in a custom greeting +template, through `qc$greeter`: + +```{r} +qc$greeter$tables <- c("orders", "customers") +qc$greeter$prompt <- "my-greeting-template.md" +```