Skip to content

Commit 2c8df8a

Browse files
committed
feat(spanner): add dialect-aware prompt generation
- Updates SQLGenBasePromptGenerator to detect the Spanner dialect (GoogleSQL vs. PostgreSQL) from the db config.
- Adds dedicated prompt templates for Spanner GoogleSQL (using backticks) and Spanner PostgreSQL (using quotes).
1 parent 2ba6d1c commit 2c8df8a

1 file changed

Lines changed: 61 additions & 1 deletion

File tree

evalbench/generators/prompts/sqlgenbase.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,21 +185,81 @@
185185
{USER_PROMPT}
186186
"""
187187

188+
# Prompt template for Cloud Spanner databases using the GoogleSQL dialect.
# Contains two placeholders, {SCHEMA} and {USER_PROMPT}; presumably these are
# filled in by the prompt generator via str.format -- confirm against the caller.
# GoogleSQL quotes identifiers with backticks; this is required for names that
# contain spaces/special characters and for names that are reserved keywords.
SPANNER_GSQL_PROMPT_TEMPLATE_WITH_RULES = """You are a Cloud Spanner GoogleSQL expert.

The database structure is defined by the following table schemas:

**************************
{SCHEMA}
**************************

Please generate a GoogleSQL query for Cloud Spanner for the following question following these rules:
- Output the query only without any explanation.
- Do not use markdown code blocks around the outputted query.
- Use backticks (`) around table and column names if they contain spaces or special characters or are reserved keywords, but avoid unnecessary quoting.
- Spanner GoogleSQL does NOT support double quotes for identifiers.

SQL generation rules:
- Use aliases for tables to avoid ambiguity.
- Ensure that you are selecting the correct columns based on the provided schema.

Think step by step about generating a correct GoogleSQL query!

**************************

Here is the natural language question for generating SQL:
{USER_PROMPT}"""
212+
213+
# Prompt template for Cloud Spanner databases using the PostgreSQL dialect.
# Contains two placeholders, {SCHEMA} and {USER_PROMPT}; presumably these are
# filled in by the prompt generator via str.format -- confirm against the caller.
# The quoting rule names double quotes explicitly: in PostgreSQL single quotes
# delimit string literals, so an unqualified "quotes" is ambiguous.
SPANNER_PG_PROMPT_TEMPLATE_WITH_RULES = """You are a Cloud Spanner PostgreSQL expert.

The database structure is defined by the following table schemas:

**************************
{SCHEMA}
**************************

Please generate a PostgreSQL query for Cloud Spanner for the following question following these rules:
- Output the query only without any explanation.
- Do not use markdown code blocks around the outputted query.
- Always use double quotes (") around table and column names.

SQL generation rules:
- Use aliases for tables to avoid ambiguity.
- Ensure that you are selecting the correct columns based on the provided schema.

Think step by step about generating a correct PostgreSQL query!

**************************

Here is the natural language question for generating SQL:
{USER_PROMPT}"""
236+
188237
# Registry of prompt templates, keyed by dialect name.
# Most keys are looked up directly by `db.db_type`; the two `spanner_*` keys
# cover the two Cloud Spanner dialects (PostgreSQL and GoogleSQL).
_PROMPTS_BY_DIALECT = {
    # Engines with a single SQL dialect.
    "sqlite": SQLITE_PROMPT_TEMPLATE_WITH_RULES,
    "postgres": PG_PROMPT_TEMPLATE_WITH_RULES,
    "mysql": MYSQL_PROMPT_TEMPLATE_WITH_RULES,
    "sqlserver": SQLSERVER_PROMPT_TEMPLATE_WITH_RULES,
    "bigquery": BIGQUERY_PROMPT_TEMPLATE_WITH_RULES,
    "mongodb": MONGODB_PROMPT_TEMPLATE_WITH_RULES,
    # Cloud Spanner, one entry per dialect.
    "spanner_pg": SPANNER_PG_PROMPT_TEMPLATE_WITH_RULES,
    "spanner_gsql": SPANNER_GSQL_PROMPT_TEMPLATE_WITH_RULES,
}
196247

197248

198249
class SQLGenBasePromptGenerator(PromptGenerator):
199250
def __init__(self, db: DB, promptgenerator_config):
    """Store the database handle and pick the prompt template for its dialect.

    Args:
        db: Database handle. Its ``db_type`` selects the template; when
            ``db_type`` is ``"spanner"``, ``db.config.get("dialect")`` is
            also read to choose between the PostgreSQL and GoogleSQL
            variants.
        promptgenerator_config: Passed through unchanged to the parent
            initializer.

    Raises:
        KeyError: If no template is registered in ``_PROMPTS_BY_DIALECT``
            for the resolved dialect key.
    """
    super().__init__(db, promptgenerator_config)
    self.db = db

    prompt_key = db.db_type
    if prompt_key == "spanner":
        # `or ""` also covers a dialect key that is present but set to None,
        # which would otherwise fail on `.lower()`.
        dialect = (db.config.get("dialect") or "").lower()
        uses_postgres = "pg" in dialect or "postgres" in dialect
        # A missing or unrecognized dialect falls back to GoogleSQL.
        prompt_key = "spanner_pg" if uses_postgres else "spanner_gsql"

    # Single lookup through the registry so the Spanner entries registered
    # there are the ones actually used.
    self.base_prompt = _PROMPTS_BY_DIALECT[prompt_key]
203263

204264
def setup(self):
205265
self.schema = self.db.get_ddl_from_db()

0 commit comments

Comments
 (0)