Skip to content
This repository was archived by the owner on Mar 26, 2026. It is now read-only.

Commit 30be632

Browse files
committed
fix edge cases for DO, VACUUM, REVOKE, permissions/roles and multi role parsing
1 parent 509b927 commit 30be632

8 files changed

Lines changed: 773 additions & 211 deletions

File tree

README.md

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,27 @@ stmt = (sqlglot.select(["id", "name"])
113113
diff = sqlglot.diff("SELECT id FROM users", "SELECT user_id FROM users")
114114
```
115115

116+
### Unix Pipes
117+
118+
libsqlglot can be used in Unix pipelines via the Python CLI:
119+
120+
```bash
121+
# Basic transpilation
122+
echo "SELECT \`id\` FROM \`users\`" | python3 -m libsqlglot -r mysql -w postgres
123+
# Output: SELECT "id" FROM "users"
124+
125+
# Pipeline with grep
126+
cat queries.sql | python3 -m libsqlglot -r mysql -w bigquery | grep "SELECT"
127+
128+
# Process multiple files
129+
cat *.sql | python3 -m libsqlglot -r sqlserver -w postgres > output.sql
130+
131+
# Combine with other tools
132+
find . -name "*.sql" -exec cat {} \; | python3 -m libsqlglot -r mysql -w postgres | wc -l
133+
```
134+
135+
The CLI reads SQL from stdin and writes transpiled SQL to stdout, making it composable with standard Unix tools.
136+
116137
See [Supported SQL dialects](#supported-sql-dialects) for all available dialect names.
117138

118139
**Python API**: `parse()`, `parse_one()`, `generate()`, `transpile()`, `optimize()`, `diff()`, `.sql()`, `.find_all()`, `.walk()`, `select()` builder.
@@ -218,7 +239,7 @@ cmake --build build
218239

219240
## Architecture
220241

221-
Header-only design: you only pay for what you use. 19 header files, no `.cpp`. See `include/libsqlglot/` for the full layout. Core files: `parser.h` (4016 lines), `generator.h` (2092), `expression.h` (1376, 115 expression types). Entry point is `transpiler.h` (86 lines).
242+
Header-only design: you only pay for what you use. 19 header files, no `.cpp`. See `include/libsqlglot/` for the full layout. Core files: `parser.h` (4157 lines), `generator.h` (2137), `expression.h` (1385, 115 expression types). Entry point is `transpiler.h` (86 lines).
222243

223244
### Memory management
224245

@@ -591,9 +612,9 @@ These dialects inherit features from a compatible base dialect and add specific
591612

592613
# Contributing
593614

594-
libsqlglot is a solo project. Bug reports, test cases, and dialect edge cases are welcome via GitHub issues.
615+
libsqlglot is a solo project. Bug reports, test cases, and dialect edge cases are welcome via GitHub issues. If you have a dialect you wish to see added, please open an issue or PR.
595616

596-
If you find a query that parses incorrectly, or a dialect transformation that produces wrong output, please open an issue with the input SQL, source dialect, target dialect, and expected output.
617+
If a query parses incorrectly, or a dialect transformation produces wrong output, please open an issue with the input SQL, source dialect, target dialect, expected output, and any other pertinent details.
597618

598619
Pull requests are considered but there is no guarantee of merge. The codebase is intentionally small and opinionated.
599620

include/libsqlglot/expression.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1121,24 +1121,33 @@ struct CreateIndexAdvStmt : Expression {
11211121
/// DO block (PostgreSQL anonymous code block)
11221122
struct DoBlockStmt : Expression {
11231123
std::string language; // plpgsql, sql, etc.
1124-
std::vector<Expression*> statements; // Block body
1124+
bool language_explicit = false; // Was LANGUAGE explicitly specified?
1125+
std::vector<Expression*> statements; // Block body (parsed statements)
1126+
std::string raw_body; // Raw body text (for dollar-quoted strings)
1127+
std::string delimiter; // Dollar quote delimiter (e.g., "$$", "$custom$")
11251128

11261129
DoBlockStmt()
11271130
: Expression(ExprType::DO_BLOCK) {}
11281131
};
11291132

11301133
/// ANALYZE statement
11311134
struct AnalyzeStmt : Expression {
1132-
std::string table; // Optional table name
1135+
std::string table; // Primary table name
1136+
std::vector<std::string> tables; // Multiple tables (if supported)
11331137
std::vector<std::string> columns; // Optional column list
1138+
bool verbose = false; // VERBOSE option
1139+
bool local = false; // LOCAL option (MySQL)
1140+
bool no_write_to_binlog = false; // NO_WRITE_TO_BINLOG (MySQL)
1141+
bool has_table_keyword = false; // Was TABLE keyword used? (MySQL)
11341142

11351143
AnalyzeStmt()
11361144
: Expression(ExprType::ANALYZE_STMT) {}
11371145
};
11381146

11391147
/// VACUUM statement (PostgreSQL-specific)
11401148
struct VacuumStmt : Expression {
1141-
std::string table; // Optional table name
1149+
std::string table; // Primary table name
1150+
std::vector<std::string> tables; // Multiple tables (if supported)
11421151
std::vector<std::string> columns; // Optional column list (requires ANALYZE)
11431152

11441153
// Boolean options
@@ -1159,7 +1168,7 @@ struct VacuumStmt : Expression {
11591168
IndexCleanup index_cleanup = IndexCleanup::AUTO;
11601169

11611170
// Use parenthesized syntax (PostgreSQL 9.0+) vs legacy syntax
1162-
bool use_parenthesized_syntax = true;
1171+
bool use_parenthesized_syntax = false; // Default to legacy syntax
11631172

11641173
VacuumStmt()
11651174
: Expression(ExprType::VACUUM_STMT) {}

include/libsqlglot/generator.h

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1646,24 +1646,66 @@ class Generator {
16461646

16471647
void visit_do_block(const DoBlockStmt* stmt) {
16481648
sql_ << "DO";
1649-
if (!stmt->language.empty() && stmt->language != "plpgsql") {
1649+
if (stmt->language_explicit && !stmt->language.empty()) {
16501650
sql_ << " LANGUAGE " << stmt->language;
16511651
}
1652-
sql_ << " $$";
1653-
for (auto* s : stmt->statements) {
1652+
1653+
// Use raw_body if available (from dollar-quoted strings)
1654+
if (!stmt->raw_body.empty()) {
16541655
sql_ << " ";
1655-
visit(s);
1656-
sql_ << ";";
1656+
if (!stmt->delimiter.empty()) {
1657+
sql_ << stmt->delimiter;
1658+
} else {
1659+
sql_ << "$$";
1660+
}
1661+
sql_ << stmt->raw_body;
1662+
if (!stmt->delimiter.empty()) {
1663+
sql_ << stmt->delimiter;
1664+
} else {
1665+
sql_ << "$$";
1666+
}
1667+
} else {
1668+
// Fallback to parsed statements
1669+
sql_ << " $$";
1670+
for (auto* s : stmt->statements) {
1671+
sql_ << " ";
1672+
visit(s);
1673+
sql_ << ";";
1674+
}
1675+
sql_ << " $$";
16571676
}
1658-
sql_ << " $$";
16591677
}
16601678

16611679
void visit_analyze(const AnalyzeStmt* stmt) {
16621680
sql_ << "ANALYZE";
1663-
if (!stmt->table.empty()) {
1664-
sql_ << " TABLE " << stmt->table;
1681+
1682+
// MySQL options: LOCAL or NO_WRITE_TO_BINLOG
1683+
if (stmt->local) {
1684+
sql_ << " LOCAL";
1685+
} else if (stmt->no_write_to_binlog) {
1686+
sql_ << " NO_WRITE_TO_BINLOG";
1687+
}
1688+
1689+
// PostgreSQL option: VERBOSE
1690+
if (stmt->verbose) {
1691+
sql_ << " VERBOSE";
1692+
}
1693+
1694+
// MySQL: TABLE keyword
1695+
if (stmt->has_table_keyword) {
1696+
sql_ << " TABLE";
1697+
}
1698+
1699+
// Table names (comma-separated)
1700+
if (!stmt->tables.empty()) {
1701+
for (size_t i = 0; i < stmt->tables.size(); ++i) {
1702+
if (i > 0) sql_ << ",";
1703+
sql_ << " " << stmt->tables[i];
1704+
}
1705+
1706+
// Column list (only on first table)
16651707
if (!stmt->columns.empty()) {
1666-
sql_ << " (";
1708+
sql_ << "(";
16671709
for (size_t i = 0; i < stmt->columns.size(); ++i) {
16681710
if (i > 0) sql_ << ", ";
16691711
sql_ << stmt->columns[i];
@@ -1741,13 +1783,16 @@ class Generator {
17411783
if (stmt->analyze) sql_ << " ANALYZE";
17421784
}
17431785

1744-
// Table name
1745-
if (!stmt->table.empty()) {
1746-
sql_ << " " << stmt->table;
1786+
// Table names (comma-separated)
1787+
if (!stmt->tables.empty()) {
1788+
for (size_t i = 0; i < stmt->tables.size(); ++i) {
1789+
if (i > 0) sql_ << ",";
1790+
sql_ << " " << stmt->tables[i];
1791+
}
17471792

1748-
// Column list (only with ANALYZE)
1749-
if (stmt->analyze && !stmt->columns.empty()) {
1750-
sql_ << " (";
1793+
// Column list (allowed with or without ANALYZE)
1794+
if (!stmt->columns.empty()) {
1795+
sql_ << "(";
17511796
for (size_t i = 0; i < stmt->columns.size(); ++i) {
17521797
if (i > 0) sql_ << ", ";
17531798
sql_ << stmt->columns[i];
@@ -1780,7 +1825,7 @@ class Generator {
17801825
case GrantStmt::PrivilegeType::CREATEDB: return "CREATEDB";
17811826
case GrantStmt::PrivilegeType::REPLICATION: return "REPLICATION";
17821827
case GrantStmt::PrivilegeType::BYPASSRLS: return "BYPASSRLS";
1783-
case GrantStmt::PrivilegeType::ALL: return "ALL PRIVILEGES";
1828+
case GrantStmt::PrivilegeType::ALL: return "ALL";
17841829
case GrantStmt::PrivilegeType::ALL_PRIVILEGES: return "ALL PRIVILEGES";
17851830
case GrantStmt::PrivilegeType::INDEX: return "INDEX";
17861831
case GrantStmt::PrivilegeType::DEBUG: return "DEBUG";

0 commit comments

Comments
 (0)