From 34d337e54ca02ff3bad163219a1ceefa185b6f6f Mon Sep 17 00:00:00 2001 From: whirlun Date: Fri, 6 Mar 2026 16:10:46 -0800 Subject: [PATCH 1/4] add databricks JSON accessors --- src/parser/mod.rs | 25 +++++- tests/sqlparser_databricks.rs | 153 ++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+), 4 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index eaaa95ec8..0a999aa7c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4191,8 +4191,9 @@ impl<'a> Parser<'a> { match token.token { Token::Word(Word { value, - // path segments in SF dot notation can be unquoted or double-quoted - quote_style: quote_style @ (Some('"') | None), + // path segments in SF dot notation can be unquoted or double-quoted; + // Databricks also supports backtick-quoted identifiers + quote_style: quote_style @ (Some('"') | Some('`') | None), // some experimentation suggests that snowflake permits // any keyword here unquoted. keyword: _, @@ -4210,6 +4211,15 @@ impl<'a> Parser<'a> { } } + fn parse_json_path_bracket_key(&mut self) -> Result { + // Databricks supports [*] wildcard accessor + if self.consume_token(&Token::Mul) { + Ok(Expr::Wildcard(AttachedToken::empty())) + } else { + self.parse_expr() + } + } + fn parse_json_access(&mut self, expr: Expr) -> Result { let path = self.parse_json_path()?; Ok(Expr::JsonAccess { @@ -4223,13 +4233,20 @@ impl<'a> Parser<'a> { loop { match self.next_token().token { Token::Colon if path.is_empty() => { - path.push(self.parse_json_path_object_key()?); + if self.peek_token_ref().token == Token::LBracket { + self.next_token(); + let key = self.parse_json_path_bracket_key()?; + self.expect_token(&Token::RBracket)?; + path.push(JsonPathElem::Bracket { key }); + } else { + path.push(self.parse_json_path_object_key()?); + } } Token::Period if !path.is_empty() => { path.push(self.parse_json_path_object_key()?); } Token::LBracket => { - let key = self.parse_expr()?; + let key = self.parse_json_path_bracket_key()?; self.expect_token(&Token::RBracket)?; path.push(JsonPathElem::Bracket { key }); diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 24d06ef2f..2c2c91575 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -600,3 +600,156 @@ fn parse_databricks_struct_type() { _ => unreachable!(), } } + +// https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html +#[test] +fn parse_databricks_json_accessor() { + // Basic colon accessor — unquoted field names are case-insensitive + databricks().verified_only_select("SELECT raw:owner, RAW:owner FROM store_data"); + + // Unquoted field access is case-insensitive. Bracket notation (`raw:['OWNER']`) also + // parses successfully; its AST is asserted below. + databricks().verified_only_select("SELECT raw:OWNER FROM store_data"); + databricks() + .parse_sql_statements("SELECT raw:['OWNER'] FROM store_data") + .unwrap(); + + // Backtick-quoted keys (Databricks delimited identifiers) normalise to double-quoted output. + databricks().one_statement_parses_to( + "SELECT raw:`zip code`, raw:`Zip Code` FROM store_data", + r#"SELECT raw:"zip code", raw:"Zip Code" FROM store_data"#, + ); + // A colon inside a string literal key is parsed as part of the string, not as an operator. + databricks() + .parse_sql_statements("SELECT raw:['fb:testid'] FROM store_data") + .unwrap(); + + // Dot notation + databricks().verified_only_select("SELECT raw:store.bicycle FROM store_data"); + + // String-key bracket notation after a dot segment + databricks() + .verified_only_select("SELECT raw:store['bicycle'], raw:store['BICYCLE'] FROM store_data"); + + // Integer-index bracket notation + databricks() + .verified_only_select("SELECT raw:store.fruit[0], raw:store.fruit[1] FROM store_data"); + + // Wildcard [*] — including chained and mixed positions + databricks().verified_only_select( + "SELECT raw:store.basket[*], raw:store.basket[*][0] AS first_of_baskets, \ + raw:store.basket[0][*] AS first_basket, raw:store.basket[*][*] AS all_elements_flattened, \ + raw:store.basket[0][2].b AS subfield FROM store_data", + ); + + // Dot access following a wildcard bracket + databricks().verified_only_select("SELECT raw:store.book[*].isbn FROM store_data"); + + // Double-colon cast — type keyword normalises to upper case + databricks().one_statement_parses_to( + "SELECT raw:store.bicycle.price::double FROM store_data", + "SELECT raw:store.bicycle.price::DOUBLE FROM store_data", + ); + + // --- AST structure assertions --- + + // Simple dot access + assert_eq!( + databricks().verified_expr("raw:owner"), + Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("raw"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "owner".to_owned(), + quoted: false, + }], + }, + } + ); + + // Multi-level dot access + assert_eq!( + databricks().verified_expr("raw:store.bicycle"), + Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("raw"))), + path: JsonPath { + path: vec![ + JsonPathElem::Dot { + key: "store".to_owned(), + quoted: false, + }, + JsonPathElem::Dot { + key: "bicycle".to_owned(), + quoted: false, + }, + ], + }, + } + ); + + // Dot path followed by an integer-index bracket + assert_eq!( + databricks().verified_expr("raw:store.fruit[0]"), + Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("raw"))), + path: JsonPath { + path: vec![ + JsonPathElem::Dot { + key: "store".to_owned(), + quoted: false, + }, + JsonPathElem::Dot { + key: "fruit".to_owned(), + quoted: false, + }, + JsonPathElem::Bracket { + key: Expr::value(number("0")), + }, + ], + }, + } + ); + + // [*] is stored as Expr::Wildcard inside a Bracket element + assert_eq!( + databricks().verified_expr("raw:store.basket[*]"), + Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("raw"))), + path: JsonPath { + path: vec![ + JsonPathElem::Dot { + key: "store".to_owned(), + quoted: false, + }, + JsonPathElem::Dot { + key: "basket".to_owned(), + quoted: false, + }, + JsonPathElem::Bracket { + key: Expr::Wildcard(AttachedToken::empty()), + }, + ], + }, + } + ); + + // raw:['OWNER'] — bracket as the first path element (directly after the colon) + let select = databricks() + .parse_sql_statements("SELECT raw:['OWNER'] FROM t") + .unwrap(); + if let Statement::Query(q) = &select[0] { + if let SetExpr::Select(sel) = q.body.as_ref() { + assert_eq!( + sel.projection[0], + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("raw"))), + path: JsonPath { + path: vec![JsonPathElem::Bracket { + key: Expr::value(Value::SingleQuotedString("OWNER".to_owned())), + }], + }, + }) + ); + } + } +} From c7a11276e70d4ca731af9c1e201d6019e48ffcca Mon Sep 17 00:00:00 2001 From: whirlun Date: Sun, 8 Mar 2026 14:03:39 -0700 Subject: [PATCH 2/4] preserve colon for syntax roundtrip --- src/parser/mod.rs | 6 ++++ tests/sqlparser_databricks.rs | 52 ++++++++++++++++------------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0a999aa7c..d4b85cc23 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4234,6 +4234,12 @@ impl<'a> Parser<'a> { match self.next_token().token { Token::Colon if path.is_empty() => { if self.peek_token_ref().token == Token::LBracket { + // A bracket element directly after the colon, e.g. `raw:['field']`. + // Push an empty Dot so the display re-emits the leading `:` for syntax roundtrip. + path.push(JsonPathElem::Dot { + key: String::new(), + quoted: false, + }); self.next_token(); let key = self.parse_json_path_bracket_key()?; self.expect_token(&Token::RBracket)?; diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 2c2c91575..a1cb34ba0 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -607,22 +607,16 @@ fn parse_databricks_json_accessor() { // Basic colon accessor — unquoted field names are case-insensitive databricks().verified_only_select("SELECT raw:owner, RAW:owner FROM store_data"); - // Unquoted field access is case-insensitive. Bracket notation (`raw:['OWNER']`) also - // parses successfully; its AST is asserted below. - databricks().verified_only_select("SELECT raw:OWNER FROM store_data"); - databricks() - .parse_sql_statements("SELECT raw:['OWNER'] FROM store_data") - .unwrap(); + // Unquoted field access is case-insensitive; bracket notation is case-sensitive. + databricks().verified_only_select( + "SELECT raw:OWNER AS case_insensitive, raw:['OWNER'] AS case_sensitive FROM store_data", + ); // Backtick-quoted keys (Databricks delimited identifiers) normalise to double-quoted output. databricks().one_statement_parses_to( - "SELECT raw:`zip code`, raw:`Zip Code` FROM store_data", - r#"SELECT raw:"zip code", raw:"Zip Code" FROM store_data"#, + "SELECT raw:`zip code`, raw:`Zip Code`, raw:['fb:testid'] FROM store_data", + r#"SELECT raw:"zip code", raw:"Zip Code", raw:['fb:testid'] FROM store_data"#, ); - // A colon inside a string literal key is parsed as part of the string, not as an operator. - databricks() - .parse_sql_statements("SELECT raw:['fb:testid'] FROM store_data") - .unwrap(); // Dot notation databricks().verified_only_select("SELECT raw:store.bicycle FROM store_data"); @@ -733,23 +727,23 @@ fn parse_databricks_json_accessor() { } ); - // raw:['OWNER'] — bracket as the first path element (directly after the colon) - let select = databricks() - .parse_sql_statements("SELECT raw:['OWNER'] FROM t") - .unwrap(); - if let Statement::Query(q) = &select[0] { - if let SetExpr::Select(sel) = q.body.as_ref() { - assert_eq!( - sel.projection[0], - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("raw"))), - path: JsonPath { - path: vec![JsonPathElem::Bracket { - key: Expr::value(Value::SingleQuotedString("OWNER".to_owned())), - }], + // raw:['OWNER'] — bracket directly after the colon. An empty-key sentinel Dot is prepended + // so that the display re-emits the leading `:`, enabling a correct round-trip. + assert_eq!( + databricks().verified_expr("raw:['OWNER']"), + Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("raw"))), + path: JsonPath { + path: vec![ + JsonPathElem::Dot { + key: String::new(), + quoted: false, }, - }) - ); + JsonPathElem::Bracket { + key: Expr::value(Value::SingleQuotedString("OWNER".to_owned())), + }, + ], + }, } - } + ); } From 6397d3cff0b5f4c5ac32163c3529123e96efae99 Mon Sep 17 00:00:00 2001 From: whirlun Date: Mon, 9 Mar 2026 13:37:26 -0700 Subject: [PATCH 3/4] add JsonPathElem::ColonBracket variant and remove AST tests --- src/ast/mod.rs | 11 ++++ src/ast/spans.rs | 1 + src/parser/mod.rs | 32 +++-------- tests/sqlparser_databricks.rs | 102 ---------------------------------- 4 files changed, 20 insertions(+), 126 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e201f7842..458d89add 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -651,6 +651,14 @@ pub enum JsonPathElem { /// The expression used as the bracket key (string or numeric expression). key: Expr, }, + /// Access an object field using colon bracket notation + /// e.g. `obj:['foo']` + /// + /// See + ColonBracket { + /// The expression used as the bracket key (string or numeric expression). + key: Expr, + }, } /// A JSON path. @@ -685,6 +693,9 @@ impl fmt::Display for JsonPath { JsonPathElem::Bracket { key } => { write!(f, "[{key}]")?; } + JsonPathElem::ColonBracket { key } => { + write!(f, ":[{key}]")?; + } } } Ok(()) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 57d57b249..61d3926bb 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1797,6 +1797,7 @@ impl Spanned for JsonPathElem { match self { JsonPathElem::Dot { .. } => Span::empty(), JsonPathElem::Bracket { key } => key.span(), + JsonPathElem::ColonBracket { key } => key.span(), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d4b85cc23..6007d5257 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4211,15 +4211,6 @@ impl<'a> Parser<'a> { } } - fn parse_json_path_bracket_key(&mut self) -> Result { - // Databricks supports [*] wildcard accessor - if self.consume_token(&Token::Mul) { - Ok(Expr::Wildcard(AttachedToken::empty())) - } else { - self.parse_expr() - } - } - fn parse_json_access(&mut self, expr: Expr) -> Result { let path = self.parse_json_path()?; Ok(Expr::JsonAccess { @@ -4232,27 +4223,20 @@ impl<'a> Parser<'a> { let mut path = Vec::new(); loop { match self.next_token().token { + Token::Colon if path.is_empty() && self.peek_token_ref() == &Token::LBracket => { + self.next_token(); + let key = self.parse_wildcard_expr()?; + self.expect_token(&Token::RBracket)?; + path.push(JsonPathElem::ColonBracket { key }); + } Token::Colon if path.is_empty() => { - if self.peek_token_ref().token == Token::LBracket { - // A bracket element directly after the colon, e.g. `raw:['field']`. - // Push an empty Dot so the display re-emits the leading `:` for syntax roundtrip. - path.push(JsonPathElem::Dot { - key: String::new(), - quoted: false, - }); - self.next_token(); - let key = self.parse_json_path_bracket_key()?; - self.expect_token(&Token::RBracket)?; - path.push(JsonPathElem::Bracket { key }); - } else { - path.push(self.parse_json_path_object_key()?); - } + path.push(self.parse_json_path_object_key()?); } Token::Period if !path.is_empty() => { path.push(self.parse_json_path_object_key()?); } Token::LBracket => { - let key = self.parse_json_path_bracket_key()?; + let key = self.parse_wildcard_expr()?; self.expect_token(&Token::RBracket)?; path.push(JsonPathElem::Bracket { key }); diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index a1cb34ba0..041430656 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -644,106 +644,4 @@ fn parse_databricks_json_accessor() { "SELECT raw:store.bicycle.price::double FROM store_data", "SELECT raw:store.bicycle.price::DOUBLE FROM store_data", ); - - // --- AST structure assertions --- - - // Simple dot access - assert_eq!( - databricks().verified_expr("raw:owner"), - Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("raw"))), - path: JsonPath { - path: vec![JsonPathElem::Dot { - key: "owner".to_owned(), - quoted: false, - }], - }, - } - ); - - // Multi-level dot access - assert_eq!( - databricks().verified_expr("raw:store.bicycle"), - Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("raw"))), - path: JsonPath { - path: vec![ - JsonPathElem::Dot { - key: "store".to_owned(), - quoted: false, - }, - JsonPathElem::Dot { - key: "bicycle".to_owned(), - quoted: false, - }, - ], - }, - } - ); - - // Dot path followed by an integer-index bracket - assert_eq!( - databricks().verified_expr("raw:store.fruit[0]"), - Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("raw"))), - path: JsonPath { - path: vec![ - JsonPathElem::Dot { - key: "store".to_owned(), - quoted: false, - }, - JsonPathElem::Dot { - key: "fruit".to_owned(), - quoted: false, - }, - JsonPathElem::Bracket { - key: Expr::value(number("0")), - }, - ], - }, - } - ); - - // [*] is stored as Expr::Wildcard inside a Bracket element - assert_eq!( - databricks().verified_expr("raw:store.basket[*]"), - Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("raw"))), - path: JsonPath { - path: vec![ - JsonPathElem::Dot { - key: "store".to_owned(), - quoted: false, - }, - JsonPathElem::Dot { - key: "basket".to_owned(), - quoted: false, - }, - JsonPathElem::Bracket { - key: Expr::Wildcard(AttachedToken::empty()), - }, - ], - }, - } - ); - - // raw:['OWNER'] — bracket directly after the colon. An empty-key sentinel Dot is prepended - // so that the display re-emits the leading `:`, enabling a correct round-trip. - assert_eq!( - databricks().verified_expr("raw:['OWNER']"), - Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("raw"))), - path: JsonPath { - path: vec![ - JsonPathElem::Dot { - key: String::new(), - quoted: false, - }, - JsonPathElem::Bracket { - key: Expr::value(Value::SingleQuotedString("OWNER".to_owned())), - }, - ], - }, - } - ); } From b56a683900da1802c1d19024c7f5c17439e9ed10 Mon Sep 17 00:00:00 2001 From: whirlun Date: Mon, 9 Mar 2026 13:38:24 -0700 Subject: [PATCH 4/4] remove doc link from test --- tests/sqlparser_databricks.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 041430656..79b3d0654 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -601,7 +601,6 @@ fn parse_databricks_struct_type() { } } -// https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html #[test] fn parse_databricks_json_accessor() { // Basic colon accessor — unquoted field names are case-insensitive