diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e201f7842..458d89add 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -651,6 +651,14 @@ pub enum JsonPathElem { /// The expression used as the bracket key (string or numeric expression). key: Expr, }, + /// Access an object field using colon bracket notation + /// e.g. `obj:['foo']` + /// + /// See the Databricks SQL reference for the `:` (colon sign) operator. + ColonBracket { + /// The expression used as the bracket key (string, numeric, or `*` wildcard expression). + key: Expr, + }, } /// A JSON path. @@ -685,6 +693,9 @@ impl fmt::Display for JsonPath { JsonPathElem::Bracket { key } => { write!(f, "[{key}]")?; } + JsonPathElem::ColonBracket { key } => { + write!(f, ":[{key}]")?; + } } } Ok(()) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 57d57b249..61d3926bb 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1797,6 +1797,7 @@ impl Spanned for JsonPathElem { match self { JsonPathElem::Dot { .. } => Span::empty(), JsonPathElem::Bracket { key } => key.span(), + JsonPathElem::ColonBracket { key } => key.span(), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index eaaa95ec8..6007d5257 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4191,8 +4191,9 @@ impl<'a> Parser<'a> { match token.token { Token::Word(Word { value, - // path segments in SF dot notation can be unquoted or double-quoted - quote_style: quote_style @ (Some('"') | None), + // path segments in SF dot notation can be unquoted or double-quoted; + // Databricks also supports backtick-quoted identifiers + quote_style: quote_style @ (Some('"') | Some('`') | None), // some experimentation suggests that snowflake permits // any keyword here unquoted. 
keyword: _, @@ -4222,6 +4223,12 @@ impl<'a> Parser<'a> { let mut path = Vec::new(); loop { match self.next_token().token { + Token::Colon if path.is_empty() && self.peek_token_ref() == &Token::LBracket => { + self.next_token(); + let key = self.parse_wildcard_expr()?; + self.expect_token(&Token::RBracket)?; + path.push(JsonPathElem::ColonBracket { key }); + } Token::Colon if path.is_empty() => { path.push(self.parse_json_path_object_key()?); } @@ -4229,7 +4236,7 @@ impl<'a> Parser<'a> { path.push(self.parse_json_path_object_key()?); } Token::LBracket => { - let key = self.parse_expr()?; + let key = self.parse_wildcard_expr()?; self.expect_token(&Token::RBracket)?; path.push(JsonPathElem::Bracket { key }); diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 24d06ef2f..79b3d0654 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -600,3 +600,47 @@ fn parse_databricks_struct_type() { _ => unreachable!(), } } + +#[test] +fn parse_databricks_json_accessor() { + // Basic colon accessor — shown with the column identifier in both lower and upper case + databricks().verified_only_select("SELECT raw:owner, RAW:owner FROM store_data"); + + // Unquoted field access is case-insensitive; bracket notation is case-sensitive. + databricks().verified_only_select( + "SELECT raw:OWNER AS case_insensitive, raw:['OWNER'] AS case_sensitive FROM store_data", + ); + + // Backtick-quoted keys (Databricks delimited identifiers) normalise to double-quoted output. 
+ databricks().one_statement_parses_to( + "SELECT raw:`zip code`, raw:`Zip Code`, raw:['fb:testid'] FROM store_data", + r#"SELECT raw:"zip code", raw:"Zip Code", raw:['fb:testid'] FROM store_data"#, + ); + + // Dot notation + databricks().verified_only_select("SELECT raw:store.bicycle FROM store_data"); + + // String-key bracket notation after a dot segment + databricks() + .verified_only_select("SELECT raw:store['bicycle'], raw:store['BICYCLE'] FROM store_data"); + + // Integer-index bracket notation + databricks() + .verified_only_select("SELECT raw:store.fruit[0], raw:store.fruit[1] FROM store_data"); + + // Wildcard [*] — including chained and mixed positions + databricks().verified_only_select( + "SELECT raw:store.basket[*], raw:store.basket[*][0] AS first_of_baskets, \ + raw:store.basket[0][*] AS first_basket, raw:store.basket[*][*] AS all_elements_flattened, \ + raw:store.basket[0][2].b AS subfield FROM store_data", + ); + + // Dot access following a wildcard bracket + databricks().verified_only_select("SELECT raw:store.book[*].isbn FROM store_data"); + + // Double-colon cast — type keyword normalises to upper case + databricks().one_statement_parses_to( + "SELECT raw:store.bicycle.price::double FROM store_data", + "SELECT raw:store.bicycle.price::DOUBLE FROM store_data", + ); +}