Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,14 @@ pub enum JsonPathElem {
/// The expression used as the bracket key (string, numeric, or wildcard `*` expression).
key: Expr,
},
/// Access an object field using colon bracket notation
/// e.g. `obj:['foo']`
///
/// See <https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html>
ColonBracket {
/// The expression used as the bracket key (string, numeric, or wildcard `*` expression).
key: Expr,
},
}

/// A JSON path.
Expand Down Expand Up @@ -685,6 +693,9 @@ impl fmt::Display for JsonPath {
JsonPathElem::Bracket { key } => {
write!(f, "[{key}]")?;
}
JsonPathElem::ColonBracket { key } => {
write!(f, ":[{key}]")?;
}
}
}
Ok(())
Expand Down
1 change: 1 addition & 0 deletions src/ast/spans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1797,6 +1797,7 @@ impl Spanned for JsonPathElem {
match self {
JsonPathElem::Dot { .. } => Span::empty(),
JsonPathElem::Bracket { key } => key.span(),
JsonPathElem::ColonBracket { key } => key.span(),
}
}
}
Expand Down
13 changes: 10 additions & 3 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4191,8 +4191,9 @@ impl<'a> Parser<'a> {
match token.token {
Token::Word(Word {
value,
// path segments in SF dot notation can be unquoted or double-quoted
quote_style: quote_style @ (Some('"') | None),
// path segments in SF dot notation can be unquoted or double-quoted;
// Databricks also supports backtick-quoted identifiers
quote_style: quote_style @ (Some('"') | Some('`') | None),
// some experimentation suggests that snowflake permits
// any keyword here unquoted.
keyword: _,
Expand Down Expand Up @@ -4222,14 +4223,20 @@ impl<'a> Parser<'a> {
let mut path = Vec::new();
loop {
match self.next_token().token {
Token::Colon if path.is_empty() && self.peek_token_ref() == &Token::LBracket => {
self.next_token();
let key = self.parse_wildcard_expr()?;
self.expect_token(&Token::RBracket)?;
path.push(JsonPathElem::ColonBracket { key });
}
Token::Colon if path.is_empty() => {
path.push(self.parse_json_path_object_key()?);
}
Token::Period if !path.is_empty() => {
path.push(self.parse_json_path_object_key()?);
}
Token::LBracket => {
let key = self.parse_expr()?;
let key = self.parse_wildcard_expr()?;
self.expect_token(&Token::RBracket)?;

path.push(JsonPathElem::Bracket { key });
Expand Down
44 changes: 44 additions & 0 deletions tests/sqlparser_databricks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -600,3 +600,47 @@ fn parse_databricks_struct_type() {
_ => unreachable!(),
}
}

#[test]
fn parse_databricks_json_accessor() {
    // Colon accessors that must serialize back to exactly the input text.
    let colon_round_trips = [
        // Basic colon accessor — unquoted field names are case-insensitive
        "SELECT raw:owner, RAW:owner FROM store_data",
        // Unquoted field access is case-insensitive; bracket notation is case-sensitive.
        "SELECT raw:OWNER AS case_insensitive, raw:['OWNER'] AS case_sensitive FROM store_data",
    ];
    for sql in colon_round_trips {
        databricks().verified_only_select(sql);
    }

    // Backtick-quoted keys (Databricks delimited identifiers) normalise to double-quoted output.
    let backtick_input = "SELECT raw:`zip code`, raw:`Zip Code`, raw:['fb:testid'] FROM store_data";
    let backtick_expected =
        r#"SELECT raw:"zip code", raw:"Zip Code", raw:['fb:testid'] FROM store_data"#;
    databricks().one_statement_parses_to(backtick_input, backtick_expected);

    // Nested path forms that must also serialize back to exactly the input text.
    let nested_round_trips = [
        // Dot notation
        "SELECT raw:store.bicycle FROM store_data",
        // String-key bracket notation after a dot segment
        "SELECT raw:store['bicycle'], raw:store['BICYCLE'] FROM store_data",
        // Integer-index bracket notation
        "SELECT raw:store.fruit[0], raw:store.fruit[1] FROM store_data",
        // Wildcard [*] — including chained and mixed positions
        "SELECT raw:store.basket[*], raw:store.basket[*][0] AS first_of_baskets, \
         raw:store.basket[0][*] AS first_basket, raw:store.basket[*][*] AS all_elements_flattened, \
         raw:store.basket[0][2].b AS subfield FROM store_data",
        // Dot access following a wildcard bracket
        "SELECT raw:store.book[*].isbn FROM store_data",
    ];
    for sql in nested_round_trips {
        databricks().verified_only_select(sql);
    }

    // Double-colon cast — type keyword normalises to upper case
    let cast_input = "SELECT raw:store.bicycle.price::double FROM store_data";
    let cast_expected = "SELECT raw:store.bicycle.price::DOUBLE FROM store_data";
    databricks().one_statement_parses_to(cast_input, cast_expected);
}