Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 40 additions & 2 deletions datafusion/sql/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ use sqlparser::ast::{
};

use datafusion_common::{
DFSchema, Result, ScalarValue, internal_datafusion_err, internal_err, not_impl_err,
plan_err,
DFSchema, Diagnostic, Result, ScalarValue, internal_datafusion_err, internal_err,
not_impl_err, plan_err,
};

use datafusion_expr::expr::ScalarFunction;
Expand Down Expand Up @@ -108,6 +108,44 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
StackEntry::Operator(op) => {
let right = eval_stack.pop().unwrap();
let left = eval_stack.pop().unwrap();
// Warn when `expr = NULL` or `NULL = expr` is used.
// Such comparisons always evaluate to NULL (never TRUE or
// FALSE), so the user almost certainly meant `IS NULL` /
// `IS NOT NULL` instead.
if matches!(op, BinaryOperator::Eq | BinaryOperator::NotEq) {
let right_is_null =
matches!(right, Expr::Literal(ScalarValue::Null, _));
let left_is_null =
matches!(left, Expr::Literal(ScalarValue::Null, _));
if right_is_null || left_is_null {
// Point the span at the non-null operand (the
// column or expression being compared), because
// the NULL literal itself carries no span.
let span = if right_is_null {
left.spans().and_then(|s| s.first())
} else {
right.spans().and_then(|s| s.first())
};
let (op_str, suggestion) =
if matches!(op, BinaryOperator::Eq) {
("=", "IS NULL")
} else {
("<>", "IS NOT NULL")
};
let mut warning = Diagnostic::new_warning(
format!(
"'{op_str} NULL' will always be NULL \
(null comparisons never return true or false)"
),
span,
);
warning.add_help(
format!("use '{suggestion}' instead"),
None,
);
planner_context.add_warning(warning);
}
}
let expr = self.build_logical_expr(op, left, right, schema)?;
eval_stack.push(expr);
}
Expand Down
35 changes: 34 additions & 1 deletion datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
//! [`SqlToRel`]: SQL Query Planner (produces [`LogicalPlan`] from SQL AST)
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use std::sync::{Arc, Mutex};
use std::vec;

use crate::utils::make_decimal_type;
Expand Down Expand Up @@ -253,6 +253,11 @@ impl IdentNormalizer {
/// This helps resolve scoping issues of CTEs.
/// By using cloning, a subquery can inherit CTEs from the outer query
/// and can also define its own private CTEs without affecting the outer query.
///
/// Warnings (non-fatal [`Diagnostic`]s) are shared across all clones via
/// [`Arc`], so warnings collected in subquery contexts are visible from the
/// top-level context. Use [`PlannerContext::take_warnings`] after planning to
/// retrieve them.
#[derive(Debug, Clone)]
pub struct PlannerContext {
/// Data types for numbered parameters ($1, $2, etc), if supplied
Expand All @@ -270,6 +275,9 @@ pub struct PlannerContext {
outer_from_schema: Option<DFSchemaRef>,
/// The query schema defined by the table
create_table_schema: Option<DFSchemaRef>,
/// Non-fatal warnings collected during planning. Shared across clones (e.g.
/// for subqueries) so that all warnings surface to the top-level caller.
warnings: Arc<Mutex<Vec<Diagnostic>>>,
}

impl Default for PlannerContext {
Expand All @@ -287,9 +295,34 @@ impl PlannerContext {
outer_queries_schemas_stack: vec![],
outer_from_schema: None,
create_table_schema: None,
warnings: Arc::new(Mutex::new(vec![])),
}
}

/// Add a non-fatal warning [`Diagnostic`] collected during planning.
///
/// Warnings do not halt query execution. Call [`Self::take_warnings`] after
/// planning to retrieve them.
pub fn add_warning(&self, warning: Diagnostic) {
self.warnings
.lock()
.expect("warnings lock poisoned")
.push(warning);
}

/// Drain and return all non-fatal [`Diagnostic`] warnings collected during
/// planning. This is shared across cloned contexts (e.g. subqueries), so
/// calling this on the top-level context returns warnings from the entire
/// query.
pub fn take_warnings(&self) -> Vec<Diagnostic> {
std::mem::take(
&mut self
.warnings
.lock()
.expect("warnings lock poisoned"),
)
}

/// Update the PlannerContext with provided prepare_param_data_types
pub fn with_prepare_param_data_types(
mut self,
Expand Down
109 changes: 107 additions & 2 deletions datafusion/sql/tests/cases/diagnostic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ use std::{collections::HashMap, sync::Arc};

use datafusion_common::{Diagnostic, Location, Result, Span};
use datafusion_sql::{
parser::{DFParser, DFParserBuilder},
planner::{ParserOptions, SqlToRel},
parser::{DFParser, DFParserBuilder, Statement as DFStatement},
planner::{ParserOptions, PlannerContext, SqlToRel},
};
use regex::Regex;

Expand Down Expand Up @@ -51,6 +51,33 @@ fn do_query(sql: &'static str) -> Diagnostic {
}
}

/// Plan a query that is expected to **succeed** and return any non-fatal
/// [`Diagnostic`] warnings that were collected during planning.
fn do_query_warnings(sql: &'static str) -> Vec<Diagnostic> {
let statement = DFParserBuilder::new(sql)
.build()
.expect("unable to create parser")
.parse_statement()
.expect("unable to parse query");
let options = ParserOptions {
collect_spans: true,
..ParserOptions::default()
};
let state = MockSessionState::default();
let context = MockContextProvider { state };
let sql_to_rel = SqlToRel::new_with_options(&context, options);
let mut planner_context = PlannerContext::new();
match statement {
DFStatement::Statement(s) => {
sql_to_rel
.sql_statement_to_plan_with_context(*s, &mut planner_context)
.expect("expected planning to succeed");
}
_ => panic!("expected a SQL statement"),
}
planner_context.take_warnings()
}

/// Given a query that contains tag delimited spans, returns a mapping from the
/// span name to the [`Span`]. Tags are comments of the form `/*tag*/`. In case
/// you want the same location to open two spans, or close open and open
Expand Down Expand Up @@ -390,3 +417,81 @@ fn test_syntax_error() -> Result<()> {
},
}
}

// ── = NULL warning tests ──────────────────────────────────────────────────────

#[test]
fn test_eq_null_warning_in_where() -> Result<()> {
let query = "SELECT * FROM person WHERE /*col*/first_name/*col*/ = NULL";
let spans = get_spans(query);
let warnings = do_query_warnings(query);
assert_eq!(warnings.len(), 1);
let w = &warnings[0];
assert_snapshot!(
w.message,
@"'= NULL' will always be NULL (null comparisons never return true or false)"
);
assert_eq!(w.span, Some(spans["col"]));
assert_snapshot!(w.helps[0].message, @"use 'IS NULL' instead");
Ok(())
}

#[test]
fn test_null_eq_warning_in_where() -> Result<()> {
// NULL on the left side
let query = "SELECT * FROM person WHERE NULL = /*col*/first_name/*col*/";
let spans = get_spans(query);
let warnings = do_query_warnings(query);
assert_eq!(warnings.len(), 1);
let w = &warnings[0];
assert_snapshot!(
w.message,
@"'= NULL' will always be NULL (null comparisons never return true or false)"
);
assert_eq!(w.span, Some(spans["col"]));
assert_snapshot!(w.helps[0].message, @"use 'IS NULL' instead");
Ok(())
}

#[test]
fn test_neq_null_warning_in_where() -> Result<()> {
// <> NULL also warrants a warning
let query = "SELECT * FROM person WHERE /*col*/first_name/*col*/ <> NULL";
let spans = get_spans(query);
let warnings = do_query_warnings(query);
assert_eq!(warnings.len(), 1);
let w = &warnings[0];
assert_snapshot!(
w.message,
@"'<> NULL' will always be NULL (null comparisons never return true or false)"
);
assert_eq!(w.span, Some(spans["col"]));
assert_snapshot!(w.helps[0].message, @"use 'IS NOT NULL' instead");
Ok(())
}

#[test]
fn test_is_null_no_warning() -> Result<()> {
// IS NULL is the correct form – no warning expected
let query = "SELECT * FROM person WHERE first_name IS NULL";
let warnings = do_query_warnings(query);
assert!(
warnings.is_empty(),
"expected no warnings for IS NULL, got: {warnings:?}"
);
Ok(())
}

#[test]
fn test_multiple_eq_null_warnings() -> Result<()> {
// Multiple = NULL comparisons in the same query each produce a warning
let query =
"SELECT * FROM person WHERE first_name = NULL AND last_name = NULL";
let warnings = do_query_warnings(query);
assert_eq!(warnings.len(), 2);
let expected =
"'= NULL' will always be NULL (null comparisons never return true or false)";
assert_eq!(warnings[0].message, expected);
assert_eq!(warnings[1].message, expected);
Ok(())
}