From 168dc6d05e689624bcb435b0470291981c814918 Mon Sep 17 00:00:00 2001 From: wilbyang Date: Fri, 6 Mar 2026 09:15:26 +0100 Subject: [PATCH] feat: emit warning diagnostic when comparing with NULL using = or <> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #14434 Comparing a column against NULL with `=` or `<>` always produces NULL (never TRUE or FALSE), which is almost certainly a mistake. The user almost certainly meant `IS NULL` or `IS NOT NULL` instead. This commit: - Adds `warnings: Arc>>` to `PlannerContext` so non-fatal diagnostics can be collected during planning without halting execution. The `Arc` ensures all clones (e.g. subquery contexts) share the same backing store, so warnings from nested queries surface to the top-level caller. - Detects `expr = NULL` / `NULL = expr` / `expr <> NULL` in the binary expression stack machine and pushes a `Diagnostic::new_warning` with a help message suggesting `IS NULL` / `IS NOT NULL`. - Adds `PlannerContext::add_warning` and `PlannerContext::take_warnings` as the public API for producing and consuming warnings. - Adds 5 tests in `datafusion/sql/tests/cases/diagnostic.rs` covering the new warning, the negative case (`IS NULL` → no warning), and multiple warnings in one query. --- datafusion/sql/src/expr/mod.rs | 42 ++++++++- datafusion/sql/src/planner.rs | 35 +++++++- datafusion/sql/tests/cases/diagnostic.rs | 109 ++++++++++++++++++++++- 3 files changed, 181 insertions(+), 5 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 7902eed1e6922..eb8ab9fcab8d5 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -27,8 +27,8 @@ use sqlparser::ast::{ }; use datafusion_common::{ - DFSchema, Result, ScalarValue, internal_datafusion_err, internal_err, not_impl_err, - plan_err, + DFSchema, Diagnostic, Result, ScalarValue, internal_datafusion_err, internal_err, + not_impl_err, plan_err, }; use datafusion_expr::expr::ScalarFunction; @@ -108,6 +108,44 @@ impl SqlToRel<'_, S> { StackEntry::Operator(op) => { let right = eval_stack.pop().unwrap(); let left = eval_stack.pop().unwrap(); + // Warn when `expr = NULL` or `NULL = expr` is used. + // Such comparisons always evaluate to NULL (never TRUE or + // FALSE), so the user almost certainly meant `IS NULL` / + // `IS NOT NULL` instead. + if matches!(op, BinaryOperator::Eq | BinaryOperator::NotEq) { + let right_is_null = + matches!(right, Expr::Literal(ScalarValue::Null, _)); + let left_is_null = + matches!(left, Expr::Literal(ScalarValue::Null, _)); + if right_is_null || left_is_null { + // Point the span at the non-null operand (the + // column or expression being compared), because + // the NULL literal itself carries no span. + let span = if right_is_null { + left.spans().and_then(|s| s.first()) + } else { + right.spans().and_then(|s| s.first()) + }; + let (op_str, suggestion) = + if matches!(op, BinaryOperator::Eq) { + ("=", "IS NULL") + } else { + ("<>", "IS NOT NULL") + }; + let mut warning = Diagnostic::new_warning( + format!( + "'{op_str} NULL' will always be NULL \ + (null comparisons never return true or false)" + ), + span, + ); + warning.add_help( + format!("use '{suggestion}' instead"), + None, + ); + planner_context.add_warning(warning); + } + } let expr = self.build_logical_expr(op, left, right, schema)?; eval_stack.push(expr); } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 307f28e8ff9ad..9d4add198d65c 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -18,7 +18,7 @@ //! [`SqlToRel`]: SQL Query Planner (produces [`LogicalPlan`] from SQL AST) use std::collections::HashMap; use std::str::FromStr; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use std::vec; use crate::utils::make_decimal_type; @@ -253,6 +253,11 @@ impl IdentNormalizer { /// This helps resolve scoping issues of CTEs. /// By using cloning, a subquery can inherit CTEs from the outer query /// and can also define its own private CTEs without affecting the outer query. +/// +/// Warnings (non-fatal [`Diagnostic`]s) are shared across all clones via +/// [`Arc`], so warnings collected in subquery contexts are visible from the +/// top-level context. Use [`PlannerContext::take_warnings`] after planning to +/// retrieve them. #[derive(Debug, Clone)] pub struct PlannerContext { /// Data types for numbered parameters ($1, $2, etc), if supplied @@ -270,6 +275,9 @@ pub struct PlannerContext { outer_from_schema: Option, /// The query schema defined by the table create_table_schema: Option, + /// Non-fatal warnings collected during planning. Shared across clones (e.g. + /// for subqueries) so that all warnings surface to the top-level caller. + warnings: Arc>>, } impl Default for PlannerContext { @@ -287,9 +295,34 @@ impl PlannerContext { outer_queries_schemas_stack: vec![], outer_from_schema: None, create_table_schema: None, + warnings: Arc::new(Mutex::new(vec![])), } } + /// Add a non-fatal warning [`Diagnostic`] collected during planning. + /// + /// Warnings do not halt query execution. Call [`Self::take_warnings`] after + /// planning to retrieve them. + pub fn add_warning(&self, warning: Diagnostic) { + self.warnings + .lock() + .expect("warnings lock poisoned") + .push(warning); + } + + /// Drain and return all non-fatal [`Diagnostic`] warnings collected during + /// planning. This is shared across cloned contexts (e.g. subqueries), so + /// calling this on the top-level context returns warnings from the entire + /// query. + pub fn take_warnings(&self) -> Vec { + std::mem::take( + &mut self + .warnings + .lock() + .expect("warnings lock poisoned"), + ) + } + /// Update the PlannerContext with provided prepare_param_data_types pub fn with_prepare_param_data_types( mut self, diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index 7a729739469d3..22561e09e324a 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -21,8 +21,8 @@ use std::{collections::HashMap, sync::Arc}; use datafusion_common::{Diagnostic, Location, Result, Span}; use datafusion_sql::{ - parser::{DFParser, DFParserBuilder}, - planner::{ParserOptions, SqlToRel}, + parser::{DFParser, DFParserBuilder, Statement as DFStatement}, + planner::{ParserOptions, PlannerContext, SqlToRel}, }; use regex::Regex; @@ -51,6 +51,33 @@ fn do_query(sql: &'static str) -> Diagnostic { } } +/// Plan a query that is expected to **succeed** and return any non-fatal +/// [`Diagnostic`] warnings that were collected during planning. +fn do_query_warnings(sql: &'static str) -> Vec { + let statement = DFParserBuilder::new(sql) + .build() + .expect("unable to create parser") + .parse_statement() + .expect("unable to parse query"); + let options = ParserOptions { + collect_spans: true, + ..ParserOptions::default() + }; + let state = MockSessionState::default(); + let context = MockContextProvider { state }; + let sql_to_rel = SqlToRel::new_with_options(&context, options); + let mut planner_context = PlannerContext::new(); + match statement { + DFStatement::Statement(s) => { + sql_to_rel + .sql_statement_to_plan_with_context(*s, &mut planner_context) + .expect("expected planning to succeed"); + } + _ => panic!("expected a SQL statement"), + } + planner_context.take_warnings() +} + /// Given a query that contains tag delimited spans, returns a mapping from the /// span name to the [`Span`]. Tags are comments of the form `/*tag*/`. In case /// you want the same location to open two spans, or close open and open @@ -390,3 +417,81 @@ fn test_syntax_error() -> Result<()> { }, } } + +// ── = NULL warning tests ────────────────────────────────────────────────────── + +#[test] +fn test_eq_null_warning_in_where() -> Result<()> { + let query = "SELECT * FROM person WHERE /*col*/first_name/*col*/ = NULL"; + let spans = get_spans(query); + let warnings = do_query_warnings(query); + assert_eq!(warnings.len(), 1); + let w = &warnings[0]; + assert_snapshot!( + w.message, + @"'= NULL' will always be NULL (null comparisons never return true or false)" + ); + assert_eq!(w.span, Some(spans["col"])); + assert_snapshot!(w.helps[0].message, @"use 'IS NULL' instead"); + Ok(()) +} + +#[test] +fn test_null_eq_warning_in_where() -> Result<()> { + // NULL on the left side + let query = "SELECT * FROM person WHERE NULL = /*col*/first_name/*col*/"; + let spans = get_spans(query); + let warnings = do_query_warnings(query); + assert_eq!(warnings.len(), 1); + let w = &warnings[0]; + assert_snapshot!( + w.message, + @"'= NULL' will always be NULL (null comparisons never return true or false)" + ); + assert_eq!(w.span, Some(spans["col"])); + assert_snapshot!(w.helps[0].message, @"use 'IS NULL' instead"); + Ok(()) +} + +#[test] +fn test_neq_null_warning_in_where() -> Result<()> { + // <> NULL also warrants a warning + let query = "SELECT * FROM person WHERE /*col*/first_name/*col*/ <> NULL"; + let spans = get_spans(query); + let warnings = do_query_warnings(query); + assert_eq!(warnings.len(), 1); + let w = &warnings[0]; + assert_snapshot!( + w.message, + @"'<> NULL' will always be NULL (null comparisons never return true or false)" + ); + assert_eq!(w.span, Some(spans["col"])); + assert_snapshot!(w.helps[0].message, @"use 'IS NOT NULL' instead"); + Ok(()) +} + +#[test] +fn test_is_null_no_warning() -> Result<()> { + // IS NULL is the correct form – no warning expected + let query = "SELECT * FROM person WHERE first_name IS NULL"; + let warnings = do_query_warnings(query); + assert!( + warnings.is_empty(), + "expected no warnings for IS NULL, got: {warnings:?}" + ); + Ok(()) +} + +#[test] +fn test_multiple_eq_null_warnings() -> Result<()> { + // Multiple = NULL comparisons in the same query each produce a warning + let query = + "SELECT * FROM person WHERE first_name = NULL AND last_name = NULL"; + let warnings = do_query_warnings(query); + assert_eq!(warnings.len(), 2); + let expected = + "'= NULL' will always be NULL (null comparisons never return true or false)"; + assert_eq!(warnings[0].message, expected); + assert_eq!(warnings[1].message, expected); + Ok(()) +}