From 2bdc7ee91736b44b9b4e8e61ea6081b5b1a99b68 Mon Sep 17 00:00:00 2001 From: Test User Date: Sat, 7 Feb 2026 17:45:08 -0600 Subject: [PATCH] Fix tokenization of escaped backslashes in SQL string literals (issue #814) The regex pattern for matching single-quoted strings did not handle escaped backslashes (\\) properly. This caused strings like '\\', '\\' to be incorrectly tokenized as a single string with a comma instead of two separate string literals. Changes: - Add \\ to the string literal patterns in SQL_REGEX to match escaped backslashes as valid content within string literals - Add test case for escaped backslash tokenization Fixes #814 --- sqlparse/keywords.py | 4 ++-- tests/test_tokenize.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 874431f4..64834437 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -59,9 +59,9 @@ (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', tokens.Number.Float), (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer), - (r"'(''|\\'|[^'])*'", tokens.String.Single), + (r"'(''|\\'|\\\\|[^'])*'", tokens.String.Single), # not a real string literal in ANSI SQL: - (r'"(""|\\"|[^"])*"', tokens.String.Symbol), + (r'"(""|\\"|\\\\|[^"])*"', tokens.String.Symbol), (r'(""|".*?[^\\]")', tokens.String.Symbol), # sqlite names can be escaped with [square brackets]. left bracket # cannot be preceded by word character or a right bracket -- diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index e368e83e..f22bf9f0 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -99,6 +99,21 @@ def test_single_quotes(): assert repr(p.tokens[0])[:len(tst)] == tst +def test_single_quotes_escaped_backslash(): + # issue 814 - Incorrect Tokenization of Escaped Backslashes + # A string containing an escaped backslash (\\) should be tokenized + # as a single string literal, not split incorrectly. 
+ sql = r"SELECT '\\', '\\'" + tokens = list(lexer.tokenize(sql)) + # Should be: SELECT, ws, '\\', ,, ws, '\\' + assert tokens[0] == (T.Keyword.DML, 'SELECT') + assert tokens[1] == (T.Whitespace, ' ') + assert tokens[2] == (T.String.Single, "'\\\\'") + assert tokens[3] == (T.Punctuation, ',') + assert tokens[4] == (T.Whitespace, ' ') + assert tokens[5] == (T.String.Single, "'\\\\'") + + def test_tokenlist_first(): p = sqlparse.parse(' select foo')[0] first = p.token_first()