Skip to content

Commit 40ed3aa

Browse files
committed
Add hard limits for grouping to prevent DoS attacks.
1 parent 3e593bf commit 40ed3aa

4 files changed

Lines changed: 181 additions & 7 deletions

File tree

CHANGELOG

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
11
Development Version
22
-------------------
33

4-
Nothing yet.
4+
Bug Fixes
5+
6+
* Add additional protection against denial of service attacks when parsing
7+
very large lists of tuples. This enhances the existing recursion protections
8+
with configurable limits for token processing to prevent DoS through
9+
algorithmic complexity attacks. The new limits (MAX_GROUPING_DEPTH=100,
10+
MAX_GROUPING_TOKENS=10000) can be adjusted or disabled (by setting to None)
11+
if needed for legitimate large SQL statements.
512

613

714
Release 0.5.3 (Dec 10, 2024)

docs/source/api.rst

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,39 @@ The :meth:`~sqlparse.format` function accepts the following keyword arguments.
7373
If ``True`` comma-first notation for column names is used.
7474

7575

76+
Security and Performance Considerations
77+
---------------------------------------
78+
79+
For developers working with very large SQL statements or in security-sensitive
80+
environments, sqlparse includes built-in protections against potential denial
81+
of service (DoS) attacks:
82+
83+
**Grouping Limits**
84+
The parser includes configurable limits to prevent excessive resource
85+
consumption when processing very large or deeply nested SQL structures:
86+
87+
- ``MAX_GROUPING_DEPTH`` (default: 100) - Limits recursion depth during token grouping
88+
- ``MAX_GROUPING_TOKENS`` (default: 10,000) - Limits the number of tokens processed in a single grouping operation
89+
90+
These limits can be modified by changing the constants in ``sqlparse.engine.grouping``
91+
if your application requires processing larger SQL statements. Set a limit to ``None``
92+
to completely disable it. However, increasing these values or disabling limits may
93+
expose your application to DoS vulnerabilities when processing untrusted SQL input.
94+
95+
Example of modifying limits::
96+
97+
import sqlparse.engine.grouping
98+
99+
# Increase limits (use with caution)
100+
sqlparse.engine.grouping.MAX_GROUPING_DEPTH = 200
101+
sqlparse.engine.grouping.MAX_GROUPING_TOKENS = 50000
102+
103+
# Disable limits completely (use with extreme caution)
104+
sqlparse.engine.grouping.MAX_GROUPING_DEPTH = None
105+
sqlparse.engine.grouping.MAX_GROUPING_TOKENS = None
106+
107+
.. warning::
108+
Increasing the grouping limits or disabling them completely may make your
109+
application vulnerable to DoS attacks when processing untrusted SQL input.
110+
Only modify these values if you are certain about the source and size of
111+
your SQL statements.

sqlparse/engine/grouping.py

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,35 @@
99
from sqlparse import tokens as T
1010
from sqlparse.utils import recurse, imt
1111

12+
# Maximum recursion depth for grouping operations to prevent DoS attacks
13+
# Set to None to disable limit (not recommended for untrusted input)
14+
MAX_GROUPING_DEPTH = 100
15+
16+
# Maximum number of tokens to process in one grouping operation to prevent
17+
# DoS attacks.
18+
# Set to None to disable limit (not recommended for untrusted input)
19+
MAX_GROUPING_TOKENS = 10000
20+
1221
T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float)
1322
T_STRING = (T.String, T.String.Single, T.String.Symbol)
1423
T_NAME = (T.Name, T.Name.Placeholder)
1524

1625

17-
def _group_matching(tlist, cls):
26+
def _group_matching(tlist, cls, depth=0):
1827
"""Groups Tokens that have beginning and end."""
28+
if MAX_GROUPING_DEPTH is not None and depth > MAX_GROUPING_DEPTH:
29+
return
30+
31+
# Limit the number of tokens to prevent DoS attacks
32+
if MAX_GROUPING_TOKENS is not None \
33+
and len(tlist.tokens) > MAX_GROUPING_TOKENS:
34+
return
35+
1936
opens = []
2037
tidx_offset = 0
21-
for idx, token in enumerate(list(tlist)):
38+
token_list = list(tlist)
39+
40+
for idx, token in enumerate(token_list):
2241
tidx = idx - tidx_offset
2342

2443
if token.is_whitespace:
@@ -31,7 +50,7 @@ def _group_matching(tlist, cls):
3150
# Check inside previously grouped (i.e. parenthesis) if group
3251
# of different type is inside (i.e., case). though ideally should
3352
# check for all open/close tokens at once to avoid recursion
34-
_group_matching(token, cls)
53+
_group_matching(token, cls, depth + 1)
3554
continue
3655

3756
if token.match(*cls.M_OPEN):
@@ -456,13 +475,23 @@ def _group(tlist, cls, match,
456475
valid_next=lambda t: True,
457476
post=None,
458477
extend=True,
459-
recurse=True
478+
recurse=True,
479+
depth=0
460480
):
461481
"""Groups together tokens that are joined by a middle token. i.e. x < y"""
482+
if MAX_GROUPING_DEPTH is not None and depth > MAX_GROUPING_DEPTH:
483+
return
484+
485+
# Limit the number of tokens to prevent DoS attacks
486+
if MAX_GROUPING_TOKENS is not None \
487+
and len(tlist.tokens) > MAX_GROUPING_TOKENS:
488+
return
462489

463490
tidx_offset = 0
464491
pidx, prev_ = None, None
465-
for idx, token in enumerate(list(tlist)):
492+
token_list = list(tlist)
493+
494+
for idx, token in enumerate(token_list):
466495
tidx = idx - tidx_offset
467496
if tidx < 0: # tidx shouldn't get negative
468497
continue
@@ -471,7 +500,8 @@ def _group(tlist, cls, match,
471500
continue
472501

473502
if recurse and token.is_group and not isinstance(token, cls):
474-
_group(token, cls, match, valid_prev, valid_next, post, extend)
503+
_group(token, cls, match, valid_prev, valid_next,
504+
post, extend, True, depth + 1)
475505

476506
if match(token):
477507
nidx, next_ = tlist.token_next(tidx)

tests/test_dos_prevention.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
"""Tests for DoS prevention mechanisms in sqlparse."""
2+
3+
import pytest
4+
import sqlparse
5+
import time
6+
7+
8+
class TestDoSPrevention:
    """Test cases to ensure sqlparse is protected against DoS attacks.

    These tests exercise the grouping limits (MAX_GROUPING_DEPTH,
    MAX_GROUPING_TOKENS) added to sqlparse.engine.grouping: pathological
    inputs must finish quickly, while normal SQL must keep formatting
    correctly.

    NOTE(review): durations are measured with ``time.perf_counter()``
    rather than ``time.time()`` — perf_counter is monotonic and
    high-resolution, so the measurement cannot be skewed by wall-clock
    adjustments during the test run.
    """

    def test_large_tuple_list_performance(self):
        """Parsing a large list of tuples must not hang.

        Mirrors Django composite-primary-key queries, which generate
        ``WHERE (a, b) IN ((..), (..), ...)`` with thousands of tuples.
        """
        sql = '''
        SELECT "composite_pk_comment"."tenant_id", "composite_pk_comment"."comment_id"
        FROM "composite_pk_comment"
        WHERE ("composite_pk_comment"."tenant_id", "composite_pk_comment"."comment_id") IN ('''

        # Generate 5000 tuples - this would previously cause a hang.
        sql += ", ".join(f"(1, {i})" for i in range(1, 5001)) + ")"

        # Test should complete quickly (under 5 seconds).
        start_time = time.perf_counter()
        result = sqlparse.format(sql, reindent=True, keyword_case="upper")
        execution_time = time.perf_counter() - start_time

        assert execution_time < 5.0, f"Parsing took too long: {execution_time:.2f}s"
        assert len(result) > 0, "Result should not be empty"
        assert "SELECT" in result.upper(), "SQL should be properly formatted"

    def test_deeply_nested_groups_limited(self):
        """Deeply nested parentheses must not overflow the stack.

        200 levels exceeds MAX_GROUPING_DEPTH (100), so grouping stops
        early instead of raising RecursionError.
        """
        sql = "SELECT " + "(" * 200 + "1" + ")" * 200

        # Should not raise RecursionError.
        result = sqlparse.format(sql, reindent=True)
        assert "SELECT" in result
        assert "1" in result

    def test_very_large_token_list_limited(self):
        """Token lists larger than MAX_GROUPING_TOKENS are handled gracefully."""
        # 15,000 identifiers: more than MAX_GROUPING_TOKENS (10,000).
        identifiers = (f"col{i}" for i in range(15000))
        sql = f"SELECT {', '.join(identifiers)} FROM table1"

        # Should complete without hanging.
        start_time = time.perf_counter()
        result = sqlparse.format(sql, reindent=True)
        execution_time = time.perf_counter() - start_time

        assert execution_time < 10.0, f"Parsing took too long: {execution_time:.2f}s"
        assert "SELECT" in result
        assert "FROM" in result

    def test_normal_sql_still_works(self):
        """Ordinary SQL must be unaffected by the DoS protections."""
        sql = """
        SELECT u.id, u.name, p.title
        FROM users u
        JOIN posts p ON u.id = p.user_id
        WHERE u.active = 1
        AND p.published_at > '2023-01-01'
        ORDER BY p.published_at DESC
        """

        result = sqlparse.format(sql, reindent=True, keyword_case="upper")

        assert "SELECT" in result
        assert "FROM" in result
        assert "JOIN" in result
        assert "WHERE" in result
        assert "ORDER BY" in result

    def test_reasonable_tuple_list_works(self):
        """Moderate tuple lists (well under the limits) still group fully."""
        sql = '''
        SELECT id FROM table1
        WHERE (col1, col2) IN ('''

        # 100 tuples should work fine.
        sql += ", ".join(f"({i}, {i * 2})" for i in range(1, 101)) + ")"

        result = sqlparse.format(sql, reindent=True, keyword_case="upper")

        assert "SELECT" in result
        assert "WHERE" in result
        assert "IN" in result
        assert "1," in result  # First tuple should be there
        assert "200" in result  # Last tuple should be there

0 commit comments

Comments
 (0)