Skip to content

Commit 60cffa2

Browse files
committed
fix: decode percent-encoded unreserved chars before resolving dot segments in normalize
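RFC 3986 Section 6.2.2.2 says that percent-encoded unreserved characters should be decoded during normalization, and Section 6.2.2.3 says that dot segments should be removed from the path. For the two steps to compose correctly, decoding has to happen first: %2e is an encoded "." (an unreserved character), so a %2e%2e segment is only recognizable as ".." after it has been decoded. The previous order (resolve-then-decode) meant that %2e%2e was not recognized as ".." during path normalization, producing incorrect results like http://example.com/../foo instead of http://example.com/foo.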
1 parent 978f2e6 commit 60cffa2

2 files changed

Lines changed: 24 additions & 3 deletions

File tree

src/hyperlink/_url.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1512,9 +1512,9 @@ def _dec_unres(target):
15121512

15131513
if path:
15141514
if self.path:
1515-
kw["path"] = [
1516-
_dec_unres(p) for p in _resolve_dot_segments(self.path)
1517-
]
1515+
kw["path"] = _resolve_dot_segments(
1516+
[_dec_unres(p) for p in self.path]
1517+
)
15181518
else:
15191519
kw["path"] = (u"",)
15201520
if query:

src/hyperlink/test/test_url.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1461,6 +1461,27 @@ def test_normalize(self):
14611461
== "ftp://%25:%25@/%25%25%25/%25a%25b?%25=%25%25#%25"
14621462
)
14631463

1464+
def test_normalize_percent_encoded_dot_segments(self):
1465+
# type: () -> None
1466+
# RFC 3986 Section 6.2.2.2: percent-encoded unreserved characters
1467+
# should be decoded BEFORE resolving dot segments.
1468+
# %2e = '.' (unreserved), so %2e%2e should become '..' and
1469+
# be resolved to the parent directory.
1470+
url = URL.from_text("http://example.com/%2e%2e/foo")
1471+
norm = url.normalize()
1472+
assert norm.path == ("foo",)
1473+
assert norm.to_text() == "http://example.com/foo"
1474+
1475+
url2 = URL.from_text("http://example.com/%2e/foo")
1476+
norm2 = url2.normalize()
1477+
assert norm2.path == ("foo",)
1478+
assert norm2.to_text() == "http://example.com/foo"
1479+
1480+
url3 = URL.from_text("http://example.com/foo/%2e%2e/bar")
1481+
norm3 = url3.normalize()
1482+
assert norm3.path == ("bar",)
1483+
assert norm3.to_text() == "http://example.com/bar"
1484+
14641485
def test_str(self):
14651486
# type: () -> None
14661487

0 commit comments

Comments
 (0)