From 3637f5f9b4907c291b9f9be34802e5257773590d Mon Sep 17 00:00:00 2001 From: Ondrej Tuma Date: Wed, 7 Jan 2026 13:22:12 +0100 Subject: [PATCH 1/2] Bad PATH_INFO encoding exception handling #37 --- poorwsgi/request.py | 10 ++++++++- poorwsgi/results.py | 12 ++++++++-- tests/test_application_error.py | 39 +++++++++++++++++++++++++++++++++ tests/test_request.py | 34 ++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 3 deletions(-) create mode 100644 tests/test_application_error.py diff --git a/poorwsgi/request.py b/poorwsgi/request.py index 9e83b90..7f3fd68 100644 --- a/poorwsgi/request.py +++ b/poorwsgi/request.py @@ -175,7 +175,15 @@ def uri(self): @property def path(self): """Path part of url.""" - return self.__environ.get("PATH_INFO").encode("iso-8859-1").decode() + try: + return ( + self.__environ.get("PATH_INFO").encode("iso-8859-1").decode() + ) + except (UnicodeDecodeError, UnicodeEncodeError) as err: + log.warning("Invalid PATH_INFO encoding: %s", err) + raise HTTPException( + HTTP_BAD_REQUEST, error="Invalid PATH_INFO encoding" + ) from err @property def query(self): diff --git a/poorwsgi/results.py b/poorwsgi/results.py index 30214ce..952e506 100644 --- a/poorwsgi/results.py +++ b/poorwsgi/results.py @@ -104,7 +104,7 @@ def internal_server_error(req, *_): in dispatch_table.errors. If poor_Debug variable is to On, Tracaback will be generated. """ - handler = {"module": None, "name": None} + handler = {"module": None, "name": None, "args": None} if req.uri_handler: handler["module"] = req.uri_handler.__module__ handler["name"] = req.uri_handler.__name__ @@ -187,6 +187,12 @@ def bad_request(req, error=None): """ 400 Bad Request server error handler. """ if error: log.warning("400 - Bad Request: %s", error) + path = "[ NOT PARSED ]" + try: + path = req.path + except HTTPException: + pass + content = ( "\n" "\n" @@ -203,10 +209,12 @@ def bad_request(req, error=None): " \n" "

400 - Bad Request

\n" "

Method %s for %s uri.

\n" + "
%s
\n" "
\n" " webmaster: %s \n" " \n" - "" % (req.method, html_escape(req.uri), req.server_admin)) + "" % (req.method, html_escape(path), error or "", + req.server_admin)) return Response(content, status_code=HTTP_BAD_REQUEST) diff --git a/tests/test_application_error.py b/tests/test_application_error.py new file mode 100644 index 0000000..f774af3 --- /dev/null +++ b/tests/test_application_error.py @@ -0,0 +1,39 @@ +"""Unit test for bad request.""" +from io import BytesIO +from time import time + +import pytest + +from poorwsgi.request import Request +from poorwsgi.wsgi import Application + + +def test_keyerror_on_internal_error(monkeypatch): + """Test for KeyError: 'args' on unicode decode error in path.""" + app = Application() + + def mock_path(self): + """Mock of old Request.path property.""" + # We are mocking the old behavior, where UnicodeDecodeError was not + # caught inside path property and propagated to __request__ method. + raise UnicodeDecodeError("utf-8", b"\xc0", 0, 1, "invalid start byte") + + monkeypatch.setattr(Request, "path", property(mock_path)) + + environ = { + "PATH_INFO": "/foo", + "REQUEST_METHOD": "GET", + "SERVER_NAME": "localhost", + "SERVER_PORT": "80", + "wsgi.url_scheme": "http", + "wsgi.input": BytesIO(b""), + "wsgi.errors": BytesIO(), + "REQUEST_STARTTIME": time(), + } + + def start_response(*_): + # This is mock, we check response from app call + pass + + with pytest.raises(UnicodeDecodeError, match="invalid start byte"): + app(environ, start_response) diff --git a/tests/test_request.py b/tests/test_request.py index a6c58b2..ba24c9b 100644 --- a/tests/test_request.py +++ b/tests/test_request.py @@ -351,3 +351,37 @@ def test_empty_form(self, app): assert req.is_body_request is False assert req.mime_type in app.form_mime_types assert isinstance(req.form, EmptyForm) + + +def test_bad_path_info_triggers_400(app): + """Test that bad PATH_INFO encoding is handled and returns 400.""" + captured_status = None + captured_headers = None + + def start_response(status, headers): + nonlocal captured_status, captured_headers + captured_status = status + captured_headers = headers + + # This char in iso-8859-1 is 0xc0, an invalid start byte in utf-8 + bad_char = 'À' + bad_path = f'/foo{bad_char}bar' + + environ = { + 'PATH_INFO': bad_path, + 'REQUEST_METHOD': 'GET', + 'SERVER_NAME': 'localhost', + 'SERVER_PORT': '80', + 'wsgi.url_scheme': 'http', + 'REQUEST_STARTTIME': time() + } + + # The app.__call__ should catch the HTTPException and generate a 400 + response_body = app(environ, start_response) + + assert captured_status == '400 Bad Request' + # Also check body content + body_str = b''.join(response_body).decode() + assert '400' in body_str + assert 'Bad Request' in body_str + assert 'Invalid PATH_INFO encoding' in body_str From 72ff20b40243d7190f05df7bff0b4945796d3d4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20T=C5=AFma?= Date: Wed, 7 Jan 2026 14:05:29 +0100 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- poorwsgi/results.py | 1 + tests/test_application_error.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/poorwsgi/results.py b/poorwsgi/results.py index 952e506..be6da52 100644 --- a/poorwsgi/results.py +++ b/poorwsgi/results.py @@ -191,6 +191,7 @@ def bad_request(req, error=None): try: path = req.path except HTTPException: + # If obtaining req.path fails, keep the default placeholder value. pass content = ( diff --git a/tests/test_application_error.py b/tests/test_application_error.py index f774af3..a77ec54 100644 --- a/tests/test_application_error.py +++ b/tests/test_application_error.py @@ -1,4 +1,4 @@ -"""Unit test for bad request.""" +"""Unit test for unicode decode error propagation.""" from io import BytesIO from time import time