From 5ba232f665dea611d4c2a6ef74922e40535a45c2 Mon Sep 17 00:00:00 2001 From: mvaught Date: Thu, 26 Feb 2026 14:15:53 -0500 Subject: [PATCH 1/4] drop python2 and legacy support --- .github/workflows/cicd.yml | 12 +- docs/design.rst | 2 +- docs/index.rst | 2 +- pyproject.toml | 2 +- setup.py | 18 +- src/hyperlink/_socket.py | 53 -- src/hyperlink/_url.py | 731 ++++++++---------- src/hyperlink/hypothesis.py | 107 ++- src/hyperlink/test/common.py | 31 +- src/hyperlink/test/test_common.py | 6 +- src/hyperlink/test/test_decoded_url.py | 51 +- src/hyperlink/test/test_hypothesis.py | 76 +- src/hyperlink/test/test_parse.py | 6 +- .../test/test_scheme_registration.py | 36 +- src/hyperlink/test/test_socket.py | 45 -- src/hyperlink/test/test_url.py | 317 +++----- tox.ini | 78 +- 17 files changed, 608 insertions(+), 965 deletions(-) delete mode 100644 src/hyperlink/_socket.py delete mode 100644 src/hyperlink/test/test_socket.py diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index b5b7e9eb..409cf9ab 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -69,7 +69,7 @@ jobs: - name: Install Python uses: actions/setup-python@v1 with: - python-version: "3.9" + python-version: "3.14" - name: Install Tox run: pip install tox; @@ -93,7 +93,7 @@ jobs: - name: Install Python uses: actions/setup-python@v1 with: - python-version: "3.9" + python-version: "3.14" - name: Install Tox run: pip install tox; @@ -116,7 +116,7 @@ jobs: - name: Install Python uses: actions/setup-python@v1 with: - python-version: "3.9" + python-version: "3.14" - name: Install Tox run: pip install tox; @@ -140,7 +140,7 @@ jobs: - name: Install Python uses: actions/setup-python@v1 with: - python-version: "3.9" + python-version: "3.14" - name: Install Tox run: pip install tox; @@ -163,7 +163,7 @@ jobs: - name: Install Python uses: actions/setup-python@v1 with: - python-version: "3.9" + python-version: "3.14" - name: Install Tox run: pip install tox; @@ -181,7 +181,7 @@ jobs: 
timeout-minutes: 30 strategy: matrix: - python: ["2.7", "3.5", "3.6", "3.7", "3.8", "3.9", "pypy2", "pypy3"] + python: ["3.10", "3.11", "3.12", "3.13", "3.14", "pypy3"] steps: diff --git a/docs/design.rst b/docs/design.rst index 6326cf45..b9cb61ff 100644 --- a/docs/design.rst +++ b/docs/design.rst @@ -64,7 +64,7 @@ constructed, instances are not changed. Methods like all return new URL objects. This enables URLs to be used in sets, as well as dictionary keys. -.. _immutable: https://docs.python.org/2/glossary.html#term-immutable +.. _immutable: https://docs.python.org/3/glossary.html#term-immutable .. _multidict: https://en.wikipedia.org/wiki/Multimap .. _query string: https://en.wikipedia.org/wiki/Query_string .. _GET parameters: http://php.net/manual/en/reserved.variables.get.php diff --git a/docs/index.rst b/docs/index.rst index cfc0c47d..3f1dcf43 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -10,7 +10,7 @@ hyperlink URLs. Based on `RFC 3986`_ and `RFC 3987`_, the Hyperlink URL balances simplicity and correctness for both :ref:`URIs and IRIs `. -Hyperlink is tested against Python 2.7, 3.4, 3.5, 3.6, 3.7, 3.8, and PyPy. +Hyperlink is tested against Python 3.10, 3.11, 3.12, 3.13, 3.14, and PyPy. 
For an introduction to the hyperlink library, its background, and URLs in general, see `this talk from PyConWeb 2017`_ (and `the accompanying diff --git a/pyproject.toml b/pyproject.toml index e7efe6ae..402a0e9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,4 +7,4 @@ build-backend = "setuptools.build_meta" [tool.black] line-length = 80 -target-version = ["py27"] +target-version = ["py310"] diff --git a/setup.py b/setup.py index f057fb8a..4ee2a1c1 100644 --- a/setup.py +++ b/setup.py @@ -31,23 +31,19 @@ zip_safe=False, license=__license__, platforms="any", - install_requires=["idna>=2.5", 'typing ; python_version<"3.5"'], - python_requires=">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*", + install_requires=["idna>=3.0"], + python_requires=">=3.10", classifiers=[ "Topic :: Utilities", "Intended Audience :: Developers", "Topic :: Software Development :: Libraries", "Development Status :: 5 - Production/Stable", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.6", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Programming Language :: Python :: Implementation :: PyPy", "License :: OSI Approved :: MIT License", ], diff --git a/src/hyperlink/_socket.py b/src/hyperlink/_socket.py deleted file mode 100644 index 3bcf8970..00000000 --- a/src/hyperlink/_socket.py +++ /dev/null @@ -1,53 +0,0 @@ -try: - from socket import inet_pton -except ImportError: - from typing import TYPE_CHECKING - - if TYPE_CHECKING: # pragma: no cover - pass - 
else: - # based on https://gist.github.com/nnemkin/4966028 - # this code only applies on Windows Python 2.7 - import ctypes - import socket - - class SockAddr(ctypes.Structure): - _fields_ = [ - ("sa_family", ctypes.c_short), - ("__pad1", ctypes.c_ushort), - ("ipv4_addr", ctypes.c_byte * 4), - ("ipv6_addr", ctypes.c_byte * 16), - ("__pad2", ctypes.c_ulong), - ] - - WSAStringToAddressA = ctypes.windll.ws2_32.WSAStringToAddressA - WSAAddressToStringA = ctypes.windll.ws2_32.WSAAddressToStringA - - def inet_pton(address_family, ip_string): - # type: (int, str) -> bytes - addr = SockAddr() - ip_string_bytes = ip_string.encode("ascii") - addr.sa_family = address_family - addr_size = ctypes.c_int(ctypes.sizeof(addr)) - - try: - attribute, size = { - socket.AF_INET: ("ipv4_addr", 4), - socket.AF_INET6: ("ipv6_addr", 16), - }[address_family] - except KeyError: - raise socket.error("unknown address family") - - if ( - WSAStringToAddressA( - ip_string_bytes, - address_family, - None, - ctypes.byref(addr), - ctypes.byref(addr_size), - ) - != 0 - ): - raise socket.error(ctypes.FormatError()) - - return ctypes.string_at(getattr(addr, attribute), size) diff --git a/src/hyperlink/_url.py b/src/hyperlink/_url.py index 8797b5cc..fcd845cd 100644 --- a/src/hyperlink/_url.py +++ b/src/hyperlink/_url.py @@ -1,5 +1,6 @@ +from __future__ import annotations # -*- coding: utf-8 -*- -u"""Hyperlink provides Pythonic URL parsing, construction, and rendering. +"""Hyperlink provides Pythonic URL parsing, construction, and rendering. 
Usage is straightforward:: @@ -18,66 +19,53 @@ """ # noqa: E501 import re -import sys import string import socket from socket import AF_INET, AF_INET6 - -try: - from socket import AddressFamily -except ImportError: - AddressFamily = int # type: ignore[assignment,misc] +from socket import AddressFamily from typing import ( Any, - Callable, Dict, - Iterable, Iterator, - List, - Mapping, Optional, - Sequence, - Text, - Tuple, - Type, - TypeVar, - Union, cast, TYPE_CHECKING, - overload, ) +if TYPE_CHECKING: + from typing import ( + Type, + overload, + Iterable, + List, + Mapping, + Sequence, + Tuple, + TypeVar, + Union, + ) + NoneType: Type[None] = type(None) + QueryPairs = Tuple[Tuple[str, Optional[str]], ...] # internal representation + QueryParameters = Union[ + Mapping[str, Optional[str]], + QueryPairs, + Iterable[Tuple[str, Optional[str]]], + ] + T = TypeVar("T") + # Literal is not available in all pythons so we only bring it in for mypy. + from typing import Literal +else: + NoneType: type[None] = type(None) + from unicodedata import normalize -from ._socket import inet_pton +from socket import inet_pton -try: - from collections.abc import Mapping as MappingABC -except ImportError: # Python 2 - from collections import Mapping as MappingABC +from collections.abc import Mapping as MappingABC from idna import encode as idna_encode, decode as idna_decode -PY2 = sys.version_info[0] == 2 -try: - unichr -except NameError: # Py3 - unichr = chr # type: Callable[[int], Text] -NoneType = type(None) # type: Type[None] -QueryPairs = Tuple[Tuple[Text, Optional[Text]], ...] # internal representation -QueryParameters = Union[ - Mapping[Text, Optional[Text]], - QueryPairs, - Iterable[Tuple[Text, Optional[Text]]], -] -T = TypeVar("T") -# Literal is not available in all pythons so we only bring it in for mypy. 
-if TYPE_CHECKING: - from typing import Literal - - # from boltons.typeutils -def make_sentinel(name="_MISSING", var_name=""): - # type: (str, str) -> object +def make_sentinel(name: str = "_MISSING", var_name: str = "") -> object: """Creates and returns a new **instance** of a new class, suitable for usage as a "sentinel", a kind of singleton often used to indicate a value is missing when ``None`` is a valid input. @@ -108,13 +96,11 @@ def make_sentinel(name="_MISSING", var_name=""): """ class Sentinel(object): - def __init__(self): - # type: () -> None + def __init__(self) -> None: self.name = name self.var_name = var_name - def __repr__(self): - # type: () -> str + def __repr__(self) -> str: if self.var_name: return self.var_name return "%s(%r)" % (self.__class__.__name__, self.name) @@ -122,20 +108,17 @@ def __repr__(self): if var_name: # superclass type hints don't allow str return type, but it is # allowed in the docs, hence the ignore[override] below - def __reduce__(self): - # type: () -> str + def __reduce__(self) -> str: return self.var_name - def __nonzero__(self): - # type: () -> bool + def __bool__(self) -> bool: return False - __bool__ = __nonzero__ - return Sentinel() -_unspecified = _UNSET = make_sentinel("_UNSET") # type: Any +_UNSET: Any = make_sentinel("_UNSET") +_unspecified: Any = _UNSET # RFC 3986 Section 2.3, Unreserved URI Characters @@ -167,7 +150,7 @@ def __nonzero__(self): _HEX_CHAR_MAP = dict( [ - ((a + b).encode("ascii"), unichr(int(a + b, 16)).encode("charmap")) + ((a + b).encode("ascii"), chr(int(a + b, 16)).encode("charmap")) for a in string.hexdigits for b in string.hexdigits ] @@ -176,29 +159,28 @@ def __nonzero__(self): # RFC 3986 section 2.2, Reserved Characters # https://tools.ietf.org/html/rfc3986#section-2.2 -_GEN_DELIMS = frozenset(u":/?#[]@") -_SUB_DELIMS = frozenset(u"!$&'()*+,;=") +_GEN_DELIMS = frozenset(":/?#[]@") +_SUB_DELIMS = frozenset("!$&'()*+,;=") _ALL_DELIMS = _GEN_DELIMS | _SUB_DELIMS -_USERINFO_SAFE = 
_UNRESERVED_CHARS | _SUB_DELIMS | set(u"%") +_USERINFO_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS | set("%") _USERINFO_DELIMS = _ALL_DELIMS - _USERINFO_SAFE -_PATH_SAFE = _USERINFO_SAFE | set(u":@") +_PATH_SAFE = _USERINFO_SAFE | set(":@") _PATH_DELIMS = _ALL_DELIMS - _PATH_SAFE _SCHEMELESS_PATH_SAFE = _PATH_SAFE - set(":") _SCHEMELESS_PATH_DELIMS = _ALL_DELIMS - _SCHEMELESS_PATH_SAFE -_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set(u"/?") +_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set("/?") _FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE -_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set(u"&") +_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set("&") _QUERY_VALUE_DELIMS = _ALL_DELIMS - _QUERY_VALUE_SAFE -_QUERY_KEY_SAFE = _UNRESERVED_CHARS | _QUERY_VALUE_SAFE - set(u"=") +_QUERY_KEY_SAFE = _UNRESERVED_CHARS | _QUERY_VALUE_SAFE - set("=") _QUERY_KEY_DELIMS = _ALL_DELIMS - _QUERY_KEY_SAFE -def _make_decode_map(delims, allow_percent=False): - # type: (Iterable[Text], bool) -> Mapping[bytes, bytes] +def _make_decode_map(delims: Iterable[str], allow_percent: bool = False) -> Mapping[bytes, bytes]: ret = dict(_HEX_CHAR_MAP) if not allow_percent: - delims = set(delims) | set([u"%"]) + delims = set(delims) | set(["%"]) for delim in delims: _hexord = "{0:02X}".format(ord(delim)).encode("ascii") _hexord_lower = _hexord.lower() @@ -208,12 +190,11 @@ def _make_decode_map(delims, allow_percent=False): return ret -def _make_quote_map(safe_chars): - # type: (Iterable[Text]) -> Mapping[Union[int, Text], Text] - ret = {} # type: Dict[Union[int, Text], Text] +def _make_quote_map(safe_chars: Iterable[str]) -> Mapping[Union[int, str], str]: + ret: Dict[Union[int, str], str] = {} # v is included in the dict for py3 mostly, because bytestrings # are iterables of ints, of course! 
- for i, v in zip(range(256), range(256)): + for i, v in zip(list(range(256)), list(range(256))): c = chr(v) if c in safe_chars: ret[c] = ret[v] = c @@ -237,16 +218,15 @@ def _make_quote_map(safe_chars): _UNRESERVED_DECODE_MAP = dict( [ (k, v) - for k, v in _HEX_CHAR_MAP.items() + for k, v in list(_HEX_CHAR_MAP.items()) if v.decode("ascii", "replace") in _UNRESERVED_CHARS ] ) -_ROOT_PATHS = frozenset(((), (u"",))) +_ROOT_PATHS = frozenset(((), ("",))) -def _encode_reserved(text, maximal=True): - # type: (Text, bool) -> Text +def _encode_reserved(text: str, maximal: bool = True) -> str: """A very comprehensive percent encoding for encoding all delimiters. Used for arguments to DecodedURL, where a % means a percent sign, and not the character used by URLs for escaping @@ -254,8 +234,8 @@ def _encode_reserved(text, maximal=True): """ if maximal: bytestr = normalize("NFC", text).encode("utf8") - return u"".join([_UNRESERVED_QUOTE_MAP[b] for b in bytestr]) - return u"".join( + return "".join([_UNRESERVED_QUOTE_MAP[b] for b in bytestr]) + return "".join( [ _UNRESERVED_QUOTE_MAP[t] if t in _UNRESERVED_CHARS else t for t in text @@ -263,26 +243,24 @@ def _encode_reserved(text, maximal=True): ) -def _encode_path_part(text, maximal=True): - # type: (Text, bool) -> Text +def _encode_path_part(text: str, maximal: bool = True) -> str: "Percent-encode a single segment of a URL path." if maximal: bytestr = normalize("NFC", text).encode("utf8") - return u"".join([_PATH_PART_QUOTE_MAP[b] for b in bytestr]) - return u"".join( + return "".join([_PATH_PART_QUOTE_MAP[b] for b in bytestr]) + return "".join( [_PATH_PART_QUOTE_MAP[t] if t in _PATH_DELIMS else t for t in text] ) -def _encode_schemeless_path_part(text, maximal=True): - # type: (Text, bool) -> Text +def _encode_schemeless_path_part(text: str, maximal: bool = True) -> str: """Percent-encode the first segment of a URL path for a URL without a scheme specified. 
""" if maximal: bytestr = normalize("NFC", text).encode("utf8") - return u"".join([_SCHEMELESS_PATH_PART_QUOTE_MAP[b] for b in bytestr]) - return u"".join( + return "".join([_SCHEMELESS_PATH_PART_QUOTE_MAP[b] for b in bytestr]) + return "".join( [ _SCHEMELESS_PATH_PART_QUOTE_MAP[t] if t in _SCHEMELESS_PATH_DELIMS @@ -293,13 +271,12 @@ def _encode_schemeless_path_part(text, maximal=True): def _encode_path_parts( - text_parts, # type: Sequence[Text] - rooted=False, # type: bool - has_scheme=True, # type: bool - has_authority=True, # type: bool - maximal=True, # type: bool -): - # type: (...) -> Sequence[Text] + text_parts: Sequence[str], + rooted: bool = False, + has_scheme: bool = True, + has_authority: bool = True, + maximal: bool = True, +) -> Sequence[str]: """ Percent-encode a tuple of path parts into a complete path. @@ -323,10 +300,10 @@ def _encode_path_parts( if not text_parts: return () if rooted: - text_parts = (u"",) + tuple(text_parts) + text_parts = ("",) + tuple(text_parts) # elif has_authority and text_parts: # raise Exception('see rfc above') # TODO: too late to fail like this? - encoded_parts = [] # type: List[Text] + encoded_parts: List[str] = [] if has_scheme: encoded_parts = [ _encode_path_part(part, maximal=maximal) if part else part @@ -343,28 +320,26 @@ def _encode_path_parts( return tuple(encoded_parts) -def _encode_query_key(text, maximal=True): - # type: (Text, bool) -> Text +def _encode_query_key(text: str, maximal: bool = True) -> str: """ Percent-encode a single query string key or value. 
""" if maximal: bytestr = normalize("NFC", text).encode("utf8") - return u"".join([_QUERY_KEY_QUOTE_MAP[b] for b in bytestr]) - return u"".join( + return "".join([_QUERY_KEY_QUOTE_MAP[b] for b in bytestr]) + return "".join( [_QUERY_KEY_QUOTE_MAP[t] if t in _QUERY_KEY_DELIMS else t for t in text] ) -def _encode_query_value(text, maximal=True): - # type: (Text, bool) -> Text +def _encode_query_value(text: str, maximal: bool = True) -> str: """ Percent-encode a single query string key or value. """ if maximal: bytestr = normalize("NFC", text).encode("utf8") - return u"".join([_QUERY_VALUE_QUOTE_MAP[b] for b in bytestr]) - return u"".join( + return "".join([_QUERY_VALUE_QUOTE_MAP[b] for b in bytestr]) + return "".join( [ _QUERY_VALUE_QUOTE_MAP[t] if t in _QUERY_VALUE_DELIMS else t for t in text @@ -372,28 +347,26 @@ def _encode_query_value(text, maximal=True): ) -def _encode_fragment_part(text, maximal=True): - # type: (Text, bool) -> Text +def _encode_fragment_part(text: str, maximal: bool = True) -> str: """Quote the fragment part of the URL. Fragments don't have subdelimiters, so the whole URL fragment can be passed. """ if maximal: bytestr = normalize("NFC", text).encode("utf8") - return u"".join([_FRAGMENT_QUOTE_MAP[b] for b in bytestr]) - return u"".join( + return "".join([_FRAGMENT_QUOTE_MAP[b] for b in bytestr]) + return "".join( [_FRAGMENT_QUOTE_MAP[t] if t in _FRAGMENT_DELIMS else t for t in text] ) -def _encode_userinfo_part(text, maximal=True): - # type: (Text, bool) -> Text +def _encode_userinfo_part(text: str, maximal: bool = True) -> str: """Quote special characters in either the username or password section of the URL. 
""" if maximal: bytestr = normalize("NFC", text).encode("utf8") - return u"".join([_USERINFO_PART_QUOTE_MAP[b] for b in bytestr]) - return u"".join( + return "".join([_USERINFO_PART_QUOTE_MAP[b] for b in bytestr]) + return "".join( [ _USERINFO_PART_QUOTE_MAP[t] if t in _USERINFO_DELIMS else t for t in text @@ -476,9 +449,8 @@ def _encode_userinfo_part(text, maximal=True): def register_scheme( - text, uses_netloc=True, default_port=None, query_plus_is_space=True -): - # type: (Text, bool, Optional[int], bool) -> None + text: str, uses_netloc: bool = True, default_port: Optional[int] = None, query_plus_is_space: bool = True +) -> None: """Registers new scheme information, resulting in correct port and slash behavior from the URL object. There are dozens of standard schemes preregistered, so this function is mostly meant for @@ -527,8 +499,7 @@ def register_scheme( return -def scheme_uses_netloc(scheme, default=None): - # type: (Text, Optional[bool]) -> Optional[bool] +def scheme_uses_netloc(scheme: str, default: Optional[bool] = None) -> Optional[bool]: """Whether or not a URL uses :code:`:` or :code:`://` to separate the scheme from the rest of the URL depends on the scheme's own standard definition. There is no way to infer this behavior @@ -567,16 +538,14 @@ class URLParseError(ValueError): pass -def _optional(argument, default): - # type: (Any, Any) -> Any +def _optional(argument: Any, default: Any) -> Any: if argument is _UNSET: return default else: return argument -def _typecheck(name, value, *types): - # type: (Text, T, Type[Any]) -> T +def _typecheck(name: str, value: T, *types: Type[Any]) -> T: """ Check that the given *value* is one of the given *types*, or raise an exception describing the problem using *name*. 
@@ -591,14 +560,13 @@ def _typecheck(name, value, *types): return value -def _textcheck(name, value, delims=frozenset(), nullable=False): - # type: (Text, T, Iterable[Text], bool) -> T - if not isinstance(value, Text): +def _textcheck(name: str, value: T, delims: Iterable[str] = frozenset(), nullable: bool = False) -> T: + if not isinstance(value, str): if nullable and value is None: # used by query string values - return value # type: ignore[unreachable] + return value else: - str_name = "unicode" if PY2 else "str" + str_name = "str" exp = str_name + " or NoneType" if nullable else str_name raise TypeError("expected %s for %s, got %r" % (exp, name, value)) if delims and set(value) & set(delims): # TODO: test caching into regexes @@ -606,11 +574,10 @@ def _textcheck(name, value, delims=frozenset(), nullable=False): "one or more reserved delimiters %s present in %s: %r" % ("".join(delims), name, value) ) - return value # type: ignore[return-value] # T vs. Text + return value -def iter_pairs(iterable): - # type: (Iterable[Any]) -> Iterator[Any] +def iter_pairs(iterable: Iterable[Any]) -> Iterator[Any]: """ Iterate over the (key, value) pairs in ``iterable``. @@ -619,12 +586,11 @@ def iter_pairs(iterable): what Python's ``dict()`` constructor does. 
""" if isinstance(iterable, MappingABC): - iterable = iterable.items() + iterable = list(iterable.items()) return iter(iterable) -def _decode_unreserved(text, normalize_case=False, encode_stray_percents=False): - # type: (Text, bool, bool) -> Text +def _decode_unreserved(text: str, normalize_case: bool = False, encode_stray_percents: bool = False) -> str: return _percent_decode( text, normalize_case=normalize_case, @@ -634,9 +600,8 @@ def _decode_unreserved(text, normalize_case=False, encode_stray_percents=False): def _decode_userinfo_part( - text, normalize_case=False, encode_stray_percents=False -): - # type: (Text, bool, bool) -> Text + text: str, normalize_case: bool = False, encode_stray_percents: bool = False +) -> str: return _percent_decode( text, normalize_case=normalize_case, @@ -645,8 +610,7 @@ def _decode_userinfo_part( ) -def _decode_path_part(text, normalize_case=False, encode_stray_percents=False): - # type: (Text, bool, bool) -> Text +def _decode_path_part(text: str, normalize_case: bool = False, encode_stray_percents: bool = False) -> str: """ >>> _decode_path_part(u'%61%77%2f%7a') u'aw%2fz' @@ -661,8 +625,7 @@ def _decode_path_part(text, normalize_case=False, encode_stray_percents=False): ) -def _decode_query_key(text, normalize_case=False, encode_stray_percents=False): - # type: (Text, bool, bool) -> Text +def _decode_query_key(text: str, normalize_case: bool = False, encode_stray_percents: bool = False) -> str: return _percent_decode( text, normalize_case=normalize_case, @@ -672,9 +635,8 @@ def _decode_query_key(text, normalize_case=False, encode_stray_percents=False): def _decode_query_value( - text, normalize_case=False, encode_stray_percents=False -): - # type: (Text, bool, bool) -> Text + text: str, normalize_case: bool = False, encode_stray_percents: bool = False +) -> str: return _percent_decode( text, normalize_case=normalize_case, @@ -684,9 +646,8 @@ def _decode_query_value( def _decode_fragment_part( - text, normalize_case=False, 
encode_stray_percents=False -): - # type: (Text, bool, bool) -> Text + text: str, normalize_case: bool = False, encode_stray_percents: bool = False +) -> str: return _percent_decode( text, normalize_case=normalize_case, @@ -696,14 +657,13 @@ def _decode_fragment_part( def _percent_decode( - text, # type: Text - normalize_case=False, # type: bool - subencoding="utf-8", # type: Text - raise_subencoding_exc=False, # type: bool - encode_stray_percents=False, # type: bool - _decode_map=_HEX_CHAR_MAP, # type: Mapping[bytes, bytes] -): - # type: (...) -> Text + text: str, + normalize_case: bool = False, + subencoding: str = "utf-8", + raise_subencoding_exc: bool = False, + encode_stray_percents: bool = False, + _decode_map: Mapping[bytes, bytes] = _HEX_CHAR_MAP, +) -> str: """Convert percent-encoded text characters to their normal, human-readable equivalents. @@ -721,7 +681,7 @@ def _percent_decode( u'abc def' Args: - text: Text with percent-encoding present. + text: str with percent-encoding present. normalize_case: Whether undecoded percent segments, such as encoded delimiters, should be uppercased, per RFC 3986 Section 2.1. See :func:`_decode_path_part` for an example. @@ -730,7 +690,7 @@ def _percent_decode( underlying the percent-decoding should be raised. Returns: - Text: The percent-decoded version of *text*, decoded by *subencoding*. + str: The percent-decoded version of *text*, decoded by *subencoding*. """ try: quoted_bytes = text.encode(subencoding) @@ -775,8 +735,7 @@ def _percent_decode( return text -def _decode_host(host): - # type: (Text) -> Text +def _decode_host(host: str) -> str: """Decode a host from ASCII-encodable text to IDNA-decoded text. If the host text is not ASCII, it is returned unchanged, as it is presumed that it is already IDNA-decoded. 
@@ -822,7 +781,7 @@ def _decode_host(host): u'm\xe9hmoud.io' """ # noqa: E501 if not host: - return u"" + return "" try: host_bytes = host.encode("ascii") except UnicodeEncodeError: @@ -839,8 +798,7 @@ def _decode_host(host): return host_text -def _resolve_dot_segments(path): - # type: (Sequence[Text]) -> Sequence[Text] +def _resolve_dot_segments(path: Sequence[str]) -> Sequence[str]: """Normalize the URL path by resolving segments of '.' and '..'. For more details, see `RFC 3986 section 5.2.4, Remove Dot Segments`_. @@ -853,25 +811,24 @@ def _resolve_dot_segments(path): .. _RFC 3986 section 5.2.4, Remove Dot Segments: https://tools.ietf.org/html/rfc3986#section-5.2.4 """ # noqa: E501 - segs = [] # type: List[Text] + segs: List[str] = [] for seg in path: - if seg == u".": + if seg == ".": pass - elif seg == u"..": + elif seg == "..": if segs: segs.pop() else: segs.append(seg) - if list(path[-1:]) in ([u"."], [u".."]): - segs.append(u"") + if list(path[-1:]) in (["."], [".."]): + segs.append("") return segs -def parse_host(host): - # type: (Text) -> Tuple[Optional[AddressFamily], Text] +def parse_host(host: str) -> Tuple[Optional[AddressFamily], str]: """Parse the host into a tuple of ``(family, host)``, where family is the appropriate :mod:`socket` module constant when the host is an IP address. Family is ``None`` when the host is not an IP. @@ -890,9 +847,9 @@ def parse_host(host): True """ if not host: - return None, u"" + return None, "" - if u":" in host: + if ":" in host: try: inet_pton(AF_INET6, host) except socket.error as se: @@ -900,7 +857,7 @@ def parse_host(host): except UnicodeEncodeError: pass # TODO: this can't be a real host right? 
else: - family = AF_INET6 # type: Optional[AddressFamily] + family: Optional[AddressFamily] = AF_INET6 else: try: inet_pton(AF_INET, host) @@ -978,33 +935,32 @@ class URL(object): def __init__( self, - scheme=None, # type: Optional[Text] - host=None, # type: Optional[Text] - path=(), # type: Iterable[Text] - query=(), # type: QueryParameters - fragment=u"", # type: Text - port=None, # type: Optional[int] - rooted=None, # type: Optional[bool] - userinfo=u"", # type: Text - uses_netloc=None, # type: Optional[bool] - ): - # type: (...) -> None + scheme: Optional[str] = None, + host: Optional[str] = None, + path: Iterable[str] = (), + query: QueryParameters = (), + fragment: str = "", + port: Optional[int] = None, + rooted: Optional[bool] = None, + userinfo: str = "", + uses_netloc: Optional[bool] = None, + ) -> None: if host is not None and scheme is None: - scheme = u"http" # TODO: why + scheme = "http" # TODO: why if port is None and scheme is not None: port = SCHEME_PORT_MAP.get(scheme) if host and query and not path: # per RFC 3986 6.2.3, "a URI that uses the generic syntax # for authority with an empty path should be normalized to # a path of '/'." - path = (u"",) + path = ("",) # Now that we're done detecting whether they were passed, we can set # them to their defaults: if scheme is None: - scheme = u"" + scheme = "" if host is None: - host = u"" + host = "" if rooted is None: rooted = bool(host) @@ -1019,7 +975,7 @@ def __init__( ) _, self._host = parse_host(_textcheck("host", host, "/?#@")) - if isinstance(path, Text): + if isinstance(path, str): raise TypeError( "expected iterable of text for path, not: %r" % (path,) ) @@ -1052,7 +1008,7 @@ def __init__( # we're definitely using a netloc (there must be a ://). 
self._rooted = True self._uses_netloc = True - if (not self._rooted) and self.path[:1] == (u"",): + if (not self._rooted) and self.path[:1] == ("",): self._rooted = True self._path = self._path[1:] if not will_have_authority and self._path and not self._rooted: @@ -1061,17 +1017,15 @@ def __init__( # would make the path (erroneously) look like a hostname. self._uses_netloc = False - def get_decoded_url(self, lazy=False): - # type: (bool) -> DecodedURL + def get_decoded_url(self, lazy: bool = False) -> DecodedURL: try: return self._decoded_url except AttributeError: - self._decoded_url = DecodedURL(self, lazy=lazy) # type: DecodedURL + self._decoded_url: DecodedURL = DecodedURL(self, lazy=lazy) return self._decoded_url @property - def scheme(self): - # type: () -> Text + def scheme(self) -> str: """The scheme is a string, and the first part of an absolute URL, the part before the first colon, and the part which defines the semantics of the rest of the URL. Examples include "http", @@ -1081,8 +1035,7 @@ def scheme(self): return self._scheme @property - def host(self): - # type: () -> Text + def host(self) -> str: """The host is a string, and the second standard part of an absolute URL. When present, a valid host must be a domain name, or an IP (v4 or v6). It occurs before the first slash, or the second @@ -1091,8 +1044,7 @@ def host(self): return self._host @property - def port(self): - # type: () -> Optional[int] + def port(self) -> Optional[int]: """The port is an integer that is commonly used in connecting to the :attr:`host`, and almost never appears without it. @@ -1115,8 +1067,7 @@ def port(self): return self._port @property - def path(self): - # type: () -> Sequence[Text] + def path(self) -> Sequence[str]: """A tuple of strings, created by splitting the slash-separated hierarchical path. 
Started by the first slash after the host, terminated by a "?", which indicates the start of the @@ -1125,8 +1076,7 @@ def path(self): return self._path @property - def query(self): - # type: () -> QueryPairs + def query(self) -> QueryPairs: """Tuple of pairs, created by splitting the ampersand-separated mapping of keys and optional values representing non-hierarchical data used to identify the resource. Keys are @@ -1141,8 +1091,7 @@ def query(self): return self._query @property - def fragment(self): - # type: () -> Text + def fragment(self) -> str: """A string, the last part of the URL, indicated by the first "#" after the :attr:`~hyperlink.URL.path` or :attr:`~hyperlink.URL.query`. Enables indirect identification @@ -1151,8 +1100,7 @@ def fragment(self): return self._fragment @property - def rooted(self): - # type: () -> bool + def rooted(self) -> bool: """Whether or not the path starts with a forward slash (``/``). This is taken from the terminology in the BNF grammar, @@ -1164,16 +1112,14 @@ def rooted(self): return self._rooted @property - def userinfo(self): - # type: () -> Text + def userinfo(self) -> str: """The colon-separated string forming the username-password combination. """ return self._userinfo @property - def uses_netloc(self): - # type: () -> Optional[bool] + def uses_netloc(self) -> Optional[bool]: """ Indicates whether ``://`` (the "netloc separator") will appear to separate the scheme from the *path* in cases where no host is present. @@ -1181,15 +1127,13 @@ def uses_netloc(self): return self._uses_netloc @property - def user(self): - # type: () -> Text + def user(self) -> str: """ The user portion of :attr:`~hyperlink.URL.userinfo`. """ - return self.userinfo.split(u":")[0] + return self.userinfo.split(":")[0] - def authority(self, with_password=False, **kw): - # type: (bool, Any) -> Text + def authority(self, with_password: bool = False, **kw: Any) -> str: """Compute and return the appropriate host/port/userinfo combination. 
>>> url = URL.from_text(u'http://user:pass@localhost:8080/a/b?x=y') @@ -1204,31 +1148,30 @@ def authority(self, with_password=False, **kw): Defaults to False. Returns: - Text: The authority (network location and user information) portion + str: The authority (network location and user information) portion of the URL. """ # first, a bit of twisted compat with_password = kw.pop("includeSecrets", with_password) if kw: - raise TypeError("got unexpected keyword arguments: %r" % kw.keys()) + raise TypeError("got unexpected keyword arguments: %r" % list(kw.keys())) host = self.host if ":" in host: hostport = ["[" + host + "]"] else: hostport = [self.host] if self.port != SCHEME_PORT_MAP.get(self.scheme): - hostport.append(Text(self.port)) + hostport.append(str(self.port)) authority = [] if self.userinfo: userinfo = self.userinfo - if not with_password and u":" in userinfo: - userinfo = userinfo[: userinfo.index(u":") + 1] + if not with_password and ":" in userinfo: + userinfo = userinfo[: userinfo.index(":") + 1] authority.append(userinfo) - authority.append(u":".join(hostport)) - return u"@".join(authority) + authority.append(":".join(hostport)) + return "@".join(authority) - def __eq__(self, other): - # type: (Any) -> bool + def __eq__(self, other: Any) -> bool: if not isinstance(other, self.__class__): return NotImplemented for attr in [ @@ -1249,14 +1192,12 @@ def __eq__(self, other): return True return False - def __ne__(self, other): - # type: (Any) -> bool + def __ne__(self, other: Any) -> bool: if not isinstance(other, self.__class__): return NotImplemented return not self.__eq__(other) - def __hash__(self): - # type: () -> int + def __hash__(self) -> int: return hash( ( self.__class__, @@ -1273,8 +1214,7 @@ def __hash__(self): ) @property - def absolute(self): - # type: () -> bool + def absolute(self) -> bool: """Whether or not the URL is "absolute". Absolute URLs are complete enough to resolve to a network resource without being relative to a base URI. 
@@ -1290,17 +1230,16 @@ def absolute(self): def replace( self, - scheme=_UNSET, # type: Optional[Text] - host=_UNSET, # type: Optional[Text] - path=_UNSET, # type: Iterable[Text] - query=_UNSET, # type: QueryParameters - fragment=_UNSET, # type: Text - port=_UNSET, # type: Optional[int] - rooted=_UNSET, # type: Optional[bool] - userinfo=_UNSET, # type: Text - uses_netloc=_UNSET, # type: Optional[bool] - ): - # type: (...) -> URL + scheme: Optional[str] = _UNSET, + host: Optional[str] = _UNSET, + path: Iterable[str] = _UNSET, + query: QueryParameters = _UNSET, + fragment: str = _UNSET, + port: Optional[int] = _UNSET, + rooted: Optional[bool] = _UNSET, + userinfo: str = _UNSET, + uses_netloc: Optional[bool] = _UNSET, + ) -> URL: """:class:`URL` objects are immutable, which means that attributes are designed to be set only once, at construction. Instead of modifying an existing URL, one simply creates a copy with the @@ -1355,8 +1294,7 @@ def replace( ) @classmethod - def from_text(cls, text): - # type: (Text) -> URL + def from_text(cls, text: str) -> URL: """Whereas the :class:`URL` constructor is useful for constructing URLs from parts, :meth:`~URL.from_text` supports parsing whole URLs from their string form:: @@ -1390,7 +1328,7 @@ def from_text(cls, text): raise URLParseError("could not parse url: %r" % text) gs = um.groupdict() - au_text = gs["authority"] or u"" + au_text = gs["authority"] or "" au_m = _AUTHORITY_RE.match(au_text) if au_m is None: raise URLParseError( @@ -1402,24 +1340,24 @@ def from_text(cls, text): "invalid host %r in url: %r" % (au_gs["bad_host"], text) ) - userinfo = au_gs["userinfo"] or u"" + userinfo = au_gs["userinfo"] or "" host = au_gs["ipv6_host"] or au_gs["plain_host"] port = au_gs["port"] if port is not None: try: - port = int(port) # type: ignore[assignment] # FIXME, see below + port = int(port) # FIXME, see below except ValueError: if not port: # TODO: excessive? 
raise URLParseError("port must not be empty: %r" % au_text) raise URLParseError("expected integer for port, not %r" % port) - scheme = gs["scheme"] or u"" - fragment = gs["fragment"] or u"" + scheme = gs["scheme"] or "" + fragment = gs["fragment"] or "" uses_netloc = bool(gs["_netloc_sep"]) if gs["path"]: - path = tuple(gs["path"].split(u"/")) + path = tuple(gs["path"].split("/")) if not path[0]: path = path[1:] rooted = True @@ -1429,14 +1367,14 @@ def from_text(cls, text): path = () rooted = bool(au_text) if gs["query"]: - query = tuple( + query: QueryPairs = tuple( ( - qe.split(u"=", 1) # type: ignore[misc] - if u"=" in qe + qe.split("=", 1) # type: ignore[misc] + if "=" in qe else (qe, None) ) - for qe in gs["query"].split(u"&") - ) # type: QueryPairs + for qe in gs["query"].split("&") + ) else: query = () return cls( @@ -1445,7 +1383,7 @@ def from_text(cls, text): path, query, fragment, - port, # type: ignore[arg-type] # FIXME, see above + port, # FIXME, see above rooted, userinfo, uses_netloc, @@ -1453,15 +1391,14 @@ def from_text(cls, text): def normalize( self, - scheme=True, - host=True, - path=True, - query=True, - fragment=True, - userinfo=True, - percents=True, - ): - # type: (bool, bool, bool, bool, bool, bool, bool) -> URL + scheme: bool = True, + host: bool = True, + path: bool = True, + query: bool = True, + fragment: bool = True, + userinfo: bool = True, + percents: bool = True, + ) -> URL: """Return a new URL object with several standard normalizations applied: @@ -1498,14 +1435,13 @@ def normalize( .. _RFC 3986 6.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.3 .. 
_RFC 3986 2.4: https://tools.ietf.org/html/rfc3986#section-2.4 """ # noqa: E501 - kw = {} # type: Dict[str, Any] + kw: Dict[str, Any] = {} if scheme: kw["scheme"] = self.scheme.lower() if host: kw["host"] = self.host.lower() - def _dec_unres(target): - # type: (Text) -> Text + def _dec_unres(target: str) -> str: return _decode_unreserved( target, normalize_case=True, encode_stray_percents=percents ) @@ -1516,7 +1452,7 @@ def _dec_unres(target): _dec_unres(p) for p in _resolve_dot_segments(self.path) ] else: - kw["path"] = (u"",) + kw["path"] = ("",) if query: kw["query"] = [ (_dec_unres(k), _dec_unres(v) if v else v) @@ -1525,14 +1461,13 @@ def _dec_unres(target): if fragment: kw["fragment"] = _dec_unres(self.fragment) if userinfo: - kw["userinfo"] = u":".join( + kw["userinfo"] = ":".join( [_dec_unres(p) for p in self.userinfo.split(":", 1)] ) return self.replace(**kw) - def child(self, *segments): - # type: (Text) -> URL + def child(self, *segments: str) -> URL: """Make a new :class:`URL` where the given path segments are a child of this URL, preserving other parts of the URL, including the query string and fragment. @@ -1559,13 +1494,12 @@ def child(self, *segments): _textcheck("path segment", s) for s in segments ] new_path = tuple(self.path) - if self.path and self.path[-1] == u"": + if self.path and self.path[-1] == "": new_path = new_path[:-1] new_path += tuple(_encode_path_parts(segments, maximal=False)) return self.replace(path=new_path) - def sibling(self, segment): - # type: (Text) -> URL + def sibling(self, segment: str) -> URL: """Make a new :class:`URL` with a single path segment that is a sibling of this URL path. @@ -1581,8 +1515,7 @@ def sibling(self, segment): new_path = tuple(self.path)[:-1] + (_encode_path_part(segment),) return self.replace(path=new_path) - def click(self, href=u""): - # type: (Union[Text, URL]) -> URL + def click(self, href: Union[str, URL] = "") -> URL: """Resolve the given URL relative to this URL. 
The resulting URI should match what a web browser would @@ -1595,7 +1528,7 @@ def click(self, href=u""): >>> url.click(u'../d/./e').to_text() u'http://localhost/a/b/d/e' - Args (Text): + Args (str): href: A string representing a clicked URL. Return: @@ -1645,9 +1578,8 @@ def click(self, href=u""): fragment=clicked.fragment, ) - def to_uri(self): - # type: () -> URL - u"""Make a new :class:`URL` instance with all non-ASCII characters + def to_uri(self) -> URL: + """Make a new :class:`URL` instance with all non-ASCII characters appropriately percent-encoded. This is useful to do in preparation for sending a :class:`URL` over a network protocol. @@ -1661,7 +1593,7 @@ def to_uri(self): hostname encoded, so that they are all in the standard US-ASCII range. """ - new_userinfo = u":".join( + new_userinfo = ":".join( [_encode_userinfo_part(p) for p in self.userinfo.split(":", 1)] ) new_path = _encode_path_parts( @@ -1690,9 +1622,8 @@ def to_uri(self): fragment=_encode_fragment_part(self.fragment, maximal=True), ) - def to_iri(self): - # type: () -> URL - u"""Make a new :class:`URL` instance with all but a few reserved + def to_iri(self) -> URL: + """Make a new :class:`URL` instance with all but a few reserved characters decoded into human-readable format. Percent-encoded Unicode and IDNA-encoded hostnames are @@ -1714,7 +1645,7 @@ def to_iri(self): URL: A new instance with its path segments, query parameters, and hostname decoded for display purposes. """ # noqa: E501 - new_userinfo = u":".join( + new_userinfo = ":".join( [_decode_userinfo_part(p) for p in self.userinfo.split(":", 1)] ) host_text = _decode_host(self.host) @@ -1733,8 +1664,7 @@ def to_iri(self): fragment=_decode_fragment_part(self.fragment), ) - def to_text(self, with_password=False): - # type: (bool) -> Text + def to_text(self, with_password: bool = False) -> str: """Render this URL to its textual representation. 
By default, the URL text will *not* include a password, if one @@ -1752,7 +1682,7 @@ def to_text(self, with_password=False): text. Defaults to False. Returns: - Text: The serialized textual representation of this URL, such as + str: The serialized textual representation of this URL, such as ``u"http://example.com/some/path?some=query"``. The natural counterpart to :class:`URL.from_text()`. @@ -1776,18 +1706,18 @@ def to_text(self, with_password=False): query_parts.append(_encode_query_key(k, maximal=False)) else: query_parts.append( - u"=".join( + "=".join( ( _encode_query_key(k, maximal=False), _encode_query_value(v, maximal=False), ) ) ) - query_string = u"&".join(query_parts) + query_string = "&".join(query_parts) fragment = self.fragment - parts = [] # type: List[Text] + parts: List[str] = [] _add = parts.append if scheme: _add(scheme) @@ -1807,18 +1737,16 @@ def to_text(self, with_password=False): if fragment: _add("#") _add(fragment) - return u"".join(parts) + return "".join(parts) - def __repr__(self): - # type: () -> str + def __repr__(self) -> str: """Convert this URL to an representation that shows all of its constituent parts, as well as being a valid argument to :func:`eval`. """ return "%s.from_text(%r)" % (self.__class__.__name__, self.to_text()) - def _to_bytes(self): - # type: () -> bytes + def _to_bytes(self) -> bytes: """ Allows for direct usage of URL objects with libraries like requests, which automatically stringify URL parameters. 
See @@ -1826,28 +1754,21 @@ def _to_bytes(self): """ return self.to_uri().to_text().encode("ascii") - if PY2: - __str__ = _to_bytes - __unicode__ = to_text - else: - __bytes__ = _to_bytes - __str__ = to_text + __bytes__ = _to_bytes + __str__ = to_text # # Begin Twisted Compat Code asURI = to_uri asIRI = to_iri @classmethod - def fromText(cls, s): - # type: (Text) -> URL + def fromText(cls, s: str) -> URL: return cls.from_text(s) - def asText(self, includeSecrets=False): - # type: (bool) -> Text + def asText(self, includeSecrets: bool = False) -> str: return self.to_text(with_password=includeSecrets) - def __dir__(self): - # type: () -> Sequence[Text] + def __dir__(self) -> Sequence[str]: try: ret = object.__dir__(self) except AttributeError: @@ -1858,8 +1779,7 @@ def __dir__(self): # # End Twisted Compat Code - def add(self, name, value=None): - # type: (Text, Optional[Text]) -> URL + def add(self, name: str, value: Optional[str] = None) -> URL: """Make a new :class:`URL` instance with a given query argument, *name*, added to it with the value *value*, like so:: @@ -1880,8 +1800,7 @@ def add(self, name, value=None): """ return self.replace(query=self.query + ((name, value),)) - def set(self, name, value=None): - # type: (Text, Optional[Text]) -> URL + def set(self, name: str, value: Optional[str] = None) -> URL: """Make a new :class:`URL` instance with the query parameter *name* set to *value*. All existing occurences, if any are replaced by the single name-value pair. @@ -1909,8 +1828,7 @@ def set(self, name, value=None): q[idx:idx] = [(name, value)] return self.replace(query=q) - def get(self, name): - # type: (Text) -> List[Optional[Text]] + def get(self, name: str) -> List[Optional[str]]: """Get a list of values for the given query parameter, *name*:: >>> url = URL.from_text(u'?x=1&x=2') @@ -1926,18 +1844,17 @@ def get(self, name): name: The name of the query parameter to get. 
Returns: - List[Optional[Text]]: A list of all the values associated with the + List[Optional[str]]: A list of all the values associated with the key, in string form. """ return [value for (key, value) in self.query if name == key] def remove( self, - name, # type: Text - value=_UNSET, # type: Text - limit=None, # type: Optional[int] - ): - # type: (...) -> URL + name: str, + value: str = _UNSET, + limit: Optional[int] = None, + ) -> URL: """Make a new :class:`URL` instance with occurrences of the query parameter *name* removed, or, if *value* is set, parameters matching *name* and *value*. No exception is raised if the @@ -1983,13 +1900,11 @@ def remove( _EMPTY_URL = URL() -def _replace_plus(text): - # type: (Text) -> Text +def _replace_plus(text: str) -> str: return text.replace("+", "%20") -def _no_op(text): - # type: (Text) -> Text +def _no_op(text: str) -> str: return text @@ -2039,8 +1954,7 @@ class DecodedURL(object): .. versionadded:: 18.0.0 """ - def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None): - # type: (URL, bool, Optional[bool]) -> None + def __init__(self, url: URL = _EMPTY_URL, lazy: bool = False, query_plus_is_space: Optional[bool] = None) -> None: self._url = url if query_plus_is_space is None: query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES @@ -2052,13 +1966,12 @@ def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None): return @classmethod - def from_text(cls, text, lazy=False, query_plus_is_space=None): - # type: (Text, bool, Optional[bool]) -> DecodedURL + def from_text(cls, text: str, lazy: bool = False, query_plus_is_space: Optional[bool] = None) -> DecodedURL: """\ Make a `DecodedURL` instance from any text string containing a URL. Args: - text: Text containing the URL + text: str containing the URL lazy: Whether to pre-decode all parts of the URL to check for validity. Defaults to True. 
@@ -2067,38 +1980,32 @@ def from_text(cls, text, lazy=False, query_plus_is_space=None): return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space) @property - def encoded_url(self): - # type: () -> URL + def encoded_url(self) -> URL: """Access the underlying :class:`URL` object, which has any special characters encoded. """ return self._url - def to_text(self, with_password=False): - # type: (bool) -> Text + def to_text(self, with_password: bool = False) -> str: "Passthrough to :meth:`~hyperlink.URL.to_text()`" return self._url.to_text(with_password) - def to_uri(self): - # type: () -> URL + def to_uri(self) -> URL: "Passthrough to :meth:`~hyperlink.URL.to_uri()`" return self._url.to_uri() - def to_iri(self): - # type: () -> URL + def to_iri(self) -> URL: "Passthrough to :meth:`~hyperlink.URL.to_iri()`" return self._url.to_iri() - def _clone(self, url): - # type: (URL) -> DecodedURL + def _clone(self, url: URL) -> DecodedURL: return self.__class__( url, # TODO: propagate laziness? query_plus_is_space=self._query_plus_is_space, ) - def click(self, href=u""): - # type: (Union[Text, URL, DecodedURL]) -> DecodedURL + def click(self, href: Union[str, URL, DecodedURL] = "") -> DecodedURL: """Return a new DecodedURL wrapping the result of :meth:`~hyperlink.URL.click()` """ @@ -2108,8 +2015,7 @@ def click(self, href=u""): self._url.click(href=href), ) - def sibling(self, segment): - # type: (Text) -> DecodedURL + def sibling(self, segment: str) -> DecodedURL: """Automatically encode any reserved characters in *segment* and return a new `DecodedURL` wrapping the result of :meth:`~hyperlink.URL.sibling()` @@ -2118,8 +2024,7 @@ def sibling(self, segment): self._url.sibling(_encode_reserved(segment)), ) - def child(self, *segments): - # type: (Text) -> DecodedURL + def child(self, *segments: str) -> DecodedURL: """Automatically encode any reserved characters in *segments* and return a new `DecodedURL` wrapping the result of :meth:`~hyperlink.URL.child()`. 
@@ -2131,15 +2036,14 @@ def child(self, *segments): def normalize( self, - scheme=True, - host=True, - path=True, - query=True, - fragment=True, - userinfo=True, - percents=True, - ): - # type: (bool, bool, bool, bool, bool, bool, bool) -> DecodedURL + scheme: bool = True, + host: bool = True, + path: bool = True, + query: bool = True, + fragment: bool = True, + userinfo: bool = True, + percents: bool = True, + ) -> DecodedURL: """Return a new `DecodedURL` wrapping the result of :meth:`~hyperlink.URL.normalize()` """ @@ -2150,33 +2054,27 @@ def normalize( ) @property - def absolute(self): - # type: () -> bool + def absolute(self) -> bool: return self._url.absolute @property - def scheme(self): - # type: () -> Text + def scheme(self) -> str: return self._url.scheme @property - def host(self): - # type: () -> Text + def host(self) -> str: return _decode_host(self._url.host) @property - def port(self): - # type: () -> Optional[int] + def port(self) -> Optional[int]: return self._url.port @property - def rooted(self): - # type: () -> bool + def rooted(self) -> bool: return self._url.rooted @property - def path(self): - # type: () -> Sequence[Text] + def path(self) -> Sequence[str]: if not hasattr(self, "_path"): self._path = tuple( [ @@ -2187,8 +2085,7 @@ def path(self): return self._path @property - def query(self): - # type: () -> QueryPairs + def query(self) -> QueryPairs: if not hasattr(self, "_query"): if self._query_plus_is_space: predecode = _replace_plus @@ -2196,7 +2093,7 @@ def query(self): predecode = _no_op self._query = cast( - QueryPairs, + "QueryPairs", tuple( tuple( _percent_decode( @@ -2212,19 +2109,17 @@ def query(self): return self._query @property - def fragment(self): - # type: () -> Text + def fragment(self) -> str: if not hasattr(self, "_fragment"): frag = self._url.fragment self._fragment = _percent_decode(frag, raise_subencoding_exc=True) return self._fragment @property - def userinfo(self): - # type: () -> Union[Tuple[str], Tuple[str, str]] + 
def userinfo(self) -> "Union[Tuple[str], Tuple[str, str]]": if not hasattr(self, "_userinfo"): self._userinfo = cast( - Union[Tuple[str], Tuple[str, str]], + "Union[Tuple[str], Tuple[str, str]]", tuple( tuple( _percent_decode(p, raise_subencoding_exc=True) @@ -2235,28 +2130,25 @@ def userinfo(self): return self._userinfo @property - def user(self): - # type: () -> Text + def user(self) -> str: return self.userinfo[0] @property - def uses_netloc(self): - # type: () -> Optional[bool] + def uses_netloc(self) -> Optional[bool]: return self._url.uses_netloc def replace( self, - scheme=_UNSET, # type: Optional[Text] - host=_UNSET, # type: Optional[Text] - path=_UNSET, # type: Iterable[Text] - query=_UNSET, # type: QueryParameters - fragment=_UNSET, # type: Text - port=_UNSET, # type: Optional[int] - rooted=_UNSET, # type: Optional[bool] - userinfo=_UNSET, # type: Union[Tuple[str], Tuple[str, str]] - uses_netloc=_UNSET, # type: Optional[bool] - ): - # type: (...) -> DecodedURL + scheme: Optional[str] = _UNSET, + host: Optional[str] = _UNSET, + path: Iterable[str] = _UNSET, + query: QueryParameters = _UNSET, + fragment: str = _UNSET, + port: Optional[int] = _UNSET, + rooted: Optional[bool] = _UNSET, + userinfo: Union[Tuple[str], Tuple[str, str]] = _UNSET, + uses_netloc: Optional[bool] = _UNSET, + ) -> DecodedURL: """While the signature is the same, this `replace()` differs a little from URL.replace. 
For instance, it accepts userinfo as a tuple, not as a string, handling the case of having a username @@ -2268,7 +2160,7 @@ def replace( path = tuple(_encode_reserved(p) for p in path) if query is not _UNSET: query = cast( - QueryPairs, + "QueryPairs", tuple( tuple( _encode_reserved(x) if x is not None else None @@ -2283,7 +2175,7 @@ def replace( 'userinfo expected sequence of ["user"] or' ' ["user", "password"], got %r' % (userinfo,) ) - userinfo_text = u":".join([_encode_reserved(p) for p in userinfo]) + userinfo_text = ":".join([_encode_reserved(p) for p in userinfo]) else: userinfo_text = _UNSET new_url = self._url.replace( @@ -2299,19 +2191,16 @@ def replace( ) return self._clone(url=new_url) - def get(self, name): - # type: (Text) -> List[Optional[Text]] + def get(self, name: str) -> List[Optional[str]]: "Get the value of all query parameters whose name matches *name*" return [v for (k, v) in self.query if name == k] - def add(self, name, value=None): - # type: (Text, Optional[Text]) -> DecodedURL + def add(self, name: str, value: Optional[str] = None) -> DecodedURL: """Return a new DecodedURL with the query parameter *name* and *value* added.""" return self.replace(query=self.query + ((name, value),)) - def set(self, name, value=None): - # type: (Text, Optional[Text]) -> DecodedURL + def set(self, name: str, value: Optional[str] = None) -> DecodedURL: "Return a new DecodedURL with query parameter *name* set to *value*" query = self.query q = [(k, v) for (k, v) in query if k != name] @@ -2321,11 +2210,10 @@ def set(self, name, value=None): def remove( self, - name, # type: Text - value=_UNSET, # type: Text - limit=None, # type: Optional[int] - ): - # type: (...) -> DecodedURL + name: str, + value: str = _UNSET, + limit: Optional[int] = None, + ) -> DecodedURL: """Return a new DecodedURL with query parameter *name* removed. 
Optionally also filter for *value*, as well as cap the number @@ -2354,31 +2242,26 @@ def remove( return self.replace(query=nq) - def __repr__(self): - # type: () -> str + def __repr__(self) -> str: cn = self.__class__.__name__ return "%s(url=%r)" % (cn, self._url) - def __str__(self): - # type: () -> str + def __str__(self) -> str: # TODO: the underlying URL's __str__ needs to change to make # this work as the URL, see #55 return str(self._url) - def __eq__(self, other): - # type: (Any) -> bool + def __eq__(self, other: Any) -> bool: if not isinstance(other, self.__class__): return NotImplemented return self.normalize().to_uri() == other.normalize().to_uri() - def __ne__(self, other): - # type: (Any) -> bool + def __ne__(self, other: Any) -> bool: if not isinstance(other, self.__class__): return NotImplemented return not self.__eq__(other) - def __hash__(self): - # type: () -> int + def __hash__(self) -> int: return hash( ( self.__class__, @@ -2399,16 +2282,13 @@ def __hash__(self): asIRI = to_iri @classmethod - def fromText(cls, s, lazy=False): - # type: (Text, bool) -> DecodedURL + def fromText(cls, s: str, lazy: bool = False) -> DecodedURL: return cls.from_text(s, lazy=lazy) - def asText(self, includeSecrets=False): - # type: (bool) -> Text + def asText(self, includeSecrets: bool = False) -> str: return self.to_text(with_password=includeSecrets) - def __dir__(self): - # type: () -> Sequence[Text] + def __dir__(self) -> Sequence[str]: try: ret = object.__dir__(self) except AttributeError: @@ -2421,26 +2301,21 @@ def __dir__(self): # Add some overloads so that parse gives a better return value. 
-@overload -def parse(url, decoded, lazy=False): - # type: (Text, Literal[False], bool) -> URL - """Passing decoded=False returns URL.""" - - -@overload -def parse(url, decoded=True, lazy=False): - # type: (Text, Literal[True], bool) -> DecodedURL - """Passing decoded=True (or the default value) returns DecodedURL.""" +if TYPE_CHECKING: + @overload + def parse(url: str, decoded: Literal[False], lazy: bool = False) -> URL: + """Passing decoded=False returns URL.""" + @overload + def parse(url: str, decoded: Literal[True] = True, lazy: bool = False) -> DecodedURL: + """Passing decoded=True (or the default value) returns DecodedURL.""" -@overload -def parse(url, decoded=True, lazy=False): - # type: (Text, bool, bool) -> Union[URL, DecodedURL] - """If decoded is not a literal we don't know the return type.""" + @overload + def parse(url: str, decoded: bool = True, lazy: bool = False) -> Union[URL, DecodedURL]: + """If decoded is not a literal we don't know the return type.""" -def parse(url, decoded=True, lazy=False): - # type: (Text, bool, bool) -> Union[URL, DecodedURL] +def parse(url: str, decoded: bool = True, lazy: bool = False) -> Union[URL, DecodedURL]: """ Automatically turn text into a structured URL object. diff --git a/src/hyperlink/hypothesis.py b/src/hyperlink/hypothesis.py index 4ab987eb..1a86dfd7 100644 --- a/src/hyperlink/hypothesis.py +++ b/src/hyperlink/hypothesis.py @@ -2,29 +2,22 @@ """ Hypothesis strategies. """ -from __future__ import absolute_import + +from __future__ import annotations try: import hypothesis del hypothesis except ImportError: - from typing import Tuple - - __all__ = () # type: Tuple[str, ...] + __all__: tuple[str, ...] 
= () else: from csv import reader as csv_reader from os.path import dirname, join from string import ascii_letters, digits from sys import maxunicode from typing import ( - Callable, - Iterable, - List, - Optional, - Sequence, - Text, - TypeVar, + TYPE_CHECKING, cast, ) from gzip import open as open_gzip @@ -42,6 +35,23 @@ from idna import IDNAError, check_label, encode as idna_encode + if TYPE_CHECKING: + from typing import ( + Callable, + Iterable, + List, + Optional, + Sequence, + TypeVar, + ) + + T = TypeVar("T") + DrawCallable = Callable[[Callable[..., T]], T] + else: + # Runtime stubs for types that are only used in type checking + # but are imported by test code + DrawCallable = None # type: ignore[assignment,misc] + __all__ = ( "decoded_urls", "encoded_urls", @@ -50,18 +60,10 @@ "idna_text", "paths", "port_numbers", + "DrawCallable", ) - T = TypeVar("T") - DrawCallable = Callable[[Callable[..., T]], T] - - try: - unichr - except NameError: # Py3 - unichr = chr # type: Callable[[int], Text] - - def idna_characters(): - # type: () -> Text + def idna_characters() -> str: """ Returns a string containing IDNA characters. """ @@ -100,17 +102,16 @@ def idna_characters(): for i in range(start, end + 1): if i > maxunicode: # Happens using Py2 on Windows break - result.append(unichr(i)) + result.append(chr(i)) - _idnaCharacters = u"".join(result) + _idnaCharacters = "".join(result) return _idnaCharacters - _idnaCharacters = "" # type: Text + _idnaCharacters: str = "" @composite - def idna_text(draw, min_size=1, max_size=None): - # type: (DrawCallable, int, Optional[int]) -> Text + def idna_text(draw: DrawCallable, min_size: int = 1, max_size: Optional[int] = None) -> str: """ A strategy which generates IDNA-encodable text. 
@@ -128,7 +129,7 @@ def idna_text(draw, min_size=1, max_size=None): assert max_size >= 1 result = cast( - Text, + str, draw(text(min_size=min_size, max_size=max_size, alphabet=alphabet)), ) @@ -142,8 +143,7 @@ def idna_text(draw, min_size=1, max_size=None): return result @composite - def port_numbers(draw, allow_zero=False): - # type: (DrawCallable, bool) -> int + def port_numbers(draw: DrawCallable, allow_zero: bool = False) -> int: """ A strategy which generates port numbers. @@ -157,8 +157,7 @@ def port_numbers(draw, allow_zero=False): return cast(int, draw(integers(min_value=min_value, max_value=65535))) @composite - def hostname_labels(draw, allow_idn=True): - # type: (DrawCallable, bool) -> Text + def hostname_labels(draw: DrawCallable, allow_idn: bool = True) -> str: """ A strategy which generates host name labels. @@ -166,7 +165,7 @@ def hostname_labels(draw, allow_idn=True): internationalized domain names (IDNs). """ if allow_idn: - label = cast(Text, draw(idna_text(min_size=1, max_size=63))) + label = cast(str, draw(idna_text(min_size=1, max_size=63))) try: label.encode("ascii") @@ -182,12 +181,12 @@ def hostname_labels(draw, allow_idn=True): else: label = cast( - Text, + str, draw( text( min_size=1, max_size=63, - alphabet=Text(ascii_letters + digits + u"-"), + alphabet=str(ascii_letters + digits + "-"), ) ), ) @@ -203,8 +202,7 @@ def hostname_labels(draw, allow_idn=True): return label @composite - def hostnames(draw, allow_leading_digit=True, allow_idn=True): - # type: (DrawCallable, bool, bool) -> Text + def hostnames(draw: DrawCallable, allow_leading_digit: bool = True, allow_idn: bool = True) -> str: """ A strategy which generates host names. @@ -215,9 +213,9 @@ def hostnames(draw, allow_leading_digit=True, allow_idn=True): internationalized domain names (IDNs). 
""" # Draw first label, filtering out labels with leading digits if needed - labels = [ + labels: list[str] = [ cast( - Text, + str, draw( hostname_labels(allow_idn=allow_idn).filter( lambda l: ( @@ -229,7 +227,7 @@ def hostnames(draw, allow_leading_digit=True, allow_idn=True): ] # Draw remaining labels labels += cast( - List[Text], + list[str], draw( lists( hostname_labels(allow_idn=allow_idn), @@ -244,10 +242,9 @@ def hostnames(draw, allow_leading_digit=True, allow_idn=True): while sum(len(label) for label in labels) + len(labels) - 1 > 252: labels = labels[:-1] - return u".".join(labels) + return ".".join(labels) - def path_characters(): - # type: () -> str + def path_characters() -> str: """ Returns a string containing valid URL path characters. """ @@ -255,10 +252,9 @@ def path_characters(): if _path_characters is None: - def chars(): - # type: () -> Iterable[Text] + def chars() -> Iterable[str]: for i in range(maxunicode): - c = unichr(i) + c = chr(i) # Exclude reserved characters if c in "#/?": @@ -276,43 +272,40 @@ def chars(): return _path_characters - _path_characters = None # type: Optional[str] + _path_characters: Optional[str] = None @composite - def paths(draw): - # type: (DrawCallable) -> Sequence[Text] + def paths(draw: DrawCallable) -> Sequence[str]: return cast( - List[Text], + "List[str]", draw( lists(text(min_size=1, alphabet=path_characters()), max_size=10) ), ) @composite - def encoded_urls(draw): - # type: (DrawCallable) -> EncodedURL + def encoded_urls(draw: DrawCallable) -> EncodedURL: """ A strategy which generates L{EncodedURL}s. Call the L{EncodedURL.to_uri} method on each URL to get an HTTP protocol-friendly URI. 
""" - port = cast(Optional[int], draw(port_numbers(allow_zero=True))) - host = cast(Text, draw(hostnames())) - path = cast(Sequence[Text], draw(paths())) + port = cast("Optional[int]", draw(port_numbers(allow_zero=True))) + host = cast(str, draw(hostnames())) + path = cast("Sequence[str]", draw(paths())) if port == 0: port = None return EncodedURL( - scheme=cast(Text, draw(sampled_from((u"http", u"https")))), + scheme=cast(str, draw(sampled_from(("http", "https")))), host=host, port=port, path=path, ) @composite - def decoded_urls(draw): - # type: (DrawCallable) -> DecodedURL + def decoded_urls(draw: DrawCallable) -> DecodedURL: """ A strategy which generates L{DecodedURL}s. Call the L{EncodedURL.to_uri} method on each URL to get an HTTP diff --git a/src/hyperlink/test/common.py b/src/hyperlink/test/common.py index ad3bd04a..b52cb92e 100644 --- a/src/hyperlink/test/common.py +++ b/src/hyperlink/test/common.py @@ -1,6 +1,11 @@ -from typing import Any, Callable, Optional, Type +from __future__ import annotations + +from typing import TYPE_CHECKING from unittest import TestCase +if TYPE_CHECKING: + from typing import Any, Callable, Optional, Type + class HyperlinkTestCase(TestCase): """This type mostly exists to provide a backwards-compatible @@ -9,12 +14,11 @@ class HyperlinkTestCase(TestCase): def assertRaises( # type: ignore[override] self, - expected_exception, # type: Type[BaseException] - callableObj=None, # type: Optional[Callable[..., Any]] - *args, # type: Any - **kwargs # type: Any - ): - # type: (...) -> Any + expected_exception: Type[BaseException], + callableObj: Optional[Callable[..., Any]] = None, + *args: Any, + **kwargs: Any, + ) -> Any: """Fail unless an exception of class expected_exception is raised by callableObj when invoked with arguments args and keyword arguments kwargs. 
If a different type of exception is @@ -47,17 +51,18 @@ def assertRaises( # type: ignore[override] class _AssertRaisesContext(object): "A context manager used to implement HyperlinkTestCase.assertRaises." - def __init__(self, expected, test_case): - # type: (Type[BaseException], TestCase) -> None + def __init__( + self, expected: Type[BaseException], test_case: TestCase + ) -> None: self.expected = expected self.failureException = test_case.failureException - def __enter__(self): - # type: () -> "_AssertRaisesContext" + def __enter__(self) -> _AssertRaisesContext: return self - def __exit__(self, exc_type, exc_value, tb): - # type: (Optional[Type[BaseException]], Any, Any) -> bool + def __exit__( + self, exc_type: Optional[Type[BaseException]], exc_value: Any, tb: Any + ) -> bool: if exc_type is None: exc_name = self.expected.__name__ raise self.failureException("%s not raised" % (exc_name,)) diff --git a/src/hyperlink/test/test_common.py b/src/hyperlink/test/test_common.py index dc5e5bb8..16a41118 100644 --- a/src/hyperlink/test/test_common.py +++ b/src/hyperlink/test/test_common.py @@ -1,10 +1,14 @@ """ Tests for hyperlink.test.common """ -from typing import Any + +from typing import TYPE_CHECKING from unittest import TestCase from .common import HyperlinkTestCase +if TYPE_CHECKING: + from typing import Any + class _ExpectedException(Exception): """An exception used to test HyperlinkTestCase.assertRaises.""" diff --git a/src/hyperlink/test/test_decoded_url.py b/src/hyperlink/test/test_decoded_url.py index 48452579..3503b927 100644 --- a/src/hyperlink/test/test_decoded_url.py +++ b/src/hyperlink/test/test_decoded_url.py @@ -1,12 +1,15 @@ # -*- coding: utf-8 -*- +from __future__ import annotations -from __future__ import unicode_literals +from typing import TYPE_CHECKING, Union -from typing import Dict, Union from .. 
import DecodedURL, URL from .._url import _percent_decode from .common import HyperlinkTestCase +if TYPE_CHECKING: + from typing import Dict + BASIC_URL = "http://example.com/#" TOTAL_URL = ( "https://%75%73%65%72:%00%00%00%00@xn--bcher-kva.ch:8080/" @@ -15,8 +18,7 @@ class TestURL(HyperlinkTestCase): - def test_durl_basic(self): - # type: () -> None + def test_durl_basic(self) -> None: bdurl = DecodedURL.from_text(BASIC_URL) assert bdurl.scheme == "http" assert bdurl.host == "example.com" @@ -36,8 +38,7 @@ def test_durl_basic(self): assert durl.user == "user" assert durl.userinfo == ("user", "\0\0\0\0") - def test_roundtrip_iri_parameter_values(self): - # type: () -> None + def test_roundtrip_iri_parameter_values(self) -> None: """ .to_iri() should never modify the application-level data of a query parameter. @@ -50,8 +51,7 @@ def test_roundtrip_iri_parameter_values(self): [value], ) - def test_roundtrip_uri_parameter_values(self): - # type: () -> None + def test_roundtrip_uri_parameter_values(self) -> None: """ .to_uri() should never modify the application-level data of a query parameter. 
@@ -64,8 +64,7 @@ def test_roundtrip_uri_parameter_values(self): [value], ) - def test_passthroughs(self): - # type: () -> None + def test_passthroughs(self) -> None: # just basic tests for the methods that more or less pass straight # through to the underlying URL @@ -102,13 +101,11 @@ def test_passthroughs(self): assert not (durl == 1) assert durl != 1 - def test_repr(self): - # type: () -> None + def test_repr(self) -> None: durl = DecodedURL.from_text(TOTAL_URL) assert repr(durl) == "DecodedURL(url=" + repr(durl._url) + ")" - def test_query_manipulation(self): - # type: () -> None + def test_query_manipulation(self) -> None: durl = DecodedURL.from_text(TOTAL_URL) assert durl.get("zot") == ["23%"] @@ -143,8 +140,7 @@ def test_query_manipulation(self): "https://example.com/a/b/?fóó=1&bar=2&fóó=3" ) - def test_equality_and_hashability(self): - # type: () -> None + def test_equality_and_hashability(self) -> None: durl = DecodedURL.from_text(TOTAL_URL) durl2 = DecodedURL.from_text(TOTAL_URL) burl = DecodedURL.from_text(BASIC_URL) @@ -158,7 +154,7 @@ def test_equality_and_hashability(self): AnyURL = Union[URL, DecodedURL] - durl_map = {} # type: Dict[AnyURL, AnyURL] + durl_map: "Dict[AnyURL, AnyURL]" = {} durl_map[durl] = durl durl_map[durl2] = durl2 @@ -172,8 +168,7 @@ def test_equality_and_hashability(self): assert len(durl_map) == 3 - def test_replace_roundtrip(self): - # type: () -> None + def test_replace_roundtrip(self) -> None: durl = DecodedURL.from_text(TOTAL_URL) durl2 = durl.replace( @@ -190,8 +185,7 @@ def test_replace_roundtrip(self): assert durl == durl2 - def test_replace_userinfo(self): - # type: () -> None + def test_replace_userinfo(self) -> None: durl = DecodedURL.from_text(TOTAL_URL) with self.assertRaises(ValueError): durl.replace( @@ -203,8 +197,7 @@ def test_replace_userinfo(self): ) return - def test_twisted_compat(self): - # type: () -> None + def test_twisted_compat(self) -> None: durl = DecodedURL.from_text(TOTAL_URL) assert durl == 
DecodedURL.fromText(TOTAL_URL) @@ -212,8 +205,7 @@ def test_twisted_compat(self): assert "asText" not in dir(durl) assert durl.to_text() == durl.asText() - def test_percent_decode_mixed(self): - # type: () -> None + def test_percent_decode_mixed(self) -> None: # See https://github.com/python-hyper/hyperlink/pull/59 for a # nice discussion of the possibilities @@ -229,8 +221,7 @@ def test_percent_decode_mixed(self): # when not encodable as subencoding assert _percent_decode("é%25é", subencoding="ascii") == "é%25é" - def test_click_decoded_url(self): - # type: () -> None + def test_click_decoded_url(self) -> None: durl = DecodedURL.from_text(TOTAL_URL) durl_dest = DecodedURL.from_text("/tëst") @@ -239,15 +230,13 @@ def test_click_decoded_url(self): assert clicked.path == durl_dest.path assert clicked.path == ("tëst",) - def test_decode_plus(self): - # type: () -> None + def test_decode_plus(self) -> None: durl = DecodedURL.from_text("/x+y%2B?a=b+c%2B") assert durl.path == ("x+y+",) assert durl.get("a") == ["b c+"] assert durl.query == (("a", "b c+"),) - def test_decode_nonplussed(self): - # type: () -> None + def test_decode_nonplussed(self) -> None: durl = DecodedURL.from_text( "/x+y%2B?a=b+c%2B", query_plus_is_space=False ) diff --git a/src/hyperlink/test/test_hypothesis.py b/src/hyperlink/test/test_hypothesis.py index 776ed7b7..f9805850 100644 --- a/src/hyperlink/test/test_hypothesis.py +++ b/src/hyperlink/test/test_hypothesis.py @@ -3,6 +3,8 @@ Tests for hyperlink.hypothesis. 
""" +from __future__ import annotations + try: import hypothesis @@ -11,15 +13,15 @@ pass else: from string import digits - from typing import Sequence, Text + from typing import TYPE_CHECKING, Sequence + + from unittest.mock import patch - try: - from unittest.mock import patch - except ImportError: - from mock import patch # type: ignore[misc] + from hypothesis import given, settings, HealthCheck + from hypothesis.strategies import data - from hypothesis import given, settings - from hypothesis.strategies import SearchStrategy, data + if TYPE_CHECKING: + from hypothesis.strategies import SearchStrategy from idna import IDNAError, check_label, encode as idna_encode @@ -43,8 +45,7 @@ class TestHypothesisStrategies(HyperlinkTestCase): """ @given(idna_text()) - def test_idna_text_valid(self, text): - # type: (Text) -> None + def test_idna_text_valid(self, text: str) -> None: """ idna_text() generates IDNA-encodable text. """ @@ -54,8 +55,7 @@ def test_idna_text_valid(self, text): raise AssertionError("Invalid IDNA text: {!r}".format(text)) @given(data()) - def test_idna_text_min_max(self, data): - # type: (SearchStrategy) -> None + def test_idna_text_min_max(self, data: SearchStrategy) -> None: """ idna_text() raises AssertionError if min_size is < 1. """ @@ -63,8 +63,7 @@ def test_idna_text_min_max(self, data): self.assertRaises(AssertionError, data.draw, idna_text(max_size=0)) @given(port_numbers()) - def test_port_numbers_bounds(self, port): - # type: (int) -> None + def test_port_numbers_bounds(self, port: int) -> None: """ port_numbers() generates integers between 1 and 65535, inclusive. """ @@ -72,8 +71,7 @@ def test_port_numbers_bounds(self, port): self.assertLessEqual(port, 65535) @given(port_numbers(allow_zero=True)) - def test_port_numbers_bounds_allow_zero(self, port): - # type: (int) -> None + def test_port_numbers_bounds_allow_zero(self, port: int) -> None: """ port_numbers(allow_zero=True) generates integers between 0 and 65535, inclusive. 
@@ -82,8 +80,7 @@ def test_port_numbers_bounds_allow_zero(self, port): self.assertLessEqual(port, 65535) @given(hostname_labels()) - def test_hostname_labels_valid_idn(self, label): - # type: (Text) -> None + def test_hostname_labels_valid_idn(self, label: str) -> None: """ hostname_labels() generates IDN host name labels. """ @@ -95,22 +92,24 @@ def test_hostname_labels_valid_idn(self, label): @given(data()) @settings(max_examples=10) - def test_hostname_labels_long_idn_punycode(self, data): - # type: (SearchStrategy) -> None + def test_hostname_labels_long_idn_punycode( + self, data: SearchStrategy + ) -> None: """ hostname_labels() handles case where idna_text() generates text that encoded to punycode ends up as longer than allowed. """ @composite - def mock_idna_text(draw, min_size, max_size): - # type: (DrawCallable, int, int) -> Text + def mock_idna_text( + draw: DrawCallable, min_size: int, max_size: int + ) -> str: # We want a string that does not exceed max_size, but when # encoded to punycode, does exceed max_size. # So use a unicode character that is larger when encoded, # "á" being a great example, and use it max_size times, which # will be max_size * 3 in size when encoded. - return u"\N{LATIN SMALL LETTER A WITH ACUTE}" * max_size + return "\N{LATIN SMALL LETTER A WITH ACUTE}" * max_size with patch("hyperlink.hypothesis.idna_text", mock_idna_text): label = data.draw(hostname_labels()) @@ -123,8 +122,8 @@ def mock_idna_text(draw, min_size, max_size): ) @given(hostname_labels(allow_idn=False)) - def test_hostname_labels_valid_ascii(self, label): - # type: (Text) -> None + @settings(suppress_health_check=[HealthCheck.filter_too_much]) + def test_hostname_labels_valid_ascii(self, label: str) -> None: """ hostname_labels() generates a ASCII host name labels. 
""" @@ -135,13 +134,12 @@ def test_hostname_labels_valid_ascii(self, label): raise AssertionError("Invalid ASCII label: {!r}".format(label)) @given(hostnames()) - def test_hostnames_idn(self, hostname): - # type: (Text) -> None + def test_hostnames_idn(self, hostname: str) -> None: """ hostnames() generates a IDN host names. """ try: - for label in hostname.split(u"."): + for label in hostname.split("."): check_label(label) idna_encode(hostname) except UnicodeError: # pragma: no cover @@ -150,8 +148,7 @@ def test_hostnames_idn(self, hostname): ) @given(hostnames(allow_leading_digit=False)) - def test_hostnames_idn_nolead(self, hostname): - # type: (Text) -> None + def test_hostnames_idn_nolead(self, hostname: str) -> None: """ hostnames(allow_leading_digit=False) generates a IDN host names without leading digits. @@ -159,13 +156,13 @@ def test_hostnames_idn_nolead(self, hostname): self.assertTrue(hostname == hostname.lstrip(digits)) @given(hostnames(allow_idn=False)) - def test_hostnames_ascii(self, hostname): - # type: (Text) -> None + @settings(suppress_health_check=[HealthCheck.filter_too_much]) + def test_hostnames_ascii(self, hostname: str) -> None: """ hostnames() generates a ASCII host names. """ try: - for label in hostname.split(u"."): + for label in hostname.split("."): check_label(label) hostname.encode("ascii") except UnicodeError: # pragma: no cover @@ -174,8 +171,8 @@ def test_hostnames_ascii(self, hostname): ) @given(hostnames(allow_leading_digit=False, allow_idn=False)) - def test_hostnames_ascii_nolead(self, hostname): - # type: (Text) -> None + @settings(suppress_health_check=[HealthCheck.filter_too_much]) + def test_hostnames_ascii_nolead(self, hostname: str) -> None: """ hostnames(allow_leading_digit=False, allow_idn=False) generates ASCII host names without leading digits. 
@@ -183,12 +180,11 @@ def test_hostnames_ascii_nolead(self, hostname): self.assertTrue(hostname == hostname.lstrip(digits)) @given(paths()) - def test_paths(self, path): - # type: (Sequence[Text]) -> None + def test_paths(self, path: Sequence[str]) -> None: """ paths() generates sequences of URL path components. """ - text = u"/".join(path) + text = "/".join(path) try: text.encode("utf-8") except UnicodeError: # pragma: no cover @@ -198,16 +194,14 @@ def test_paths(self, path): self.assertNotIn("#/?", segment) @given(encoded_urls()) - def test_encoded_urls(self, url): - # type: (EncodedURL) -> None + def test_encoded_urls(self, url: EncodedURL) -> None: """ encoded_urls() generates EncodedURLs. """ self.assertIsInstance(url, EncodedURL) @given(decoded_urls()) - def test_decoded_urls(self, url): - # type: (DecodedURL) -> None + def test_decoded_urls(self, url: DecodedURL) -> None: """ decoded_urls() generates DecodedURLs. """ diff --git a/src/hyperlink/test/test_parse.py b/src/hyperlink/test/test_parse.py index 66b02709..11e3f11c 100644 --- a/src/hyperlink/test/test_parse.py +++ b/src/hyperlink/test/test_parse.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- - -from __future__ import unicode_literals +from __future__ import annotations from .common import HyperlinkTestCase from hyperlink import parse, EncodedURL, DecodedURL @@ -16,8 +15,7 @@ class TestURL(HyperlinkTestCase): - def test_parse(self): - # type: () -> None + def test_parse(self) -> None: purl = parse(TOTAL_URL) assert isinstance(purl, DecodedURL) assert purl.user == "user" diff --git a/src/hyperlink/test/test_scheme_registration.py b/src/hyperlink/test/test_scheme_registration.py index b43c91e3..ae4614bb 100644 --- a/src/hyperlink/test/test_scheme_registration.py +++ b/src/hyperlink/test/test_scheme_registration.py @@ -1,6 +1,11 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals -from typing import cast + +from __future__ import annotations + +from typing import TYPE_CHECKING, cast + +if 
TYPE_CHECKING: + pass from .. import _url @@ -9,18 +14,15 @@ class TestSchemeRegistration(HyperlinkTestCase): - def setUp(self): - # type: () -> None + def setUp(self) -> None: self._orig_scheme_port_map = dict(_url.SCHEME_PORT_MAP) self._orig_no_netloc_schemes = set(_url.NO_NETLOC_SCHEMES) - def tearDown(self): - # type: () -> None + def tearDown(self) -> None: _url.SCHEME_PORT_MAP = self._orig_scheme_port_map _url.NO_NETLOC_SCHEMES = self._orig_no_netloc_schemes - def test_register_scheme_basic(self): - # type: () -> None + def test_register_scheme_basic(self) -> None: register_scheme("deltron", uses_netloc=True, default_port=3030) u1 = URL.from_text("deltron://example.com") @@ -42,37 +44,31 @@ def test_register_scheme_basic(self): u4 = u4.replace(host="example.com") assert u4.to_text() == "nonetron://example.com" - def test_register_no_netloc_scheme(self): - # type: () -> None + def test_register_no_netloc_scheme(self) -> None: register_scheme("noloctron", uses_netloc=False) u4 = URL(scheme="noloctron") u4 = u4.replace(path=("example", "path")) assert u4.to_text() == "noloctron:example/path" - def test_register_no_netloc_with_port(self): - # type: () -> None + def test_register_no_netloc_with_port(self) -> None: with self.assertRaises(ValueError): register_scheme("badnetlocless", uses_netloc=False, default_port=7) - def test_invalid_uses_netloc(self): - # type: () -> None + def test_invalid_uses_netloc(self) -> None: with self.assertRaises(ValueError): register_scheme("badnetloc", uses_netloc=cast(bool, None)) with self.assertRaises(ValueError): register_scheme("badnetloc", uses_netloc=cast(bool, object())) - def test_register_invalid_uses_netloc(self): - # type: () -> None + def test_register_invalid_uses_netloc(self) -> None: with self.assertRaises(ValueError): register_scheme("lol", uses_netloc=cast(bool, object())) - def test_register_invalid_port(self): - # type: () -> None + def test_register_invalid_port(self) -> None: with self.assertRaises(ValueError): 
register_scheme("nope", default_port=cast(bool, object())) - def test_register_no_quote_plus_scheme(self): - # type: () -> None + def test_register_no_quote_plus_scheme(self) -> None: register_scheme("keepplus", query_plus_is_space=False) plus_is_not_space = DecodedURL.from_text( "keepplus://example.com/?q=a+b" diff --git a/src/hyperlink/test/test_socket.py b/src/hyperlink/test/test_socket.py deleted file mode 100644 index 5f83d45b..00000000 --- a/src/hyperlink/test/test_socket.py +++ /dev/null @@ -1,45 +0,0 @@ -# mypy: always-true=inet_pton - -try: - from socket import inet_pton -except ImportError: - inet_pton = None # type: ignore[assignment] - -if not inet_pton: - import socket - - from .common import HyperlinkTestCase - from .._socket import inet_pton - - class TestSocket(HyperlinkTestCase): - def test_inet_pton_ipv4_valid(self): - # type: () -> None - data = inet_pton(socket.AF_INET, "127.0.0.1") - assert isinstance(data, bytes) - - def test_inet_pton_ipv4_bogus(self): - # type: () -> None - with self.assertRaises(socket.error): - inet_pton(socket.AF_INET, "blah") - - def test_inet_pton_ipv6_valid(self): - # type: () -> None - data = inet_pton(socket.AF_INET6, "::1") - assert isinstance(data, bytes) - - def test_inet_pton_ipv6_bogus(self): - # type: () -> None - with self.assertRaises(socket.error): - inet_pton(socket.AF_INET6, "blah") - - def test_inet_pton_bogus_family(self): - # type: () -> None - # Find an integer not associated with a known address family - i = int(socket.AF_INET6) - while True: - if i != socket.AF_INET and i != socket.AF_INET6: - break - i += 100 - - with self.assertRaises(socket.error): - inet_pton(i, "127.0.0.1") diff --git a/src/hyperlink/test/test_url.py b/src/hyperlink/test/test_url.py index 37c91726..04d30b16 100644 --- a/src/hyperlink/test/test_url.py +++ b/src/hyperlink/test/test_url.py @@ -3,19 +3,17 @@ # Copyright (c) Twisted Matrix Laboratories. # See LICENSE for details. 
-from __future__ import unicode_literals +from __future__ import annotations -import sys import socket -from typing import Any, Iterable, Optional, Text, Tuple, cast +from typing import TYPE_CHECKING, Any, Optional, cast from .common import HyperlinkTestCase from .. import URL, URLParseError from .._url import inet_pton, SCHEME_PORT_MAP - -PY2 = sys.version_info[0] == 2 -unicode = type("") +if TYPE_CHECKING: + from typing import Iterable, Tuple BASIC_URL = "http://www.foo.com/a/nice/path/?zot=23&zut" @@ -147,36 +145,32 @@ class TestURL(HyperlinkTestCase): Tests for L{URL}. """ - def assertUnicoded(self, u): - # type: (URL) -> None + def assertUnicoded(self, u: URL) -> None: """ The given L{URL}'s components should be L{unicode}. @param u: The L{URL} to test. """ - self.assertTrue( - isinstance(u.scheme, unicode) or u.scheme is None, repr(u) - ) - self.assertTrue(isinstance(u.host, unicode) or u.host is None, repr(u)) + self.assertTrue(u.scheme is None or isinstance(u.scheme, str), repr(u)) + self.assertTrue(u.host is None or isinstance(u.host, str), repr(u)) for seg in u.path: - self.assertEqual(type(seg), unicode, repr(u)) - for (_k, v) in u.query: - self.assertEqual(type(seg), unicode, repr(u)) - self.assertTrue(v is None or isinstance(v, unicode), repr(u)) - self.assertEqual(type(u.fragment), unicode, repr(u)) + self.assertEqual(type(seg), str, repr(u)) + for _k, v in u.query: + self.assertEqual(type(seg), str, repr(u)) + self.assertTrue(v is None or isinstance(v, str), repr(u)) + self.assertEqual(type(u.fragment), str, repr(u)) def assertURL( self, - u, # type: URL - scheme, # type: Text - host, # type: Text - path, # type: Iterable[Text] - query, # type: Iterable[Tuple[Text, Optional[Text]]] - fragment, # type: Text - port, # type: Optional[int] - userinfo="", # type: Text - ): - # type: (...) 
-> None + u: URL, + scheme: str, + host: str, + path: Iterable[str], + query: Iterable[Tuple[str, Optional[str]]], + fragment: str, + port: Optional[int], + userinfo: str = "", + ) -> None: """ The given L{URL} should have the given components. @@ -216,14 +210,12 @@ def assertURL( ) self.assertEqual(actual, expected) - def test_initDefaults(self): - # type: () -> None + def test_initDefaults(self) -> None: """ L{URL} should have appropriate default values. """ - def check(u): - # type: (URL) -> None + def check(u: URL) -> None: self.assertUnicoded(u) self.assertURL(u, "http", "", [], [], "", 80, "") @@ -231,8 +223,7 @@ def check(u): check(URL("http", "", [], [])) check(URL("http", "", [], [], "")) - def test_init(self): - # type: () -> None + def test_init(self) -> None: """ L{URL} should accept L{unicode} parameters. """ @@ -250,8 +241,7 @@ def test_init(self): 80, ) - def test_initPercent(self): - # type: () -> None + def test_initPercent(self) -> None: """ L{URL} should accept (and not interpret) percent characters. """ @@ -261,8 +251,7 @@ def test_initPercent(self): u, "s", "%68", ["%70"], [("%6B", "%76"), ("%6B", None)], "%66", None ) - def test_repr(self): - # type: () -> None + def test_repr(self) -> None: """ L{URL.__repr__} will display the canonical form of the URL, wrapped in a L{URL.from_text} invocation, so that it is C{eval}-able but still @@ -281,8 +270,7 @@ def test_repr(self): "URL.from_text(%s)" % (repr("http://foo/bar?baz&k=v#frob"),), ) - def test_from_text(self): - # type: () -> None + def test_from_text(self) -> None: """ Round-tripping L{URL.from_text} with C{str} results in an equivalent URL. @@ -290,8 +278,7 @@ def test_from_text(self): urlpath = URL.from_text(BASIC_URL) self.assertEqual(BASIC_URL, urlpath.to_text()) - def test_roundtrip(self): - # type: () -> None + def test_roundtrip(self) -> None: """ L{URL.to_text} should invert L{URL.from_text}. 
""" @@ -299,8 +286,7 @@ def test_roundtrip(self): result = URL.from_text(test).to_text(with_password=True) self.assertEqual(test, result) - def test_roundtrip_double_iri(self): - # type: () -> None + def test_roundtrip_double_iri(self) -> None: for test in ROUNDTRIP_TESTS: url = URL.from_text(test) iri = url.to_iri() @@ -312,8 +298,7 @@ def test_roundtrip_double_iri(self): assert iri_text == double_iri_text return - def test_equality(self): - # type: () -> None + def test_equality(self) -> None: """ Two URLs decoded using L{URL.from_text} will be equal (C{==}) if they decoded same URL string, and unequal (C{!=}) if they decoded different @@ -328,8 +313,7 @@ def test_equality(self): ), ) - def test_fragmentEquality(self): - # type: () -> None + def test_fragmentEquality(self) -> None: """ An URL created with the empty string for a fragment compares equal to an URL created with an unspecified fragment. @@ -340,8 +324,7 @@ def test_fragmentEquality(self): URL.from_text("http://localhost/"), ) - def test_child(self): - # type: () -> None + def test_child(self) -> None: """ L{URL.child} appends a new path segment, but does not affect the query or fragment. @@ -364,8 +347,7 @@ def test_child(self): urlpath.child("gong/double/").to_text(), ) - def test_multiChild(self): - # type: () -> None + def test_multiChild(self) -> None: """ L{URL.child} receives multiple segments as C{*args} and appends each in turn. @@ -375,8 +357,7 @@ def test_multiChild(self): url.child("c", "d", "e").to_text(), "http://example.com/a/b/c/d/e" ) - def test_childInitRoot(self): - # type: () -> None + def test_childInitRoot(self) -> None: """ L{URL.child} of a L{URL} without a path produces a L{URL} with a single path segment. 
@@ -385,16 +366,14 @@ def test_childInitRoot(self): self.assertTrue(childURL.rooted) self.assertEqual("http://www.foo.com/c", childURL.to_text()) - def test_emptyChild(self): - # type: () -> None + def test_emptyChild(self) -> None: """ L{URL.child} without any new segments returns the original L{URL}. """ url = URL(host="www.foo.com") self.assertEqual(url.child(), url) - def test_sibling(self): - # type: () -> None + def test_sibling(self) -> None: """ L{URL.sibling} of a L{URL} replaces the last path segment, but does not affect the query or fragment. @@ -412,8 +391,7 @@ def test_sibling(self): urlpath.sibling("sister").to_text(), ) - def test_click(self): - # type: () -> None + def test_click(self) -> None: """ L{URL.click} interprets the given string as a relative URI-reference and returns a new L{URL} interpreting C{self} as the base absolute URI. @@ -474,17 +452,15 @@ def test_click(self): u3 = u.click(u2) self.assertEqual(u3.to_text(), "http://localhost/foo/bar") - def test_clickRFC3986(self): - # type: () -> None + def test_clickRFC3986(self) -> None: """ L{URL.click} should correctly resolve the examples in RFC 3986. """ base = URL.from_text(relativeLinkBaseForRFC3986) - for (ref, expected) in relativeLinkTestsForRFC3986: + for ref, expected in relativeLinkTestsForRFC3986: self.assertEqual(base.click(ref).to_text(), expected) - def test_clickSchemeRelPath(self): - # type: () -> None + def test_clickSchemeRelPath(self) -> None: """ L{URL.click} should not accept schemes with relative paths. """ @@ -492,8 +468,7 @@ def test_clickSchemeRelPath(self): self.assertRaises(NotImplementedError, base.click, "g:h") self.assertRaises(NotImplementedError, base.click, "http:h") - def test_cloneUnchanged(self): - # type: () -> None + def test_cloneUnchanged(self) -> None: """ Verify that L{URL.replace} doesn't change any of the arguments it is passed. 
@@ -512,8 +487,7 @@ def test_cloneUnchanged(self): ) self.assertEqual(urlpath.replace(), urlpath) - def test_clickCollapse(self): - # type: () -> None + def test_clickCollapse(self) -> None: """ L{URL.click} collapses C{.} and C{..} according to RFC 3986 section 5.2.4. @@ -554,8 +528,7 @@ def test_clickCollapse(self): ), ) - def test_queryAdd(self): - # type: () -> None + def test_queryAdd(self) -> None: """ L{URL.add} adds query parameters. """ @@ -593,8 +566,7 @@ def test_queryAdd(self): urlpath.add("burp", "xxx").add("zot", "32").to_text(), ) - def test_querySet(self): - # type: () -> None + def test_querySet(self) -> None: """ L{URL.set} replaces query parameters by name. """ @@ -615,8 +587,7 @@ def test_querySet(self): urlpath.add("zot", "xxx").set("zot", "32").to_text(), ) - def test_queryRemove(self): - # type: () -> None + def test_queryRemove(self) -> None: """ L{URL.remove} removes instances of a query parameter. """ @@ -640,8 +611,7 @@ def test_queryRemove(self): URL.from_text("https://example.com/a/b/?foo=1&bar=2&foo=3"), ) - def test_parseEqualSignInParamValue(self): - # type: () -> None + def test_parseEqualSignInParamValue(self) -> None: """ Every C{=}-sign after the first in a query parameter is simply included in the value of the parameter. @@ -661,31 +631,27 @@ def test_parseEqualSignInParamValue(self): # assert that the equals is not unnecessarily escaped self.assertEqual(iri.to_uri().get("operator"), ["="]) - def test_empty(self): - # type: () -> None + def test_empty(self) -> None: """ An empty L{URL} should serialize as the empty string. """ self.assertEqual(URL().to_text(), "") - def test_justQueryText(self): - # type: () -> None + def test_justQueryText(self) -> None: """ An L{URL} with query text should serialize as just query text. 
""" u = URL(query=[("hello", "world")]) self.assertEqual(u.to_text(), "?hello=world") - def test_identicalEqual(self): - # type: () -> None + def test_identicalEqual(self) -> None: """ L{URL} compares equal to itself. """ u = URL.from_text("http://localhost/") self.assertEqual(u, u) - def test_similarEqual(self): - # type: () -> None + def test_similarEqual(self) -> None: """ URLs with equivalent components should compare equal. """ @@ -693,8 +659,7 @@ def test_similarEqual(self): u2 = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f") self.assertEqual(u1, u2) - def test_differentNotEqual(self): - # type: () -> None + def test_differentNotEqual(self) -> None: """ L{URL}s that refer to different resources are both unequal (C{!=}) and also not equal (not C{==}). @@ -704,8 +669,7 @@ def test_differentNotEqual(self): self.assertFalse(u1 == u2, "%r != %r" % (u1, u2)) self.assertNotEqual(u1, u2) - def test_otherTypesNotEqual(self): - # type: () -> None + def test_otherTypesNotEqual(self) -> None: """ L{URL} is not equal (C{==}) to other types. """ @@ -715,16 +679,14 @@ def test_otherTypesNotEqual(self): self.assertNotEqual(u, 42) self.assertNotEqual(u, object()) - def test_identicalNotUnequal(self): - # type: () -> None + def test_identicalNotUnequal(self) -> None: """ Identical L{URL}s are not unequal (C{!=}) to each other. """ u = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f") self.assertFalse(u != u, "%r == itself" % u) - def test_similarNotUnequal(self): - # type: () -> None + def test_similarNotUnequal(self) -> None: """ Structurally similar L{URL}s are not unequal (C{!=}) to each other. """ @@ -732,8 +694,7 @@ def test_similarNotUnequal(self): u2 = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f") self.assertFalse(u1 != u2, "%r == %r" % (u1, u2)) - def test_differentUnequal(self): - # type: () -> None + def test_differentUnequal(self) -> None: """ Structurally different L{URL}s are unequal (C{!=}) to each other. 
""" @@ -741,8 +702,7 @@ def test_differentUnequal(self): u2 = URL.from_text("http://localhost/b") self.assertTrue(u1 != u2, "%r == %r" % (u1, u2)) - def test_otherTypesUnequal(self): - # type: () -> None + def test_otherTypesUnequal(self) -> None: """ L{URL} is unequal (C{!=}) to other types. """ @@ -750,8 +710,7 @@ def test_otherTypesUnequal(self): self.assertTrue(u != 42, "URL must differ from a number.") self.assertTrue(u != object(), "URL must be differ from an object.") - def test_asURI(self): - # type: () -> None + def test_asURI(self) -> None: """ L{URL.asURI} produces an URI which converts any URI unicode encoding into pure US-ASCII and returns a new L{URL}. @@ -776,8 +735,7 @@ def test_asURI(self): actualURI, expectedURI, "%r != %r" % (actualURI, expectedURI) ) - def test_asIRI(self): - # type: () -> None + def test_asIRI(self) -> None: """ L{URL.asIRI} decodes any percent-encoded text in the URI, making it more suitable for reading by humans, and returns a new L{URL}. @@ -800,8 +758,7 @@ def test_asIRI(self): actualIRI, expectedIRI, "%r != %r" % (actualIRI, expectedIRI) ) - def test_badUTF8AsIRI(self): - # type: () -> None + def test_badUTF8AsIRI(self) -> None: """ Bad UTF-8 in a path segment, query parameter, or fragment results in that portion of the URI remaining percent-encoded in the IRI. @@ -819,8 +776,7 @@ def test_badUTF8AsIRI(self): actualIRI, expectedIRI, "%r != %r" % (actualIRI, expectedIRI) ) - def test_alreadyIRIAsIRI(self): - # type: () -> None + def test_alreadyIRIAsIRI(self) -> None: """ A L{URL} composed of non-ASCII text will result in non-ASCII text. """ @@ -835,8 +791,7 @@ def test_alreadyIRIAsIRI(self): alsoIRI = iri.asIRI() self.assertEqual(alsoIRI.to_text(), unicodey) - def test_alreadyURIAsURI(self): - # type: () -> None + def test_alreadyURIAsURI(self) -> None: """ A L{URL} composed of encoded text will remain encoded. 
""" @@ -845,8 +800,7 @@ def test_alreadyURIAsURI(self): actualURI = uri.asURI().to_text() self.assertEqual(actualURI, expectedURI) - def test_userinfo(self): - # type: () -> None + def test_userinfo(self) -> None: """ L{URL.from_text} will parse the C{userinfo} portion of the URI separately from the host and port. @@ -868,8 +822,7 @@ def test_userinfo(self): "http://someuser@example.com/some-segment@ignore", ) - def test_portText(self): - # type: () -> None + def test_portText(self) -> None: """ L{URL.from_text} parses custom port numbers as integers. """ @@ -877,8 +830,7 @@ def test_portText(self): self.assertEqual(portURL.port, 8080) self.assertEqual(portURL.to_text(), "http://www.example.com:8080/") - def test_mailto(self): - # type: () -> None + def test_mailto(self) -> None: """ Although L{URL} instances are mainly for dealing with HTTP, other schemes (such as C{mailto:}) should work as well. For example, @@ -890,8 +842,7 @@ def test_mailto(self): "mailto:user@example.com", ) - def test_httpWithoutHost(self): - # type: () -> None + def test_httpWithoutHost(self) -> None: """ An HTTP URL without a hostname, but with a path, should also round-trip cleanly. @@ -902,8 +853,7 @@ def test_httpWithoutHost(self): self.assertEqual(without_host.uses_netloc, False) self.assertEqual(without_host.to_text(), "http:relative-path") - def test_queryIterable(self): - # type: () -> None + def test_queryIterable(self) -> None: """ When a L{URL} is created with a C{query} argument, the C{query} argument is converted into an N-tuple of 2-tuples, sensibly @@ -915,8 +865,7 @@ def test_queryIterable(self): url = URL(query={"alpha": "beta"}) self.assertEqual(url.query, expected) - def test_pathIterable(self): - # type: () -> None + def test_pathIterable(self) -> None: """ When a L{URL} is created with a C{path} argument, the C{path} is converted into a tuple. 
@@ -924,8 +873,7 @@ def test_pathIterable(self): url = URL(path=["hello", "world"]) self.assertEqual(url.path, ("hello", "world")) - def test_invalidArguments(self): - # type: () -> None + def test_invalidArguments(self) -> None: """ Passing an argument of the wrong type to any of the constructor arguments of L{URL} will raise a descriptive L{TypeError}. @@ -937,18 +885,15 @@ def test_invalidArguments(self): """ class Unexpected(object): - def __str__(self): - # type: () -> str + def __str__(self) -> str: return "wrong" - def __repr__(self): - # type: () -> str + def __repr__(self) -> str: return "" - defaultExpectation = "unicode" if bytes is str else "str" + defaultExpectation = "str" - def assertRaised(raised, expectation, name): - # type: (Any, Text, Text) -> None + def assertRaised(raised: Any, expectation: str, name: str) -> None: self.assertEqual( str(raised.exception), "expected {0} for {1}, got {2}".format( @@ -956,8 +901,7 @@ def assertRaised(raised, expectation, name): ), ) - def check(param, expectation=defaultExpectation): - # type: (Any, str) -> None + def check(param: Any, expectation: str = defaultExpectation) -> None: with self.assertRaises(TypeError) as raised: URL(**{param: Unexpected()}) # type: ignore[arg-type] @@ -971,46 +915,45 @@ def check(param, expectation=defaultExpectation): check("port", "int or NoneType") with self.assertRaises(TypeError) as raised: - URL(path=[cast(Text, Unexpected())]) + URL(path=[cast(str, Unexpected())]) assertRaised(raised, defaultExpectation, "path segment") with self.assertRaises(TypeError) as raised: - URL(query=[("name", cast(Text, Unexpected()))]) + URL(query=[("name", cast(str, Unexpected()))]) assertRaised( raised, defaultExpectation + " or NoneType", "query parameter value" ) with self.assertRaises(TypeError) as raised: - URL(query=[(cast(Text, Unexpected()), "value")]) + URL(query=[(cast(str, Unexpected()), "value")]) assertRaised(raised, defaultExpectation, "query parameter name") # No custom error 
message for this one, just want to make sure # non-2-tuples don't get through. with self.assertRaises(TypeError): - URL(query=[cast(Tuple[Text, Text], Unexpected())]) + URL(query=[cast("Tuple[str, str]", Unexpected())]) with self.assertRaises(ValueError): - URL(query=[cast(Tuple[Text, Text], ("k", "v", "vv"))]) + URL(query=[cast("Tuple[str, str]", ("k", "v", "vv"))]) with self.assertRaises(ValueError): - URL(query=[cast(Tuple[Text, Text], ("k",))]) + URL(query=[cast("Tuple[str, str]", ("k",))]) url = URL.from_text("https://valid.example.com/") with self.assertRaises(TypeError) as raised: - url.child(cast(Text, Unexpected())) + url.child(cast(str, Unexpected())) assertRaised(raised, defaultExpectation, "path segment") with self.assertRaises(TypeError) as raised: - url.sibling(cast(Text, Unexpected())) + url.sibling(cast(str, Unexpected())) assertRaised(raised, defaultExpectation, "path segment") with self.assertRaises(TypeError) as raised: - url.click(cast(Text, Unexpected())) + url.click(cast(str, Unexpected())) assertRaised(raised, defaultExpectation, "relative URL") - def test_technicallyTextIsIterableBut(self): - # type: () -> None + def test_technicallyTextIsIterableBut(self) -> None: """ Technically, L{str} (or L{unicode}, as appropriate) is iterable, but C{URL(path="foo")} resulting in C{URL.from_text("f/o/o")} is never what @@ -1023,8 +966,7 @@ def test_technicallyTextIsIterableBut(self): "expected iterable of text for path, not: {0}".format(repr("foo")), ) - def test_netloc(self): - # type: () -> None + def test_netloc(self) -> None: url = URL(scheme="https") self.assertEqual(url.uses_netloc, True) self.assertEqual(url.to_text(), "https://") @@ -1057,16 +999,14 @@ def test_netloc(self): url = URL.from_text("ztp:test:com") self.assertEqual(url.uses_netloc, False) - def test_ipv6_with_port(self): - # type: () -> None + def test_ipv6_with_port(self) -> None: t = "https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:80/" url = URL.from_text(t) assert url.host == 
"2001:0db8:85a3:0000:0000:8a2e:0370:7334" assert url.port == 80 assert SCHEME_PORT_MAP[url.scheme] != url.port - def test_basic(self): - # type: () -> None + def test_basic(self) -> None: text = "https://user:pass@example.com/path/to/here?k=v#nice" url = URL.from_text(text) assert url.scheme == "https" @@ -1089,16 +1029,13 @@ def test_basic(self): assert url.host == "::1" assert url.path == ("path", "to", "here") - def test_invalid_url(self): - # type: () -> None + def test_invalid_url(self) -> None: self.assertRaises(URLParseError, URL.from_text, "#\n\n") - def test_invalid_authority_url(self): - # type: () -> None + def test_invalid_authority_url(self) -> None: self.assertRaises(URLParseError, URL.from_text, "http://abc:\n\n/#") - def test_invalid_ipv6(self): - # type: () -> None + def test_invalid_ipv6(self) -> None: invalid_ipv6_ips = [ "2001::0234:C1ab::A0:aabc:003F", "2001::1::3F", @@ -1111,8 +1048,7 @@ def test_invalid_ipv6(self): self.assertRaises(socket.error, inet_pton, socket.AF_INET6, ip) self.assertRaises(URLParseError, URL.from_text, url_text) - def test_invalid_port(self): - # type: () -> None + def test_invalid_port(self) -> None: self.assertRaises(URLParseError, URL.from_text, "ftp://portmouth:smash") self.assertRaises( ValueError, @@ -1120,8 +1056,7 @@ def test_invalid_port(self): "http://reader.googlewebsite.com:neverforget", ) - def test_idna(self): - # type: () -> None + def test_idna(self) -> None: u1 = URL.from_text("http://bücher.ch") self.assertEqual(u1.host, "bücher.ch") self.assertEqual(u1.to_text(), "http://bücher.ch") @@ -1132,8 +1067,7 @@ def test_idna(self): self.assertEqual(u2.to_text(), "https://xn--bcher-kva.ch") self.assertEqual(u2.to_iri().to_text(), "https://bücher.ch") - def test_netloc_slashes(self): - # type: () -> None + def test_netloc_slashes(self) -> None: # basic sanity checks url = URL.from_text("mailto:mahmoud@hatnote.com") @@ -1191,8 +1125,7 @@ def test_netloc_slashes(self): return - def test_rooted_to_relative(self): 
- # type: () -> None + def test_rooted_to_relative(self) -> None: """ On host-relative URLs, the C{rooted} flag can be updated to indicate that the path should no longer be treated as absolute. @@ -1203,8 +1136,7 @@ def test_rooted_to_relative(self): self.assertEqual(b.to_text(), "/hello") self.assertNotEqual(a, b) - def test_autorooted(self): - # type: () -> None + def test_autorooted(self) -> None: """ The C{rooted} flag can be updated in some cases, but it cannot be made to conflict with other facts surrounding the URL; for example, all URLs @@ -1226,8 +1158,7 @@ def test_autorooted(self): self.assertEqual(normal_absolute.rooted, True) self.assertEqual(attempt_unrooted_absolute.rooted, True) - def test_rooted_with_port_but_no_host(self): - # type: () -> None + def test_rooted_with_port_but_no_host(self) -> None: """ URLs which include a ``://`` netloc-separator for any reason are inherently rooted, regardless of the value or presence of the @@ -1253,8 +1184,7 @@ def test_rooted_with_port_but_no_host(self): self.assertEqual(directly_constructed_implict, parsed) self.assertEqual(directly_constructed_rooted, parsed) - def test_wrong_constructor(self): - # type: () -> None + def test_wrong_constructor(self) -> None: with self.assertRaises(ValueError): # whole URL not allowed URL(BASIC_URL) @@ -1262,8 +1192,7 @@ def test_wrong_constructor(self): # explicitly bad scheme not allowed URL("HTTP_____more_like_imHoTTeP") - def test_encoded_userinfo(self): - # type: () -> None + def test_encoded_userinfo(self) -> None: url = URL.from_text("http://user:pass@example.com") assert url.userinfo == "user:pass" url = url.replace(userinfo="us%20her:pass") @@ -1277,8 +1206,7 @@ def test_encoded_userinfo(self): == "http://us%20her:pass@example.com" ) - def test_hash(self): - # type: () -> None + def test_hash(self) -> None: url_map = {} url1 = URL.from_text("http://blog.hatnote.com/ask?utm_source=geocity") assert hash(url1) == hash(url1) # sanity @@ -1295,8 +1223,7 @@ def 
test_hash(self): assert hash(URL()) == hash(URL()) # slightly more sanity - def test_dir(self): - # type: () -> None + def test_dir(self) -> None: url = URL() res = dir(url) @@ -1307,16 +1234,13 @@ def test_dir(self): assert "asURI" not in res assert "asIRI" not in res - def test_twisted_compat(self): - # type: () -> None + def test_twisted_compat(self) -> None: url = URL.fromText("http://example.com/a%20té%C3%A9st") assert url.asText() == "http://example.com/a%20té%C3%A9st" assert url.asURI().asText() == "http://example.com/a%20t%C3%A9%C3%A9st" # TODO: assert url.asIRI().asText() == u'http://example.com/a%20téést' - def test_set_ordering(self): - # type: () -> None - + def test_set_ordering(self) -> None: # TODO url = URL.from_text("http://example.com/?a=b&c") url = url.set("x", "x") @@ -1325,8 +1249,7 @@ def test_set_ordering(self): # Would expect: # assert url.to_text() == u'http://example.com/?a=b&c&x=x&x=y' - def test_schemeless_path(self): - # type: () -> None + def test_schemeless_path(self) -> None: "See issue #4" u1 = URL.from_text("urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob") u2 = URL.from_text(u1.to_text()) @@ -1344,24 +1267,21 @@ def test_schemeless_path(self): u6 = URL.from_text(u5.to_text()).to_uri() assert u5 == u6 # colons stay decoded bc they're not in the first seg - def test_emoji_domain(self): - # type: () -> None + def test_emoji_domain(self) -> None: "See issue #7, affecting only narrow builds (2.6-3.3)" url = URL.from_text("https://xn--vi8hiv.ws") iri = url.to_iri() iri.to_text() # as long as we don't get ValueErrors, we're good - def test_delim_in_param(self): - # type: () -> None + def test_delim_in_param(self) -> None: "Per issue #6 and #8" self.assertRaises(ValueError, URL, scheme="http", host="a/c") self.assertRaises(ValueError, URL, path=("?",)) self.assertRaises(ValueError, URL, path=("#",)) self.assertRaises(ValueError, URL, query=(("&", "test"))) - def test_empty_paths_eq(self): - # type: () -> None + def test_empty_paths_eq(self) -> None: 
u1 = URL.from_text("http://example.com/") u2 = URL.from_text("http://example.com") @@ -1382,15 +1302,12 @@ def test_empty_paths_eq(self): assert u1 == u2 - def test_from_text_type(self): - # type: () -> None + def test_from_text_type(self) -> None: assert URL.from_text("#ok").fragment == "ok" # sanity self.assertRaises(TypeError, URL.from_text, b"bytes://x.y.z") self.assertRaises(TypeError, URL.from_text, object()) - def test_from_text_bad_authority(self): - # type: () -> None - + def test_from_text_bad_authority(self) -> None: # bad ipv6 brackets self.assertRaises(URLParseError, URL.from_text, "http://[::1/") self.assertRaises(URLParseError, URL.from_text, "http://::1]/") @@ -1404,8 +1321,7 @@ def test_from_text_bad_authority(self): # extra port colon (makes for an invalid host) self.assertRaises(URLParseError, URL.from_text, "http://127.0.0.1::80") - def test_normalize(self): - # type: () -> None + def test_normalize(self) -> None: url = URL.from_text("HTTP://Example.com/A%61/./../A%61?B%62=C%63#D%64") assert url.get("Bb") == [] assert url.get("B%62") == ["C%63"] @@ -1461,24 +1377,17 @@ def test_normalize(self): == "ftp://%25:%25@/%25%25%25/%25a%25b?%25=%25%25#%25" ) - def test_str(self): - # type: () -> None - + def test_str(self) -> None: # see also issue #49 text = "http://example.com/á/y%20a%20y/?b=%25" url = URL.from_text(text) - assert unicode(url) == text + assert str(url) == text assert bytes(url) == b"http://example.com/%C3%A1/y%20a%20y/?b=%25" - if PY2: - assert isinstance(str(url), bytes) - assert isinstance(unicode(url), unicode) - else: - assert isinstance(str(url), unicode) - assert isinstance(bytes(url), bytes) + assert isinstance(str(url), str) + assert isinstance(bytes(url), bytes) - def test_idna_corners(self): - # type: () -> None + def test_idna_corners(self) -> None: url = URL.from_text("http://abé.com/") assert url.to_iri().host == "abé.com" assert url.to_uri().host == "xn--ab-cja.com" diff --git a/tox.ini b/tox.ini index 8865d178..50d47e7f 
100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ envlist = flake8, black, mypy - test-py{26,27,34,35,36,37,38,39,py2,py3} + test-py{310,311,312,313,314,py3} coverage_report docs packaging @@ -12,10 +12,9 @@ skip_missing_interpreters = {tty:True:False} [default] -basepython = python3.9 +basepython = python3.14 deps = - idna==2.9 # rq.filter: <3 setenv = PY_MODULE=hyperlink @@ -34,38 +33,22 @@ description = run tests basepython = py: python - py26: python2.6 - py27: python2.7 - py34: python3.4 - py35: python3.5 - py36: python3.6 - py37: python3.7 - py38: python3.8 - py39: python3.9 - py310: python3.10 - - pypy2: pypy - pypy3: pypy3 + py10: python3.10 + py11: python3.11 + py12: python3.12 + py13: python3.13 + py14: python3.14 deps = - {[default]deps} - - # In Python 2, we need to pull in typing, mock - py{26,27,py2}: typing==3.10.0.0 - py{26,27,py2}: mock==3.0.5 # rq.filter: <4 - # For pytest - py{26,27,34,py2}: pytest==4.6.11 # rq.filter: <5 - py{35,36,37,38,39,py3}: pytest==5.2.4 + py{310,311,312,313,314}: pytest==9.0.2 # For code coverage {[testenv:coverage_report]deps} - py{26,27,34,py2}: pytest-cov==2.8.1 # rq.filter: <2.9 - py{35,36,37,38,39,py3}: pytest-cov==2.10.1 + py{310,311,312,313,314}: pytest-cov==7.0.0 - # For hypothesis. Note Python 3.4 isn't supported by hypothesis. - py{26,27,py2}: hypothesis==4.43.9 # rq.filter: <4.44 - py{35,36,37,38,39,py3}: hypothesis==5.8.6 + # For hypothesis. 
+ py{310,311,312,313,314}: hypothesis==6.151.9 setenv = {[default]setenv} @@ -93,7 +76,7 @@ basepython = {[default]basepython} skip_install = True deps = - black==21.7b0 + black==26.1.0 setenv = BLACK_LINT_ARGS=--check @@ -124,13 +107,13 @@ basepython = {[default]basepython} skip_install = True deps = - flake8-bugbear==21.4.3 - flake8==3.9.2 - mccabe==0.6.1 - pep8-naming==0.12.1 - pycodestyle==2.7.0 - pydocstyle==6.1.1 - pyflakes==2.3.1 + flake8-bugbear==25.11.29 + flake8==7.3.0 + mccabe==0.7.0 + pep8-naming==0.15.1 + pycodestyle==2.14.0 + pydocstyle==6.3.0 + pyflakes==3.4.0 commands = flake8 {posargs:setup.py src/{env:PY_MODULE}} @@ -187,8 +170,8 @@ description = run Mypy (static type checker) basepython = {[default]basepython} deps = - mypy==0.910 - types-mock==0.1.5 + mypy==1.19.1 + types-mock==5.2.0.20250924 {[default]deps} @@ -244,15 +227,14 @@ ignore_missing_imports = True description = generate coverage report -depends = test-py{26,27,34,35,36,37,38,39,py2,py3} +depends = test-py{310,311,312,313,314,py3} basepython = {[default]basepython} skip_install = True deps = - # coverage 5.0 drops Python 3.4 support - coverage==4.5.4 # rq.filter: <5 + coverage==7.13.4 setenv = {[default]setenv} @@ -281,7 +263,7 @@ skip_install = True deps = {[testenv:coverage_report]deps} - codecov==2.1.12 + codecov passenv = # See https://github.com/codecov/codecov-python/blob/master/README.md#using-tox @@ -323,8 +305,8 @@ description = build documentation basepython = {[default]basepython} deps = - Sphinx==4.1.2 - sphinx-rtd-theme==0.5.2 + Sphinx==9.1.0 + sphinx-rtd-theme==3.1.0 commands = sphinx-build \ @@ -341,7 +323,7 @@ basepython = {[default]basepython} deps = {[testenv:docs]deps} - sphinx-autobuild==2021.3.14 + sphinx-autobuild==2025.8.25 commands = sphinx-autobuild \ @@ -364,9 +346,9 @@ basepython = {[default]basepython} skip_install = True deps = - check-manifest==0.46 - readme-renderer==29.0 - twine==3.4.2 + check-manifest==0.51 + readme-renderer==44.0 + twine==6.2.0 commands =
check-manifest From 10ea0a6e508db243491086861181b9cf29e80d14 Mon Sep 17 00:00:00 2001 From: mvaught Date: Thu, 26 Feb 2026 15:00:54 -0500 Subject: [PATCH 2/4] lint and formatting --- setup.py | 1 - src/hyperlink/_url.py | 98 +++++++++++++++++++++++-------- src/hyperlink/hypothesis.py | 35 +++++++++-- src/hyperlink/test/__init__.py | 5 +- src/hyperlink/test/test_common.py | 24 +++----- 5 files changed, 114 insertions(+), 49 deletions(-) diff --git a/setup.py b/setup.py index 4ee2a1c1..369d91e4 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,6 @@ from setuptools import find_packages, setup - __author__ = "Mahmoud Hashemi and Glyph Lefkowitz" __version__ = "21.0.1dev" __contact__ = "mahmoud@hatnote.com" diff --git a/src/hyperlink/_url.py b/src/hyperlink/_url.py index fcd845cd..8b375d0c 100644 --- a/src/hyperlink/_url.py +++ b/src/hyperlink/_url.py @@ -1,4 +1,5 @@ from __future__ import annotations + # -*- coding: utf-8 -*- """Hyperlink provides Pythonic URL parsing, construction, and rendering. @@ -31,6 +32,7 @@ cast, TYPE_CHECKING, ) + if TYPE_CHECKING: from typing import ( Type, @@ -43,8 +45,11 @@ TypeVar, Union, ) + NoneType: Type[None] = type(None) - QueryPairs = Tuple[Tuple[str, Optional[str]], ...] # internal representation + QueryPairs = Tuple[ + Tuple[str, Optional[str]], ... 
+ ] # internal representation QueryParameters = Union[ Mapping[str, Optional[str]], QueryPairs, @@ -177,7 +182,9 @@ def __bool__(self) -> bool: _QUERY_KEY_DELIMS = _ALL_DELIMS - _QUERY_KEY_SAFE -def _make_decode_map(delims: Iterable[str], allow_percent: bool = False) -> Mapping[bytes, bytes]: +def _make_decode_map( + delims: Iterable[str], allow_percent: bool = False +) -> Mapping[bytes, bytes]: ret = dict(_HEX_CHAR_MAP) if not allow_percent: delims = set(delims) | set(["%"]) @@ -262,9 +269,11 @@ def _encode_schemeless_path_part(text: str, maximal: bool = True) -> str: return "".join([_SCHEMELESS_PATH_PART_QUOTE_MAP[b] for b in bytestr]) return "".join( [ - _SCHEMELESS_PATH_PART_QUOTE_MAP[t] - if t in _SCHEMELESS_PATH_DELIMS - else t + ( + _SCHEMELESS_PATH_PART_QUOTE_MAP[t] + if t in _SCHEMELESS_PATH_DELIMS + else t + ) for t in text ] ) @@ -449,7 +458,10 @@ def _encode_userinfo_part(text: str, maximal: bool = True) -> str: def register_scheme( - text: str, uses_netloc: bool = True, default_port: Optional[int] = None, query_plus_is_space: bool = True + text: str, + uses_netloc: bool = True, + default_port: Optional[int] = None, + query_plus_is_space: bool = True, ) -> None: """Registers new scheme information, resulting in correct port and slash behavior from the URL object. There are dozens of standard @@ -499,7 +511,9 @@ def register_scheme( return -def scheme_uses_netloc(scheme: str, default: Optional[bool] = None) -> Optional[bool]: +def scheme_uses_netloc( + scheme: str, default: Optional[bool] = None +) -> Optional[bool]: """Whether or not a URL uses :code:`:` or :code:`://` to separate the scheme from the rest of the URL depends on the scheme's own standard definition. 
There is no way to infer this behavior @@ -560,7 +574,12 @@ def _typecheck(name: str, value: T, *types: Type[Any]) -> T: return value -def _textcheck(name: str, value: T, delims: Iterable[str] = frozenset(), nullable: bool = False) -> T: +def _textcheck( + name: str, + value: T, + delims: Iterable[str] = frozenset(), + nullable: bool = False, +) -> T: if not isinstance(value, str): if nullable and value is None: # used by query string values @@ -590,7 +609,9 @@ def iter_pairs(iterable: Iterable[Any]) -> Iterator[Any]: return iter(iterable) -def _decode_unreserved(text: str, normalize_case: bool = False, encode_stray_percents: bool = False) -> str: +def _decode_unreserved( + text: str, normalize_case: bool = False, encode_stray_percents: bool = False +) -> str: return _percent_decode( text, normalize_case=normalize_case, @@ -610,7 +631,9 @@ def _decode_userinfo_part( ) -def _decode_path_part(text: str, normalize_case: bool = False, encode_stray_percents: bool = False) -> str: +def _decode_path_part( + text: str, normalize_case: bool = False, encode_stray_percents: bool = False +) -> str: """ >>> _decode_path_part(u'%61%77%2f%7a') u'aw%2fz' @@ -625,7 +648,9 @@ def _decode_path_part(text: str, normalize_case: bool = False, encode_stray_perc ) -def _decode_query_key(text: str, normalize_case: bool = False, encode_stray_percents: bool = False) -> str: +def _decode_query_key( + text: str, normalize_case: bool = False, encode_stray_percents: bool = False +) -> str: return _percent_decode( text, normalize_case=normalize_case, @@ -1154,7 +1179,9 @@ def authority(self, with_password: bool = False, **kw: Any) -> str: # first, a bit of twisted compat with_password = kw.pop("includeSecrets", with_password) if kw: - raise TypeError("got unexpected keyword arguments: %r" % list(kw.keys())) + raise TypeError( + "got unexpected keyword arguments: %r" % list(kw.keys()) + ) host = self.host if ":" in host: hostport = ["[" + host + "]"] @@ -1612,9 +1639,11 @@ def to_uri(self) -> URL: 
[ ( _encode_query_key(k, maximal=True), - _encode_query_value(v, maximal=True) - if v is not None - else None, + ( + _encode_query_value(v, maximal=True) + if v is not None + else None + ), ) for k, v in self.query ] @@ -1954,7 +1983,12 @@ class DecodedURL(object): .. versionadded:: 18.0.0 """ - def __init__(self, url: URL = _EMPTY_URL, lazy: bool = False, query_plus_is_space: Optional[bool] = None) -> None: + def __init__( + self, + url: URL = _EMPTY_URL, + lazy: bool = False, + query_plus_is_space: Optional[bool] = None, + ) -> None: self._url = url if query_plus_is_space is None: query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES @@ -1962,11 +1996,16 @@ def __init__(self, url: URL = _EMPTY_URL, lazy: bool = False, query_plus_is_spac if not lazy: # cache the following, while triggering any decoding # issues with decodable fields - self.host, self.userinfo, self.path, self.query, self.fragment + _ = (self.host, self.userinfo, self.path, self.query, self.fragment) return @classmethod - def from_text(cls, text: str, lazy: bool = False, query_plus_is_space: Optional[bool] = None) -> DecodedURL: + def from_text( + cls, + text: str, + lazy: bool = False, + query_plus_is_space: Optional[bool] = None, + ) -> DecodedURL: """\ Make a `DecodedURL` instance from any text string containing a URL. @@ -2096,11 +2135,13 @@ def query(self) -> QueryPairs: "QueryPairs", tuple( tuple( - _percent_decode( - predecode(x), raise_subencoding_exc=True + ( + _percent_decode( + predecode(x), raise_subencoding_exc=True + ) + if x is not None + else None ) - if x is not None - else None for x in (k, v) ) for k, v in self._url.query @@ -2302,20 +2343,27 @@ def __dir__(self) -> Sequence[str]: # Add some overloads so that parse gives a better return value. 
if TYPE_CHECKING: + @overload def parse(url: str, decoded: Literal[False], lazy: bool = False) -> URL: """Passing decoded=False returns URL.""" @overload - def parse(url: str, decoded: Literal[True] = True, lazy: bool = False) -> DecodedURL: + def parse( + url: str, decoded: Literal[True] = True, lazy: bool = False + ) -> DecodedURL: """Passing decoded=True (or the default value) returns DecodedURL.""" @overload - def parse(url: str, decoded: bool = True, lazy: bool = False) -> Union[URL, DecodedURL]: + def parse( + url: str, decoded: bool = True, lazy: bool = False + ) -> Union[URL, DecodedURL]: """If decoded is not a literal we don't know the return type.""" -def parse(url: str, decoded: bool = True, lazy: bool = False) -> Union[URL, DecodedURL]: +def parse( + url: str, decoded: bool = True, lazy: bool = False +) -> Union[URL, DecodedURL]: """ Automatically turn text into a structured URL object. diff --git a/src/hyperlink/hypothesis.py b/src/hyperlink/hypothesis.py index 1a86dfd7..9fdb1ce6 100644 --- a/src/hyperlink/hypothesis.py +++ b/src/hyperlink/hypothesis.py @@ -111,7 +111,9 @@ def idna_characters() -> str: _idnaCharacters: str = "" @composite - def idna_text(draw: DrawCallable, min_size: int = 1, max_size: Optional[int] = None) -> str: + def idna_text( + draw: DrawCallable, min_size: int = 1, max_size: Optional[int] = None + ) -> str: """ A strategy which generates IDNA-encodable text. @@ -202,7 +204,11 @@ def hostname_labels(draw: DrawCallable, allow_idn: bool = True) -> str: return label @composite - def hostnames(draw: DrawCallable, allow_leading_digit: bool = True, allow_idn: bool = True) -> str: + def hostnames( + draw: DrawCallable, + allow_leading_digit: bool = True, + allow_idn: bool = True, + ) -> str: """ A strategy which generates host names. 
@@ -218,8 +224,10 @@ def hostnames(draw: DrawCallable, allow_leading_digit: bool = True, allow_idn: b str, draw( hostname_labels(allow_idn=allow_idn).filter( - lambda l: ( - True if allow_leading_digit else l[0] not in digits + lambda label: ( + True + if allow_leading_digit + else label[0] not in digits ) ) ), @@ -239,7 +247,24 @@ def hostnames(draw: DrawCallable, allow_leading_digit: bool = True, allow_idn: b # Trim off labels until the total host name length fits in 252 # characters. This avoids having to filter the data. - while sum(len(label) for label in labels) + len(labels) - 1 > 252: + # For IDNs, the length must also be checked after Punycode encoding, + # because the encoded form may be much longer due to the 'xn--' prefix + # and Unicode-to-ASCII expansion. This ensures compliance with RFC 1035 + # and IDNA's 255-byte limit. + + def get_len(lbls: list[str]) -> int: + d = ".".join(lbls) + if allow_idn: + try: + return len(idna_encode(d)) + except IDNAError: + # Encoding failed due to length or invalidity. + # Return large number to force trimming. 
+ return 9999 + return len(d) + + # Trim off labels until total hostname length fits IDNA/DNS limits + while get_len(labels) > 253 and len(labels) > 1: labels = labels[:-1] return ".".join(labels) diff --git a/src/hyperlink/test/__init__.py b/src/hyperlink/test/__init__.py index e10ca70f..e2b878e7 100644 --- a/src/hyperlink/test/__init__.py +++ b/src/hyperlink/test/__init__.py @@ -3,11 +3,12 @@ Tests for hyperlink """ +from __future__ import annotations + __all = () -def _init_hypothesis(): - # type: () -> None +def _init_hypothesis() -> None: from os import environ if "CI" in environ: diff --git a/src/hyperlink/test/test_common.py b/src/hyperlink/test/test_common.py index 16a41118..513776b4 100644 --- a/src/hyperlink/test/test_common.py +++ b/src/hyperlink/test/test_common.py @@ -34,8 +34,7 @@ def test_assertRaisesWithCallable(self): """ called_with = [] - def raisesExpected(*args, **kwargs): - # type: (Any, Any) -> None + def raisesExpected(*args: Any, **kwargs: Any) -> None: called_with.append((args, kwargs)) raise _ExpectedException @@ -44,15 +43,13 @@ def raisesExpected(*args, **kwargs): ) self.assertEqual(called_with, [((1,), {"keyword": True})]) - def test_assertRaisesWithCallableUnexpectedException(self): - # type: () -> None + def test_assertRaisesWithCallableUnexpectedException(self) -> None: """When given a callable that raises an unexpected exception, HyperlinkTestCase.assertRaises raises that exception. """ - def doesNotRaiseExpected(*args, **kwargs): - # type: (Any, Any) -> None + def doesNotRaiseExpected(*args: Any, **kwargs: Any) -> None: raise _UnexpectedException try: @@ -62,15 +59,13 @@ def doesNotRaiseExpected(*args, **kwargs): except _UnexpectedException: pass - def test_assertRaisesWithCallableDoesNotRaise(self): - # type: () -> None + def test_assertRaisesWithCallableDoesNotRaise(self) -> None: """HyperlinkTestCase.assertRaises raises an AssertionError when given a callable that, when called, does not raise any exception. 
""" - def doesNotRaise(*args, **kwargs): - # type: (Any, Any) -> None + def doesNotRaise(*args: Any, **kwargs: Any) -> None: pass try: @@ -78,8 +73,7 @@ def doesNotRaise(*args, **kwargs): except AssertionError: pass - def test_assertRaisesContextManager(self): - # type: () -> None + def test_assertRaisesContextManager(self) -> None: """HyperlinkTestCase.assertRaises does not raise an AssertionError when used as a context manager with a suite that raises the expected exception. The context manager stores the exception @@ -93,8 +87,7 @@ def test_assertRaisesContextManager(self): isinstance(cm.exception, _ExpectedException) ) - def test_assertRaisesContextManagerUnexpectedException(self): - # type: () -> None + def test_assertRaisesContextManagerUnexpectedException(self) -> None: """When used as a context manager with a block that raises an unexpected exception, HyperlinkTestCase.assertRaises raises that unexpected exception. @@ -106,8 +99,7 @@ def test_assertRaisesContextManagerUnexpectedException(self): except _UnexpectedException: pass - def test_assertRaisesContextManagerDoesNotRaise(self): - # type: () -> None + def test_assertRaisesContextManagerDoesNotRaise(self) -> None: """HyperlinkTestcase.assertRaises raises an AssertionError when used as a context manager with a block that does not raise any exception. 
From 400b880efb79e33bb506bcc02d1e39fc163ee327 Mon Sep 17 00:00:00 2001 From: mvaught Date: Thu, 26 Feb 2026 15:04:59 -0500 Subject: [PATCH 3/4] update TODO based on changes --- TODO.md | 1 - 1 file changed, 1 deletion(-) diff --git a/TODO.md b/TODO.md index f5d2fdda..742fcdbb 100644 --- a/TODO.md +++ b/TODO.md @@ -4,7 +4,6 @@ just when query string is present) * Polish logo * Get coverage up -* Switch off ctypes/socket for IP validation * rebase method for path (prepends to path) * sibling() should be maximal=False like child() * make subencoding an exposed parameter From 07c99d22a35680aa4740f7b96945831859e59225 Mon Sep 17 00:00:00 2001 From: mvaught Date: Thu, 26 Feb 2026 15:32:30 -0500 Subject: [PATCH 4/4] finish test cleanup --- src/hyperlink/test/test_common.py | 2 ++ tox.ini | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/hyperlink/test/test_common.py b/src/hyperlink/test/test_common.py index 513776b4..05c7fcd2 100644 --- a/src/hyperlink/test/test_common.py +++ b/src/hyperlink/test/test_common.py @@ -2,6 +2,8 @@ Tests for hyperlink.test.common """ +from __future__ import annotations + from typing import TYPE_CHECKING from unittest import TestCase from .common import HyperlinkTestCase diff --git a/tox.ini b/tox.ini index 50d47e7f..afd0119d 100644 --- a/tox.ini +++ b/tox.ini @@ -39,16 +39,18 @@ basepython = py13: python3.13 py14: python3.14 + py3: pypy3 + deps = # For pytest - py{310,311,312,313,314}: pytest==9.0.2 + py{310,311,312,313,314,py3}: pytest==9.0.2 # For code coverage {[testenv:coverage_report]deps} - py{310,311,312,313,314}: pytest-cov==7.0.0 + py{310,311,312,313,314,py3}: pytest-cov==7.0.0 # For hypothesis. - py{310,311,312,313,314}: hypothesis==6.151.9 + py{310,311,312,313,314,py3}: hypothesis==6.151.9 setenv = {[default]setenv}