From 2b57be80483665b2f55e40fe479ef7600bfa2a02 Mon Sep 17 00:00:00 2001 From: Yong-Shin Jiang Date: Sun, 14 Jun 2026 22:51:21 -0700 Subject: [PATCH 1/4] Add unified parse_json runtime type checker (#3285) Introduce zarr.core.json_parse.parse_json, a single type-annotation-driven validator that consolidates the scattered per-field parse_* helpers. Handles primitives, Literal, unions/Optional, fixed and variadic tuples, Sequence/list (coerced to tuple), Mapping/dict, and TypedDict, with a bool-vs-int safe primitive check. Adds tests/test_json_parse.py (94 tests) and a changelog fragment. No existing call sites migrated yet; this is the proof-of-direction module. Co-Authored-By: Claude Opus 4.8 (1M context) --- changes/3285.feature.md | 1 + src/zarr/core/json_parse.py | 271 ++++++++++++++++++++ tests/test_json_parse.py | 476 ++++++++++++++++++++++++++++++++++++ 3 files changed, 748 insertions(+) create mode 100644 changes/3285.feature.md create mode 100644 src/zarr/core/json_parse.py create mode 100644 tests/test_json_parse.py diff --git a/changes/3285.feature.md b/changes/3285.feature.md new file mode 100644 index 0000000000..498f1d38e7 --- /dev/null +++ b/changes/3285.feature.md @@ -0,0 +1 @@ +Add `zarr.core.json_parse.parse_json`, a unified runtime type checker that validates JSON-decoded metadata values against a type annotation and returns data assignable to it (coercing sequences to tuples), or raises a clear error. It handles the JSON-relevant type categories used by array metadata: primitives (`None`, `str`, `int`, `float`, `bool`), `Literal`, unions/`Optional`, fixed and variadic `tuple`, `Sequence`/`list`, `Mapping`/`dict`, and `TypedDict`. This begins de-duplicating the scattered per-field `parse_*` helpers into a single shared validation path (see #3285). diff --git a/src/zarr/core/json_parse.py b/src/zarr/core/json_parse.py new file mode 100644 index 0000000000..9e22121bca --- /dev/null +++ b/src/zarr/core/json_parse.py @@ -0,0 +1,271 @@ +"""Unified runtime type checking for JSON-decoded metadata. + +This module provides a single entry point, :func:`parse_json`, that validates a +JSON-decoded ``value`` against a Python ``type_annotation`` and returns data +assignable to that annotation (coercing where sensible, e.g. a list is coerced +to a ``tuple`` for ``Sequence[T]`` annotations), or raises a useful exception. + +It is intended to consolidate the many hand-written ``parse_*`` helpers spread +across the codebase (see issue #3285). The scope is deliberately limited to the +JSON-shaped types that appear in Zarr metadata: + +* primitives: ``None``, ``str``, ``int``, ``float``, ``bool`` +* ``Literal[...]`` +* unions, including ``Optional[T]`` and ``X | Y`` +* ``tuple[...]`` (fixed-length and variadic) +* ``Sequence[T]`` / ``list[T]`` (coerced to ``tuple``) +* ``Mapping[str, T]`` / ``dict[str, T]`` +* :class:`~typing.TypedDict` + +Anything outside this scope raises a :class:`TypeError`. +""" + +from __future__ import annotations + +import types +from collections.abc import Mapping, Sequence +from typing import Any, Literal, Union, get_args, get_origin + +from typing_extensions import get_type_hints, is_typeddict + +__all__ = ["parse_json"] + +# Primitive types handled by a plain ``isinstance`` check. ``bool`` deliberately +# comes before ``int`` because ``bool`` is a subclass of ``int`` and the two +# must not be confused (see ``_parse_primitive``). +_PRIMITIVES: tuple[type, ...] = (bool, int, float, str) + + +def parse_json(value: object, type_annotation: object) -> Any: + """Validate ``value`` against ``type_annotation`` and return assignable data. + + Parameters + ---------- + value : object + A JSON-decoded value (``str``, ``int``, ``float``, ``bool``, ``None``, + ``list``/``Sequence`` or ``dict``/``Mapping``). + type_annotation : object + The expected type. One of the categories listed in the module + docstring. + + Returns + ------- + object + ``value``, possibly coerced (e.g. a list coerced to a ``tuple`` for a + ``Sequence[T]`` annotation, or a TypedDict-shaped ``dict``). + + Raises + ------ + ValueError + If ``value`` does not satisfy a primitive, literal, or union + annotation. + TypeError + If ``value`` has the wrong container shape, or if + ``type_annotation`` is not within the supported scope. + """ + # 1. None / type(None) + if type_annotation is None or type_annotation is type(None): + if value is None: + return None + raise ValueError(f"Expected None, got {value!r} instead.") + + origin = get_origin(type_annotation) + + # 3. Literal[...] + if origin is Literal: + return _parse_literal(value, type_annotation) + + # 4. Union / Optional / types.UnionType (X | Y) + if origin is Union or origin is types.UnionType: + return _parse_union(value, type_annotation) + + # 8. TypedDict + if is_typeddict(type_annotation): + return _parse_typeddict(value, type_annotation) + + # 5. tuple[...] + if origin is tuple: + return _parse_tuple(value, type_annotation) + + # 7. Mapping[str, T] / dict[str, T] + if origin is not None and isinstance(origin, type) and issubclass(origin, Mapping): + return _parse_mapping(value, type_annotation) + + # 6. Sequence[T] / list[T] (after tuple/Mapping so those take precedence) + if origin is not None and isinstance(origin, type) and issubclass(origin, Sequence): + return _parse_sequence(value, type_annotation) + + # 2. Primitives (str, int, float, bool) + if isinstance(type_annotation, type) and issubclass(type_annotation, _PRIMITIVES): + return _parse_primitive(value, type_annotation) + + # 9. Fallback + raise TypeError( + f"Cannot parse value {value!r} against unsupported type annotation " + f"{type_annotation!r}." + ) + + +def _parse_primitive(value: object, type_annotation: type) -> Any: + """Validate ``value`` against a primitive type via ``isinstance``. + + The critical edge case is that ``bool`` is a subclass of ``int``. When an + ``int`` is expected a ``bool`` must be rejected, and when a ``bool`` is + expected an ``int`` must be rejected. + """ + if type_annotation is bool: + if isinstance(value, bool): + return value + raise ValueError(f"Expected bool, got {value!r} instead.") + + if type_annotation is int: + # Reject bool, which is an ``int`` subclass. + if isinstance(value, int) and not isinstance(value, bool): + return value + raise ValueError(f"Expected int, got {value!r} instead.") + + if type_annotation is float: + # Reject bool, which is an ``int`` (and thus a ``float``-compatible) value. + if isinstance(value, float) and not isinstance(value, bool): + return value + raise ValueError(f"Expected float, got {value!r} instead.") + + # str + if isinstance(value, type_annotation): + return value + raise ValueError(f"Expected {type_annotation.__name__}, got {value!r} instead.") + + +def _parse_literal(value: object, type_annotation: object) -> Any: + """Validate that ``value`` is one of the literal members.""" + choices = get_args(type_annotation) + # ``True == 1`` in Python, so guard against a bool being accepted for an int + # literal (and vice versa) by also comparing types for the matched member. + for choice in choices: + if value == choice and type(value) is type(choice): + return value + raise ValueError(f"Expected one of {choices!r}, got {value!r} instead.") + + +def _parse_union(value: object, type_annotation: object) -> Any: + """Try each union member; return the first that parses, else aggregate.""" + members = get_args(type_annotation) + errors: list[str] = [] + for member in members: + try: + return parse_json(value, member) + except (ValueError, TypeError) as exc: # noqa: PERF203 + errors.append(f" - against {member!r}: {exc}") + joined = "\n".join(errors) + raise ValueError( + f"Expected a value matching one of {members!r}, got {value!r} instead. " + f"Tried each union member:\n{joined}" + ) + + +def _parse_tuple(value: object, type_annotation: object) -> tuple[Any, ...]: + """Validate a fixed-length or variadic ``tuple[...]`` annotation. + + ``tuple[int, str]`` is fixed-length; ``tuple[int, ...]`` is variadic. Each + element is recursively parsed against its corresponding element type. + """ + if not isinstance(value, Sequence) or isinstance(value, str | bytes): + raise TypeError(f"Expected a sequence, got {value!r} instead.") + + args = get_args(type_annotation) + + # Bare ``tuple`` with no parameters: accept any sequence of elements. + if not args: + return tuple(value) + + # Variadic ``tuple[T, ...]``. + if len(args) == 2 and args[1] is Ellipsis: + element_type = args[0] + return tuple(parse_json(item, element_type) for item in value) + + # Special-case the empty tuple ``tuple[()]``. + if args == ((),): + args = () + + # Fixed-length ``tuple[T1, T2, ...]``. + if len(value) != len(args): + raise TypeError( + f"Expected a sequence of length {len(args)}, got {value!r} of " + f"length {len(value)} instead." + ) + return tuple(parse_json(item, element_type) for item, element_type in zip(value, args, strict=True)) + + +def _parse_sequence(value: object, type_annotation: object) -> tuple[Any, ...]: + """Validate ``Sequence[T]`` / ``list[T]``, returning a ``tuple``. + + Each element is recursively parsed against ``T``. A ``str`` is *not* a valid + sequence here, even though it is technically a ``Sequence[str]``, because in + JSON terms a string is a primitive, not an array. + """ + if not isinstance(value, Sequence) or isinstance(value, str | bytes): + raise TypeError(f"Expected a sequence, got {value!r} instead.") + + args = get_args(type_annotation) + if not args: + # Bare ``list`` / ``Sequence`` with no element type: accept any element. + return tuple(value) + element_type = args[0] + return tuple(parse_json(item, element_type) for item in value) + + +def _parse_mapping(value: object, type_annotation: object) -> dict[str, Any]: + """Validate ``Mapping[str, T]`` / ``dict[str, T]``, returning a ``dict``. + + Keys must be ``str`` and each value is recursively parsed against ``T``. + """ + if not isinstance(value, Mapping): + raise TypeError(f"Expected a mapping, got {value!r} instead.") + + args = get_args(type_annotation) + if args: + key_type, value_type = args[0], args[1] + else: + key_type, value_type = str, object + + result: dict[str, Any] = {} + for key, val in value.items(): + if key_type is str and not isinstance(key, str): + raise TypeError(f"Expected mapping key to be str, got {key!r} instead.") + result[key] = parse_json(val, value_type) if value_type is not object else val + return result + + +def _parse_typeddict(value: object, type_annotation: Any) -> dict[str, Any]: + """Validate a :class:`~typing.TypedDict` annotation. + + Each required key must be present and is parsed against its annotation. + Optional keys are parsed when present. Totality is respected via the + TypedDict's ``__required_keys__`` / ``__optional_keys__`` attributes. + """ + if not isinstance(value, Mapping): + raise TypeError(f"Expected a mapping, got {value!r} instead.") + + # ``include_extras=True`` keeps ``Required`` / ``NotRequired`` / ``ReadOnly`` + # wrappers visible if needed; the required/optional split is taken from the + # TypedDict's own bookkeeping which already accounts for totality. + hints = get_type_hints(type_annotation, include_extras=False) + required_keys: frozenset[str] = getattr(type_annotation, "__required_keys__", frozenset()) + optional_keys: frozenset[str] = getattr(type_annotation, "__optional_keys__", frozenset()) + + missing = [key for key in required_keys if key not in value] + if missing: + raise ValueError( + f"Expected required key(s) {sorted(missing)!r} for " + f"{type_annotation.__name__}, got {value!r} instead." + ) + + result: dict[str, Any] = {} + for key in required_keys | optional_keys: + if key in value: + result[key] = parse_json(value[key], hints[key]) + # Preserve any extra keys not declared on the TypedDict. + for key, val in value.items(): + if key not in result: + result[key] = val + return result diff --git a/tests/test_json_parse.py b/tests/test_json_parse.py new file mode 100644 index 0000000000..87f9f587c4 --- /dev/null +++ b/tests/test_json_parse.py @@ -0,0 +1,476 @@ +"""Tests for :func:`zarr.core.json_parse.parse_json`. + +These cover every dispatch category described in section 4.3 of the SDD: +primitives (incl. the bool/int edge case), ``Literal``, unions / ``Optional``, +fixed-length and variadic ``tuple``, ``Sequence`` / ``list`` (with coercion to +``tuple``), ``Mapping`` / ``dict``, ``TypedDict`` (required/optional/nested), +and the fallback ``TypeError`` for unsupported annotations. +""" + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING, Literal, Optional, Union + +import pytest + +# ``parse_json`` (the module under test) uses ``typing_extensions`` for its +# TypedDict bookkeeping, so we build TypedDicts the same way for consistency. +from typing_extensions import NotRequired, TypedDict + +from zarr.core.json_parse import parse_json + +if TYPE_CHECKING: + from typing import Any + + +# --------------------------------------------------------------------------- +# None / type(None) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("annotation", [None, type(None)]) +def test_none_valid(annotation: Any) -> None: + assert parse_json(None, annotation) is None + + +@pytest.mark.parametrize("annotation", [None, type(None)]) +@pytest.mark.parametrize("value", [0, "", False, [], {}]) +def test_none_invalid(annotation: Any, value: Any) -> None: + with pytest.raises(ValueError, match="Expected None"): + parse_json(value, annotation) + + +# --------------------------------------------------------------------------- +# Primitives: str, int, float, bool +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + ("value", "annotation"), + [ + ("hello", str), + ("", str), + (1, int), + (-5, int), + (1.5, float), + (-0.0, float), + (True, bool), + (False, bool), + ], +) +def test_primitive_valid(value: Any, annotation: type) -> None: + result = parse_json(value, annotation) + assert result == value + # The exact object is returned unchanged for primitives. + assert result is value + + +@pytest.mark.parametrize( + ("value", "annotation"), + [ + (1, str), + (1.0, str), + (True, str), + ("1", int), + (1.0, int), + ("x", float), + (1, float), # an int is NOT a float here + ("true", bool), + (0, bool), + ], +) +def test_primitive_wrong_type_raises(value: Any, annotation: type) -> None: + with pytest.raises(ValueError, match="Expected"): + parse_json(value, annotation) + + +# --- The CRITICAL bool/int edge cases ------------------------------------- + + +def test_bool_not_accepted_as_int() -> None: + """``True`` (an ``int`` subclass instance) must NOT satisfy ``int``.""" + with pytest.raises(ValueError, match="Expected int"): + parse_json(True, int) + with pytest.raises(ValueError, match="Expected int"): + parse_json(False, int) + + +def test_int_not_accepted_as_bool() -> None: + """A plain ``int`` must NOT satisfy ``bool``.""" + with pytest.raises(ValueError, match="Expected bool"): + parse_json(1, bool) + with pytest.raises(ValueError, match="Expected bool"): + parse_json(0, bool) + + +def test_bool_accepted_as_bool() -> None: + assert parse_json(True, bool) is True + assert parse_json(False, bool) is False + + +def test_int_accepted_as_int() -> None: + assert parse_json(1, int) == 1 + assert parse_json(0, int) == 0 + + +def test_bool_not_accepted_as_float() -> None: + """``bool`` is an ``int`` subclass and must not satisfy ``float`` either.""" + with pytest.raises(ValueError, match="Expected float"): + parse_json(True, float) + + +# --------------------------------------------------------------------------- +# Literal[...] +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("value", ["C", "F"]) +def test_literal_str_member_valid(value: str) -> None: + assert parse_json(value, Literal["C", "F"]) == value + + +@pytest.mark.parametrize("value", ["Q", "c", "", 0]) +def test_literal_str_non_member_raises(value: Any) -> None: + with pytest.raises(ValueError, match="Expected one of"): + parse_json(value, Literal["C", "F"]) + + +@pytest.mark.parametrize("value", [1, 2]) +def test_literal_int_member_valid(value: int) -> None: + assert parse_json(value, Literal[1, 2]) == value + + +def test_literal_int_non_member_raises() -> None: + with pytest.raises(ValueError, match="Expected one of"): + parse_json(3, Literal[1, 2]) + + +def test_literal_true_does_not_match_int_literal() -> None: + """``True == 1`` in Python, but ``True`` must NOT satisfy ``Literal[1, 2]``.""" + with pytest.raises(ValueError, match="Expected one of"): + parse_json(True, Literal[1, 2]) + + +def test_literal_one_does_not_match_bool_literal() -> None: + """Conversely, ``1`` must not satisfy a bool literal.""" + with pytest.raises(ValueError, match="Expected one of"): + parse_json(1, Literal[True, False]) + + +def test_literal_mixed_members() -> None: + annotation = Literal["a", 3, None] + assert parse_json("a", annotation) == "a" + assert parse_json(3, annotation) == 3 + # ``None`` is a valid literal member. + assert parse_json(None, annotation) is None + + +# --------------------------------------------------------------------------- +# Union / Optional +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("value", ["hello", 5]) +def test_union_each_member_accepted(value: Any) -> None: + assert parse_json(value, Union[str, int]) == value + + +def test_union_pep604_syntax() -> None: + """The ``X | Y`` syntax (``types.UnionType``) is handled too.""" + assert parse_json("hello", str | int) == "hello" + assert parse_json(5, str | int) == 5 + + +def test_union_no_member_matches_raises() -> None: + with pytest.raises(ValueError, match="Expected a value matching one of"): + parse_json(1.5, Union[str, int]) + + +def test_union_error_lists_each_member() -> None: + with pytest.raises(ValueError, match="Tried each union member"): + parse_json([], Union[str, int]) + + +@pytest.mark.parametrize("value", [None, "x"]) +def test_optional_accepts_none_and_value(value: Any) -> None: + assert parse_json(value, Optional[str]) == value + + +def test_optional_rejects_other_types() -> None: + with pytest.raises(ValueError, match="Expected a value matching one of"): + parse_json(5, Optional[str]) + + +def test_union_with_coercion_returns_coerced_value() -> None: + """A union member that coerces (Sequence -> tuple) returns the coerced form.""" + result = parse_json([1, 2, 3], Union[str, Sequence[int]]) + assert result == (1, 2, 3) + assert isinstance(result, tuple) + + +# --------------------------------------------------------------------------- +# tuple[...] +# --------------------------------------------------------------------------- + + +def test_tuple_fixed_length_valid() -> None: + result = parse_json([1, "a"], tuple[int, str]) + assert result == (1, "a") + assert isinstance(result, tuple) + + +def test_tuple_fixed_length_wrong_length_raises() -> None: + with pytest.raises(TypeError, match="Expected a sequence of length 2"): + parse_json([1], tuple[int, str]) + with pytest.raises(TypeError, match="Expected a sequence of length 2"): + parse_json([1, "a", "extra"], tuple[int, str]) + + +def test_tuple_fixed_length_wrong_element_type_raises() -> None: + # Second element should be str but is an int -> inner primitive ValueError. + with pytest.raises(ValueError, match="Expected str"): + parse_json([1, 2], tuple[int, str]) + + +def test_tuple_variadic_valid() -> None: + result = parse_json([1, 2, 3], tuple[int, ...]) + assert result == (1, 2, 3) + assert isinstance(result, tuple) + + +def test_tuple_variadic_empty() -> None: + assert parse_json([], tuple[int, ...]) == () + + +def test_tuple_variadic_wrong_element_raises() -> None: + with pytest.raises(ValueError, match="Expected int"): + parse_json([1, "x", 3], tuple[int, ...]) + + +def test_tuple_accepts_tuple_input() -> None: + assert parse_json((1, "a"), tuple[int, str]) == (1, "a") + + +def test_tuple_rejects_non_sequence() -> None: + with pytest.raises(TypeError, match="Expected a sequence"): + parse_json(5, tuple[int, ...]) + + +def test_tuple_rejects_str_input() -> None: + """A ``str`` is not treated as a sequence for tuple parsing.""" + with pytest.raises(TypeError, match="Expected a sequence"): + parse_json("ab", tuple[str, str]) + + +def test_tuple_empty_annotation() -> None: + """``tuple[()]`` matches the empty sequence.""" + assert parse_json([], tuple[()]) == () + + +# --------------------------------------------------------------------------- +# Sequence[T] / list[T] (coerced to tuple) +# --------------------------------------------------------------------------- + + +def test_sequence_coerces_to_tuple() -> None: + result = parse_json([1, 2, 3], Sequence[int]) + assert result == (1, 2, 3) + assert isinstance(result, tuple) + + +def test_list_coerces_to_tuple() -> None: + result = parse_json([1, 2, 3], list[int]) + assert result == (1, 2, 3) + assert isinstance(result, tuple) + + +def test_sequence_validates_elements() -> None: + with pytest.raises(ValueError, match="Expected int"): + parse_json([1, "two", 3], Sequence[int]) + + +def test_sequence_rejects_str() -> None: + """A bare string is a primitive in JSON terms, not a sequence.""" + with pytest.raises(TypeError, match="Expected a sequence"): + parse_json("abc", Sequence[str]) + + +def test_sequence_rejects_non_sequence() -> None: + with pytest.raises(TypeError, match="Expected a sequence"): + parse_json(42, Sequence[int]) + + +def test_sequence_of_sequence_nested() -> None: + result = parse_json([[1, 2], [3]], Sequence[Sequence[int]]) + assert result == ((1, 2), (3,)) + assert isinstance(result, tuple) + assert all(isinstance(inner, tuple) for inner in result) + + +# --------------------------------------------------------------------------- +# Mapping[str, T] / dict[str, T] +# --------------------------------------------------------------------------- + + +def test_mapping_valid_returns_dict() -> None: + result = parse_json({"a": 1, "b": 2}, Mapping[str, int]) + assert result == {"a": 1, "b": 2} + assert isinstance(result, dict) + + +def test_dict_valid_returns_dict() -> None: + result = parse_json({"a": 1}, dict[str, int]) + assert result == {"a": 1} + assert isinstance(result, dict) + + +def test_mapping_wrong_value_type_raises() -> None: + with pytest.raises(ValueError, match="Expected int"): + parse_json({"a": "not an int"}, Mapping[str, int]) + + +def test_mapping_non_str_key_raises() -> None: + with pytest.raises(TypeError, match="Expected mapping key to be str"): + parse_json({1: 1}, Mapping[str, int]) + + +def test_mapping_non_mapping_raises() -> None: + with pytest.raises(TypeError, match="Expected a mapping"): + parse_json([("a", 1)], Mapping[str, int]) + + +def test_mapping_nested_values() -> None: + result = parse_json({"a": [1, 2], "b": [3]}, Mapping[str, Sequence[int]]) + assert result == {"a": (1, 2), "b": (3,)} + assert isinstance(result, dict) + assert all(isinstance(v, tuple) for v in result.values()) + + +# --------------------------------------------------------------------------- +# TypedDict +# --------------------------------------------------------------------------- + + +class Point(TypedDict): + x: int + y: int + + +# NOTE: This module uses ``from __future__ import annotations``, which stringizes +# annotations. With the *class* TypedDict syntax, ``typing_extensions`` cannot see +# a ``NotRequired[...]`` wrapper at class-creation time (the annotation is a string), +# so the key is wrongly recorded as required. This is a known limitation of +# stringized annotations + ``NotRequired`` -- NOT a bug in ``json_parse``. The +# functional ``TypedDict(...)`` form keeps ``NotRequired`` as a real runtime object, +# so ``__optional_keys__`` is populated correctly; we use it here to genuinely +# exercise NotRequired. (``total=False``, used by ``PartialConfig`` below, is +# class-level and works correctly even with stringized annotations.) +Config = TypedDict("Config", {"name": str, "count": NotRequired[int]}) + + +class PartialConfig(TypedDict, total=False): + a: int + b: str + + +class Outer(TypedDict): + label: str + point: Point + + +def test_typeddict_valid() -> None: + result = parse_json({"x": 1, "y": 2}, Point) + assert result == {"x": 1, "y": 2} + assert isinstance(result, dict) + + +def test_typeddict_missing_required_key_raises() -> None: + with pytest.raises(ValueError, match="Expected required key"): + parse_json({"x": 1}, Point) + + +def test_typeddict_wrong_value_type_raises() -> None: + with pytest.raises(ValueError, match="Expected int"): + parse_json({"x": 1, "y": "two"}, Point) + + +def test_typeddict_non_mapping_raises() -> None: + with pytest.raises(TypeError, match="Expected a mapping"): + parse_json([1, 2], Point) + + +def test_typeddict_notrequired_present() -> None: + result = parse_json({"name": "a", "count": 3}, Config) + assert result == {"name": "a", "count": 3} + + +def test_typeddict_notrequired_absent() -> None: + result = parse_json({"name": "a"}, Config) + assert result == {"name": "a"} + + +def test_typeddict_notrequired_wrong_type_raises() -> None: + with pytest.raises(ValueError, match="Expected int"): + parse_json({"name": "a", "count": "x"}, Config) + + +def test_typeddict_total_false_all_optional() -> None: + # All keys optional: empty dict is valid. + assert parse_json({}, PartialConfig) == {} + assert parse_json({"a": 1}, PartialConfig) == {"a": 1} + assert parse_json({"a": 1, "b": "y"}, PartialConfig) == {"a": 1, "b": "y"} + + +def test_typeddict_total_false_validates_present_keys() -> None: + with pytest.raises(ValueError, match="Expected int"): + parse_json({"a": "not int"}, PartialConfig) + + +def test_typeddict_nested_valid() -> None: + result = parse_json({"label": "L", "point": {"x": 1, "y": 2}}, Outer) + assert result == {"label": "L", "point": {"x": 1, "y": 2}} + + +def test_typeddict_nested_invalid_recurses() -> None: + with pytest.raises(ValueError, match="Expected int"): + parse_json({"label": "L", "point": {"x": 1, "y": "bad"}}, Outer) + + +def test_typeddict_nested_missing_required_recurses() -> None: + with pytest.raises(ValueError, match="Expected required key"): + parse_json({"label": "L", "point": {"x": 1}}, Outer) + + +def test_typeddict_preserves_extra_keys() -> None: + """Extra keys not declared on the TypedDict are preserved unchanged.""" + result = parse_json({"x": 1, "y": 2, "extra": "kept"}, Point) + assert result == {"x": 1, "y": 2, "extra": "kept"} + + +# --------------------------------------------------------------------------- +# Fallback / error quality +# --------------------------------------------------------------------------- + + +def test_unsupported_annotation_raises_typeerror() -> None: + with pytest.raises(TypeError, match="unsupported type annotation"): + parse_json(1, complex) + + +def test_unsupported_annotation_names_value_and_annotation() -> None: + with pytest.raises(TypeError) as excinfo: + parse_json(object(), set[int]) + msg = str(excinfo.value) + assert "set" in msg + + +def test_error_message_names_offending_value() -> None: + """Primitive errors name the offending value via ``repr``.""" + with pytest.raises(ValueError, match=r"Expected int, got 'nope' instead"): + parse_json("nope", int) + + +def test_error_message_names_expected_literal_choices() -> None: + with pytest.raises(ValueError, match=r"Expected one of \('C', 'F'\)"): + parse_json("Q", Literal["C", "F"]) From 0eba162e1e5c8377b10bac10509f8f464aaffc18 Mon Sep 17 00:00:00 2001 From: Yong-Shin Jiang Date: Sun, 14 Jun 2026 22:54:10 -0700 Subject: [PATCH 2/4] Migrate parse_order, parse_bool, parse_zarr_format to parse_json (#3285) Pilot migration delegating three representative helpers to the unified parse_json validator. Public signatures and return types are unchanged. parse_zarr_format re-wraps parse_json's ValueError/TypeError as MetadataValidationError with the original message to preserve observable behavior. parse_json is imported function-locally to avoid circular imports. Focused suites green: test_common/test_config/test_metadata/test_json_parse = 430 passed. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/common.py | 12 ++++++------ src/zarr/core/metadata/v3.py | 11 +++++++---- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index eafffa1818..b1b4403f1c 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -206,15 +206,15 @@ def parse_fill_value(data: Any) -> Any: def parse_order(data: Any) -> Literal["C", "F"]: - if data in ("C", "F"): - return cast("Literal['C', 'F']", data) - raise ValueError(f"Expected one of ('C', 'F'), got {data} instead.") + from zarr.core.json_parse import parse_json + + return cast("Literal['C', 'F']", parse_json(data, Literal["C", "F"])) def parse_bool(data: Any) -> bool: - if isinstance(data, bool): - return data - raise ValueError(f"Expected bool, got {data} instead.") + from zarr.core.json_parse import parse_json + + return cast("bool", parse_json(data, bool)) def _warn_write_empty_chunks_kwarg() -> None: diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 9eaccc5076..b27ff05160 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -47,10 +47,13 @@ def parse_zarr_format(data: object) -> Literal[3]: - if data == 3: - return 3 - msg = f"Invalid value for 'zarr_format'. Expected '3'. Got '{data}'." - raise MetadataValidationError(msg) + from zarr.core.json_parse import parse_json + + try: + return cast("Literal[3]", parse_json(data, Literal[3])) + except (ValueError, TypeError) as exc: + msg = f"Invalid value for 'zarr_format'. Expected '3'. Got '{data}'." + raise MetadataValidationError(msg) from exc def parse_node_type_array(data: object) -> Literal["array"]: From df1a0cc14818f13d5c075e3bb035a552911d65a6 Mon Sep 17 00:00:00 2001 From: Yong-Shin Jiang Date: Sun, 14 Jun 2026 23:23:50 -0700 Subject: [PATCH 3/4] Fix lint and make TypedDict NotRequired robust under future annotations (#3285) Apply ruff check/format to satisfy the Lint CI hook. Keep typing.Union/Optional spellings in tests (with noqa) to cover that origin path alongside X | Y. Rework _parse_typeddict to derive required/optional from get_type_hints(include_extras=True) + __total__ instead of __required_keys__, so class-syntax NotRequired is detected even when 'from __future__ import annotations' stringizes the hints (as in zarr's metadata modules). test_json_parse + test_common + test_metadata = 393 passed. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/json_parse.py | 50 +++++++++++++++++++++++++------------ tests/test_json_parse.py | 39 ++++++++++++++++------------- 2 files changed, 55 insertions(+), 34 deletions(-) diff --git a/src/zarr/core/json_parse.py b/src/zarr/core/json_parse.py index 9e22121bca..8608e69d11 100644 --- a/src/zarr/core/json_parse.py +++ b/src/zarr/core/json_parse.py @@ -24,7 +24,7 @@ import types from collections.abc import Mapping, Sequence -from typing import Any, Literal, Union, get_args, get_origin +from typing import Any, Literal, NotRequired, Required, Union, get_args, get_origin from typing_extensions import get_type_hints, is_typeddict @@ -101,8 +101,7 @@ def parse_json(value: object, type_annotation: object) -> Any: # 9. Fallback raise TypeError( - f"Cannot parse value {value!r} against unsupported type annotation " - f"{type_annotation!r}." + f"Cannot parse value {value!r} against unsupported type annotation {type_annotation!r}." ) @@ -154,7 +153,7 @@ def _parse_union(value: object, type_annotation: object) -> Any: for member in members: try: return parse_json(value, member) - except (ValueError, TypeError) as exc: # noqa: PERF203 + except (ValueError, TypeError) as exc: errors.append(f" - against {member!r}: {exc}") joined = "\n".join(errors) raise ValueError( @@ -193,7 +192,9 @@ def _parse_tuple(value: object, type_annotation: object) -> tuple[Any, ...]: f"Expected a sequence of length {len(args)}, got {value!r} of " f"length {len(value)} instead." ) - return tuple(parse_json(item, element_type) for item, element_type in zip(value, args, strict=True)) + return tuple( + parse_json(item, element_type) for item, element_type in zip(value, args, strict=True) + ) def _parse_sequence(value: object, type_annotation: object) -> tuple[Any, ...]: @@ -239,19 +240,36 @@ def _parse_mapping(value: object, type_annotation: object) -> dict[str, Any]: def _parse_typeddict(value: object, type_annotation: Any) -> dict[str, Any]: """Validate a :class:`~typing.TypedDict` annotation. - Each required key must be present and is parsed against its annotation. - Optional keys are parsed when present. Totality is respected via the - TypedDict's ``__required_keys__`` / ``__optional_keys__`` attributes. + Each required key must be present and is parsed against its annotation; + optional keys are parsed when present. Whether a key is required is derived + from the *resolved* annotations -- their ``Required`` / ``NotRequired`` + wrappers combined with the TypedDict's totality (``__total__``). + + This deliberately does not use ``__required_keys__`` / ``__optional_keys__``: + under ``from __future__ import annotations`` those are computed from + stringized annotations at class-creation time, so a ``NotRequired[...]`` + wrapper is invisible to them. Resolving the hints with + ``include_extras=True`` evaluates the strings and makes the wrappers visible. """ if not isinstance(value, Mapping): raise TypeError(f"Expected a mapping, got {value!r} instead.") - # ``include_extras=True`` keeps ``Required`` / ``NotRequired`` / ``ReadOnly`` - # wrappers visible if needed; the required/optional split is taken from the - # TypedDict's own bookkeeping which already accounts for totality. - hints = get_type_hints(type_annotation, include_extras=False) - required_keys: frozenset[str] = getattr(type_annotation, "__required_keys__", frozenset()) - optional_keys: frozenset[str] = getattr(type_annotation, "__optional_keys__", frozenset()) + hints = get_type_hints(type_annotation, include_extras=True) + total = getattr(type_annotation, "__total__", True) + + required_keys: set[str] = set() + field_types: dict[str, Any] = {} + for key, hint in hints.items(): + origin = get_origin(hint) + if origin is Required: + required_keys.add(key) + field_types[key] = get_args(hint)[0] + elif origin is NotRequired: + field_types[key] = get_args(hint)[0] + else: + if total: + required_keys.add(key) + field_types[key] = hint missing = [key for key in required_keys if key not in value] if missing: @@ -261,9 +279,9 @@ def _parse_typeddict(value: object, type_annotation: Any) -> dict[str, Any]: ) result: dict[str, Any] = {} - for key in required_keys | optional_keys: + for key, field_type in field_types.items(): if key in value: - result[key] = parse_json(value[key], hints[key]) + result[key] = parse_json(value[key], field_type) # Preserve any extra keys not declared on the TypedDict. for key, val in value.items(): if key not in result: diff --git a/tests/test_json_parse.py b/tests/test_json_parse.py index 87f9f587c4..cf87dad914 100644 --- a/tests/test_json_parse.py +++ b/tests/test_json_parse.py @@ -10,13 +10,13 @@ from __future__ import annotations from collections.abc import Mapping, Sequence -from typing import TYPE_CHECKING, Literal, Optional, Union +from typing import TYPE_CHECKING, Literal, NotRequired, Optional, Union import pytest # ``parse_json`` (the module under test) uses ``typing_extensions`` for its # TypedDict bookkeeping, so we build TypedDicts the same way for consistency. -from typing_extensions import NotRequired, TypedDict +from typing_extensions import TypedDict from zarr.core.json_parse import parse_json @@ -159,7 +159,7 @@ def test_literal_one_does_not_match_bool_literal() -> None: def test_literal_mixed_members() -> None: - annotation = Literal["a", 3, None] + annotation = Literal["a", 3] | None assert parse_json("a", annotation) == "a" assert parse_json(3, annotation) == 3 # ``None`` is a valid literal member. @@ -169,11 +169,15 @@ def test_literal_mixed_members() -> None: # --------------------------------------------------------------------------- # Union / Optional # --------------------------------------------------------------------------- +# These tests deliberately use the ``typing.Union`` / ``typing.Optional`` +# spelling (which yields a ``typing.Union`` origin) to cover that code path; +# the ``X | Y`` spelling (``types.UnionType``) is covered separately below. +# The legacy-spelling lines therefore suppress ruff's UP007/UP045 rules. @pytest.mark.parametrize("value", ["hello", 5]) def test_union_each_member_accepted(value: Any) -> None: - assert parse_json(value, Union[str, int]) == value + assert parse_json(value, Union[str, int]) == value # noqa: UP007 def test_union_pep604_syntax() -> None: @@ -184,27 +188,27 @@ def test_union_pep604_syntax() -> None: def test_union_no_member_matches_raises() -> None: with pytest.raises(ValueError, match="Expected a value matching one of"): - parse_json(1.5, Union[str, int]) + parse_json(1.5, Union[str, int]) # noqa: UP007 def test_union_error_lists_each_member() -> None: with pytest.raises(ValueError, match="Tried each union member"): - parse_json([], Union[str, int]) + parse_json([], Union[str, int]) # noqa: UP007 @pytest.mark.parametrize("value", [None, "x"]) def test_optional_accepts_none_and_value(value: Any) -> None: - assert parse_json(value, Optional[str]) == value + assert parse_json(value, Optional[str]) == value # noqa: UP045 def test_optional_rejects_other_types() -> None: with pytest.raises(ValueError, match="Expected a value matching one of"): - parse_json(5, Optional[str]) + parse_json(5, Optional[str]) # noqa: UP045 def test_union_with_coercion_returns_coerced_value() -> None: """A union member that coerces (Sequence -> tuple) returns the coerced form.""" - result = parse_json([1, 2, 3], Union[str, Sequence[int]]) + result = parse_json([1, 2, 3], Union[str, Sequence[int]]) # noqa: UP007 assert result == (1, 2, 3) assert isinstance(result, tuple) @@ -358,15 +362,14 @@ class Point(TypedDict): # NOTE: This module uses ``from __future__ import annotations``, which stringizes -# annotations. With the *class* TypedDict syntax, ``typing_extensions`` cannot see -# a ``NotRequired[...]`` wrapper at class-creation time (the annotation is a string), -# so the key is wrongly recorded as required. This is a known limitation of -# stringized annotations + ``NotRequired`` -- NOT a bug in ``json_parse``. The -# functional ``TypedDict(...)`` form keeps ``NotRequired`` as a real runtime object, -# so ``__optional_keys__`` is populated correctly; we use it here to genuinely -# exercise NotRequired. (``total=False``, used by ``PartialConfig`` below, is -# class-level and works correctly even with stringized annotations.) -Config = TypedDict("Config", {"name": str, "count": NotRequired[int]}) +# annotations -- so a class-syntax ``NotRequired[...]`` is invisible to the +# TypedDict's ``__optional_keys__``. ``_parse_typeddict`` handles this by +# resolving the hints with ``get_type_hints(..., include_extras=True)`` and +# reading the ``Required`` / ``NotRequired`` wrappers directly, so the class +# form below genuinely exercises NotRequired under stringized annotations. +class Config(TypedDict): + name: str + count: NotRequired[int] class PartialConfig(TypedDict, total=False): From 78874b3b6057476f64ac808a5d06f064f53c3285 Mon Sep 17 00:00:00 2001 From: Yong-Shin Jiang Date: Sun, 14 Jun 2026 23:32:49 -0700 Subject: [PATCH 4/4] Annotate origin as Any to satisfy mypy in _parse_typeddict (#3285) get_origin(hint) is Required/NotRequired tripped mypy's comparison-overlap and unreachable checks; typing origin as Any keeps the runtime check while satisfying mypy. Full 'uv run --frozen mypy' is clean (190 files); ruff check/format clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/json_parse.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/json_parse.py b/src/zarr/core/json_parse.py index 8608e69d11..0ce63173c4 100644 --- a/src/zarr/core/json_parse.py +++ b/src/zarr/core/json_parse.py @@ -260,7 +260,9 @@ def _parse_typeddict(value: object, type_annotation: Any) -> dict[str, Any]: required_keys: set[str] = set() field_types: dict[str, Any] = {} for key, hint in hints.items(): - origin = get_origin(hint) + # Annotated as ``Any`` so mypy does not narrow the ``is`` comparisons + # against the ``Required`` / ``NotRequired`` special forms. + origin: Any = get_origin(hint) if origin is Required: required_keys.add(key) field_types[key] = get_args(hint)[0]