From 9e6b0f18b53b84622e9d0cbbdd6a1f790d138f4a Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 17 Jun 2026 12:20:29 -0400 Subject: [PATCH 1/3] chore: extend hypothesis block indexing tests for rectilinear grids --- changes/4073.misc.md | 1 + src/zarr/testing/strategies.py | 93 +++++++++++++++++++++++++++++----- tests/test_properties.py | 24 ++++----- 3 files changed, 91 insertions(+), 27 deletions(-) create mode 100644 changes/4073.misc.md diff --git a/changes/4073.misc.md b/changes/4073.misc.md new file mode 100644 index 0000000000..bcc6b8281e --- /dev/null +++ b/changes/4073.misc.md @@ -0,0 +1 @@ +Extend the `test_block_indexing` Hypothesis property test to cover rectilinear chunk grids and sharded regular grids, and generalize the `block_indices` strategy in `zarr.testing.strategies` to build its array-space oracle from cumulative chunk offsets (`chunk_sizes` parameter) instead of a uniform chunk size. diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 7d6556a359..1fee5f2a6e 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -1,3 +1,4 @@ +import itertools import math import sys from collections.abc import Callable, Mapping @@ -593,20 +594,30 @@ def orthogonal_indices( @st.composite def block_indices( - draw: st.DrawFn, *, chunk_grid_shape: tuple[int, ...], chunks: tuple[int, ...] + draw: st.DrawFn, *, chunk_sizes: tuple[tuple[int, ...], ...] ) -> tuple[tuple[int | slice, ...], tuple[slice, ...]]: """ - Strategy for block-selection indexers over a *regular* chunk grid. + Strategy for block-selection indexers over a chunk grid. Block indexing is basic indexing applied to the block grid (the grid of chunks), so each axis is drawn with ``basic_indices`` over that axis's chunk - count from ``chunk_grid_shape`` (e.g. ``Array.cdata_shape``), mirroring how - ``orthogonal_indices`` reuses ``basic_indices`` per axis. 
Block indexing only - supports integers and step-1 slices whose start references an existing chunk, - so strided slices and slices starting at the grid edge are filtered out. The - array-space translation assumes a regular (uniform) chunk grid; an over-long - stop into a smaller last chunk is left for numpy to clamp when the oracle is - applied. + count, mirroring how ``orthogonal_indices`` reuses ``basic_indices`` per + axis. ``chunk_sizes`` gives the per-chunk data sizes of the array's *outer* + (block) grid for every axis — i.e. ``Array.write_chunk_sizes``, the grid that + ``Array.blocks`` addresses (the shard grid when sharding is used). For + example ``(3, 3, 3, 1)`` for a length-10 axis with a regular chunk size of 3, + or the explicit edges of a rectilinear axis; ``nchunks`` for an axis is + ``len(chunk_sizes[axis])``. + + The array-space translation uses the cumulative sum of those sizes, matching + ``BlockIndexer``'s use of ``dim_grid.chunk_offset``. Because the sizes are + clipped to the array extent, the final offset equals the extent and the + translation is exact for regular (uniform), rectilinear, and sharded grids + alike. + + Block indexing only supports integers and step-1 slices whose start + references an existing chunk, so strided slices and slices starting at the + grid edge are filtered out. Returns ------- @@ -634,7 +645,10 @@ def predicate(value: tuple[Any, ...]) -> bool: block_indexer: list[int | slice] = [] array_indexer: list[slice] = [] - for chunk, nchunks in zip(chunks, chunk_grid_shape, strict=True): + for sizes in chunk_sizes: + nchunks = len(sizes) + # offsets[i] is the array-space start of chunk i; length nchunks + 1. 
+ offsets = list(itertools.accumulate(sizes, initial=0)) (dim_sel,) = draw( basic_indices(min_dims=1, shape=(nchunks,), allow_ellipsis=False) # normalize bare ints / slices to a 1-tuple, skip the empty tuple @@ -645,13 +659,68 @@ def predicate(value: tuple[Any, ...]) -> bool: block_indexer.append(dim_sel) if isinstance(dim_sel, slice): start, stop, _ = dim_sel.indices(nchunks) - array_indexer.append(slice(start * chunk, stop * chunk)) + array_indexer.append(slice(offsets[start], offsets[stop])) else: block = dim_sel % nchunks - array_indexer.append(slice(block * chunk, (block + 1) * chunk)) + array_indexer.append(slice(offsets[block], offsets[block + 1])) return tuple(block_indexer), tuple(array_indexer) +@st.composite +def block_test_arrays( + draw: st.DrawFn, +) -> tuple[Array[Any], np.ndarray[Any, Any], tuple[tuple[int, ...], ...]]: + """Draw an array for block-indexing property tests with its contents and sizes. + + Two arms, selected with equal probability: + + - **regular**: a regular chunk grid, optionally wrapped in sharding. + - **rectilinear**: a variable (rectilinear) chunk grid, always unsharded. + + Returns ``(zarray, nparray, chunk_sizes)`` where ``chunk_sizes`` is + ``Array.write_chunk_sizes`` — the per-axis sizes of the array's *outer* + (block / shard) grid, which is exactly the grid ``Array.blocks`` addresses. + Using it keeps the oracle correct for regular, sharded, and rectilinear + grids alike, without reaching into the private chunk grid. + """ + if draw(st.booleans()): + # regular arm, optionally sharded + nparray, chunks = draw( + np_array_and_chunks( + arrays=numpy_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1)) + ) + ) + # shard_shapes needs shape // chunk >= 1 on every axis; min_side=1 chunking + # already guarantees this, but guard defensively. 
+ if all(s // c >= 1 for s, c in zip(nparray.shape, chunks, strict=True)): + shards = draw(st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunks)) + else: + shards = None + event("block regular sharded" if shards is not None else "block regular unsharded") + store = draw(stores) + zarray = zarr.create_array( + store=store, + shape=nparray.shape, + chunks=chunks, + shards=shards, + dtype=nparray.dtype, + ) + zarray[...] = nparray + return zarray, nparray, zarray.write_chunk_sizes + + # rectilinear arm, always unsharded + event("block rectilinear") + shape = draw(_rectilinear_shapes) + chunk_shapes = draw(rectilinear_chunks(shape=shape)) + np_dtype = draw(dtypes()) + nparray = draw(numpy_arrays(shapes=st.just(shape), dtype=np_dtype)) + store = draw(stores) + with zarr.config.set({"array.rectilinear_chunks": True}): + zarray = zarr.create_array(store=store, shape=shape, chunks=chunk_shapes, dtype=np_dtype) + zarray[...] = nparray + return zarray, nparray, zarray.write_chunk_sizes + + def key_ranges( keys: SearchStrategy[str] = node_names, max_size: int = sys.maxsize ) -> SearchStrategy[list[tuple[str, RangeByteRequest]]]: diff --git a/tests/test_properties.py b/tests/test_properties.py index 994510aca0..2f6d8bee89 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -26,8 +26,8 @@ arrays, basic_indices, block_indices, + block_test_arrays, complex_rectilinear_arrays, - np_array_and_chunks, numpy_arrays, orthogonal_indices, rectilinear_arrays, @@ -261,27 +261,21 @@ def test_mask_indexing(data: st.DataObject) -> None: @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @given(data=st.data()) def test_block_indexing(data: st.DataObject) -> None: - # Block indexing addresses whole chunks on a regular grid; the array-space - # oracle in block_indices() assumes regular, unsharded chunks, so build the - # array directly from a regular chunking rather than drawing one that might - # be rectilinear or sharded. 
- nparray, chunks = data.draw( - np_array_and_chunks(arrays=numpy_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) - ) - store = data.draw(stores) - zarray = zarr.create_array(store=store, shape=nparray.shape, chunks=chunks, dtype=nparray.dtype) - zarray[...] = nparray + # Block indexing addresses whole outer-grid blocks. block_indices() builds its + # array-space oracle from cumulative chunk offsets, so it works for regular + # (uniform), rectilinear, and sharded grids alike; block_test_arrays draws + # across that matrix (rectilinear + sharded is unsupported and not drawn). + zarray, nparray, chunk_sizes = data.draw(block_test_arrays()) - block_indexer, array_indexer = data.draw( - block_indices(chunk_grid_shape=zarray.cdata_shape, chunks=chunks) - ) + block_indexer, array_indexer = data.draw(block_indices(chunk_sizes=chunk_sizes)) expected = nparray[array_indexer] # sync get, via both the .blocks interface and the dedicated method assert_array_equal(expected, zarray.blocks[block_indexer]) assert_array_equal(expected, zarray.get_block_selection(block_indexer)) - # sync set, via both interfaces + # sync set, via both interfaces; sharded set is broken upstream (GH2834) + assume(zarray.shards is None) new_data = data.draw(numpy_arrays(shapes=st.just(expected.shape), dtype=nparray.dtype)) nparray[array_indexer] = new_data zarray.blocks[block_indexer] = new_data From 6d4c2c712468d37f992558afcb37c134dd16016f Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 17 Jun 2026 12:28:42 -0400 Subject: [PATCH 2/3] Simplify --- src/zarr/testing/strategies.py | 50 +++++++++++++++------------------- tests/test_properties.py | 4 +-- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 1fee5f2a6e..7aef2abb85 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -669,20 +669,19 @@ def predicate(value: tuple[Any, ...]) ->
bool: @st.composite def block_test_arrays( draw: st.DrawFn, -) -> tuple[Array[Any], np.ndarray[Any, Any], tuple[tuple[int, ...], ...]]: - """Draw an array for block-indexing property tests with its contents and sizes. +) -> tuple[Array[Any], np.ndarray[Any, Any]]: + """Draw an array for block-indexing property tests, with its source contents. Two arms, selected with equal probability: - **regular**: a regular chunk grid, optionally wrapped in sharding. - **rectilinear**: a variable (rectilinear) chunk grid, always unsharded. - Returns ``(zarray, nparray, chunk_sizes)`` where ``chunk_sizes`` is - ``Array.write_chunk_sizes`` — the per-axis sizes of the array's *outer* - (block / shard) grid, which is exactly the grid ``Array.blocks`` addresses. - Using it keeps the oracle correct for regular, sharded, and rectilinear - grids alike, without reaching into the private chunk grid. + Returns ``(zarray, nparray)``. The per-axis block sizes the oracle needs are + ``zarray.write_chunk_sizes`` — the array's *outer* (block / shard) grid, which + is exactly the grid ``Array.blocks`` addresses; the caller reads it directly. """ + chunks: tuple[int, ...] | list[list[int]] if draw(st.booleans()): # regular arm, optionally sharded nparray, chunks = draw( @@ -690,14 +689,21 @@ def block_test_arrays( arrays=numpy_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1)) ) ) - # shard_shapes needs shape // chunk >= 1 on every axis; min_side=1 chunking - # already guarantees this, but guard defensively. - if all(s // c >= 1 for s, c in zip(nparray.shape, chunks, strict=True)): - shards = draw(st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunks)) - else: - shards = None + # min_side=1 chunking guarantees shape // chunk >= 1 on every axis, which + # shard_shapes requires. 
+ shards = draw(st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunks)) event("block regular sharded" if shards is not None else "block regular unsharded") - store = draw(stores) + rectilinear = False + else: + # rectilinear arm, always unsharded + event("block rectilinear") + shape = draw(_rectilinear_shapes) + chunks = draw(rectilinear_chunks(shape=shape)) + nparray = draw(numpy_arrays(shapes=st.just(shape), dtype=draw(dtypes()))) + shards, rectilinear = None, True + + store = draw(stores) + with zarr.config.set({"array.rectilinear_chunks": rectilinear}): zarray = zarr.create_array( store=store, shape=nparray.shape, @@ -705,20 +711,8 @@ def block_test_arrays( shards=shards, dtype=nparray.dtype, ) - zarray[...] = nparray - return zarray, nparray, zarray.write_chunk_sizes - - # rectilinear arm, always unsharded - event("block rectilinear") - shape = draw(_rectilinear_shapes) - chunk_shapes = draw(rectilinear_chunks(shape=shape)) - np_dtype = draw(dtypes()) - nparray = draw(numpy_arrays(shapes=st.just(shape), dtype=np_dtype)) - store = draw(stores) - with zarr.config.set({"array.rectilinear_chunks": True}): - zarray = zarr.create_array(store=store, shape=shape, chunks=chunk_shapes, dtype=np_dtype) - zarray[...] = nparray - return zarray, nparray, zarray.write_chunk_sizes + zarray[...] = nparray + return zarray, nparray def key_ranges( diff --git a/tests/test_properties.py b/tests/test_properties.py index 2f6d8bee89..3f71fdf493 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -265,9 +265,9 @@ def test_block_indexing(data: st.DataObject) -> None: # array-space oracle from cumulative chunk offsets, so it works for regular # (uniform), rectilinear, and sharded grids alike; block_test_arrays draws # across that matrix (rectilinear + sharded is unsupported and not drawn). 
- zarray, nparray, chunk_sizes = data.draw(block_test_arrays()) + zarray, nparray = data.draw(block_test_arrays()) - block_indexer, array_indexer = data.draw(block_indices(chunk_sizes=chunk_sizes)) + block_indexer, array_indexer = data.draw(block_indices(chunk_sizes=zarray.write_chunk_sizes)) expected = nparray[array_indexer] # sync get, via both the .blocks interface and the dedicated method From 362f54aa6390b8f5024272a8a9c681018d2ddbc0 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 17 Jun 2026 13:08:59 -0400 Subject: [PATCH 3/3] retain coverage --- src/zarr/testing/strategies.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 7aef2abb85..82072b5afc 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -649,13 +649,20 @@ def predicate(value: tuple[Any, ...]) -> bool: nchunks = len(sizes) # offsets[i] is the array-space start of chunk i; length nchunks + 1. offsets = list(itertools.accumulate(sizes, initial=0)) - (dim_sel,) = draw( + dim_strategy = ( basic_indices(min_dims=1, shape=(nchunks,), allow_ellipsis=False) # normalize bare ints / slices to a 1-tuple, skip the empty tuple .map(lambda x: (x,) if not isinstance(x, tuple) else x) .filter(bool) .filter(supported(nchunks)) ) + # basic_indices draws slices far more often than bare integers, so the + # integer (single-block) branch below would only be hit on rare draws. + # Union in an explicit integer so it is reliably exercised — keeping + # coverage deterministic under the derandomized ``ci`` Hypothesis profile. + (dim_sel,) = draw( + dim_strategy | st.integers(min_value=0, max_value=nchunks - 1).map(lambda i: (i,)) + ) block_indexer.append(dim_sel) if isinstance(dim_sel, slice): start, stop, _ = dim_sel.indices(nchunks)