zarr-developers · maxrjones · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 18, 2026
diff --git a/changes/4073.misc.md b/changes/4073.misc.md
@@ -0,0 +1 @@
+Extend the `test_block_indexing` Hypothesis property test to cover rectilinear chunk grids and sharded regular grids, and generalize the `block_indices` strategy in `zarr.testing.strategies` to build its array-space oracle from cumulative chunk offsets (`chunk_sizes` parameter) instead of a uniform chunk size.
diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py
@@ -1,3 +1,4 @@
+import itertools
 import math
 import sys
 from collections.abc import Callable, Mapping
@@ -599,20 +600,30 @@ def orthogonal_indices(
 
 @st.composite
 def block_indices(
-    draw: st.DrawFn, *, chunk_grid_shape: tuple[int, ...], chunks: tuple[int, ...]
+    draw: st.DrawFn, *, chunk_sizes: tuple[tuple[int, ...], ...]
 ) -> tuple[tuple[int | slice, ...], tuple[slice, ...]]:
     """
-    Strategy for block-selection indexers over a *regular* chunk grid.
+    Strategy for block-selection indexers over a chunk grid.
 
     Block indexing is basic indexing applied to the block grid (the grid of
     chunks), so each axis is drawn with ``basic_indices`` over that axis's chunk
-    count from ``chunk_grid_shape`` (e.g. ``Array.cdata_shape``), mirroring how
-    ``orthogonal_indices`` reuses ``basic_indices`` per axis. Block indexing only
-    supports integers and step-1 slices whose start references an existing chunk,
-    so strided slices and slices starting at the grid edge are filtered out. The
-    array-space translation assumes a regular (uniform) chunk grid; an over-long
-    stop into a smaller last chunk is left for numpy to clamp when the oracle is
-    applied.
+    count, mirroring how ``orthogonal_indices`` reuses ``basic_indices`` per
+    axis. ``chunk_sizes`` gives the per-chunk data sizes of the array's *outer*
+    (block) grid for every axis — i.e. ``Array.write_chunk_sizes``, the grid that
+    ``Array.blocks`` addresses (the shard grid when sharding is used). Each
+    entry holds one axis's sizes: e.g. ``(3, 3, 3, 1)`` for a length-10 axis with
+    a regular chunk size of 3, or the explicit edges of a rectilinear axis;
+    ``nchunks`` for an axis is ``len(chunk_sizes[axis])``.
+
+    The array-space translation uses the cumulative sum of those sizes, matching
+    ``BlockIndexer``'s use of ``dim_grid.chunk_offset``. Because the sizes are
+    clipped to the array extent, the final offset equals the extent and the
+    translation is exact for regular (uniform), rectilinear, and sharded grids
+    alike.
+
+    Block indexing only supports integers and step-1 slices whose start
+    references an existing chunk, so strided slices and slices starting at the
+    grid edge are filtered out.
 
     Returns
     -------
@@ -640,24 +651,83 @@ def predicate(value: tuple[Any, ...]) -> bool:
 
     block_indexer: list[int | slice] = []
     array_indexer: list[slice] = []
-    for chunk, nchunks in zip(chunks, chunk_grid_shape, strict=True):
-        (dim_sel,) = draw(
+    for sizes in chunk_sizes:
+        nchunks = len(sizes)
+        # offsets[i] starts chunk i; offsets[nchunks] is the axis extent (nchunks + 1 entries).
+        offsets = list(itertools.accumulate(sizes, initial=0))
+        dim_strategy = (
             basic_indices(min_dims=1, shape=(nchunks,), allow_ellipsis=False)
             # normalize bare ints / slices to a 1-tuple, skip the empty tuple
             .map(lambda x: (x,) if not isinstance(x, tuple) else x)
             .filter(bool)
             .filter(supported(nchunks))
         )
+        # basic_indices draws slices far more often than bare integers, so the
+        # integer (single-block) branch below would only be hit on rare draws.
+        # Union in an explicit integer so it is reliably exercised — keeping
+        # coverage deterministic under the derandomized ``ci`` Hypothesis profile.
+        (dim_sel,) = draw(
+            dim_strategy | st.integers(min_value=0, max_value=nchunks - 1).map(lambda i: (i,))
+        )
         block_indexer.append(dim_sel)
         if isinstance(dim_sel, slice):
             start, stop, _ = dim_sel.indices(nchunks)
-            array_indexer.append(slice(start * chunk, stop * chunk))
+            array_indexer.append(slice(offsets[start], offsets[stop]))
         else:
             block = dim_sel % nchunks
-            array_indexer.append(slice(block * chunk, (block + 1) * chunk))
+            array_indexer.append(slice(offsets[block], offsets[block + 1]))
     return tuple(block_indexer), tuple(array_indexer)
 
 
+@st.composite
+def block_test_arrays(
+    draw: st.DrawFn,
+) -> tuple[Array[Any], np.ndarray[Any, Any]]:
+    """Draw an array for block-indexing property tests, with its source contents.
+
+    Two arms, selected by a drawn boolean (``st.booleans()``):
+
+    - **regular**: a regular chunk grid, optionally wrapped in sharding.
+    - **rectilinear**: a variable (rectilinear) chunk grid, always unsharded.
+
+    Returns ``(zarray, nparray)``. The per-axis block sizes the oracle needs are
+    ``zarray.write_chunk_sizes`` — the array's *outer* (block / shard) grid, which
+    is exactly the grid ``Array.blocks`` addresses; the caller reads it directly.
+    """
+    chunks: tuple[int, ...] | list[list[int]]
+    if draw(st.booleans()):
+        # regular arm, optionally sharded
+        nparray, chunks = draw(
+            np_array_and_chunks(
+                arrays=numpy_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))
+            )
+        )
+        # min_side=1 chunking guarantees shape // chunk >= 1 on every axis, which
+        # shard_shapes requires.
+        shards = draw(st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunks))
+        event("block regular sharded" if shards is not None else "block regular unsharded")
+        rectilinear = False
+    else:
+        # rectilinear arm, always unsharded
+        event("block rectilinear")
+        shape = draw(_rectilinear_shapes)
+        chunks = draw(rectilinear_chunks(shape=shape))
+        nparray = draw(numpy_arrays(shapes=st.just(shape), dtype=draw(dtypes())))
+        shards, rectilinear = None, True
+
+    store = draw(stores)
+    with zarr.config.set({"array.rectilinear_chunks": rectilinear}):
+        zarray = zarr.create_array(
+            store=store,
+            shape=nparray.shape,
+            chunks=chunks,
+            shards=shards,
+            dtype=nparray.dtype,
+        )
+    zarray[...] = nparray
+    return zarray, nparray
+
+
 def key_ranges(
     keys: SearchStrategy[str] = node_names, max_size: int = sys.maxsize
 ) -> SearchStrategy[list[tuple[str, ByteRequest | None]]]:

diff --git a/tests/test_properties.py b/tests/test_properties.py
@@ -26,8 +26,8 @@
     arrays,
     basic_indices,
     block_indices,
+    block_test_arrays,
     complex_rectilinear_arrays,
-    np_array_and_chunks,
     numpy_arrays,
     orthogonal_indices,
     rectilinear_arrays,
@@ -261,27 +261,21 @@ def test_mask_indexing(data: st.DataObject) -> None:
 @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
 @given(data=st.data())
 def test_block_indexing(data: st.DataObject) -> None:
-    # Block indexing addresses whole chunks on a regular grid; the array-space
-    # oracle in block_indices() assumes regular, unsharded chunks, so build the
-    # array directly from a regular chunking rather than drawing one that might
-    # be rectilinear or sharded.
-    nparray, chunks = data.draw(
-        np_array_and_chunks(arrays=numpy_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1)))
-    )
-    store = data.draw(stores)
-    zarray = zarr.create_array(store=store, shape=nparray.shape, chunks=chunks, dtype=nparray.dtype)
-    zarray[...] = nparray
+    # Block indexing addresses whole outer blocks (shards if sharded). block_indices() builds its
+    # array-space oracle from cumulative chunk offsets, so it works for regular
+    # (uniform), rectilinear, and sharded grids alike; block_test_arrays draws
+    # across that matrix (rectilinear + sharded is unsupported and not drawn).
+    zarray, nparray = data.draw(block_test_arrays())
 
-    block_indexer, array_indexer = data.draw(
-        block_indices(chunk_grid_shape=zarray.cdata_shape, chunks=chunks)
-    )
+    block_indexer, array_indexer = data.draw(block_indices(chunk_sizes=zarray.write_chunk_sizes))
     expected = nparray[array_indexer]
 
     # sync get, via both the .blocks interface and the dedicated method
     assert_array_equal(expected, zarray.blocks[block_indexer])
     assert_array_equal(expected, zarray.get_block_selection(block_indexer))
 
-    # sync set, via both interfaces
+    # sync set, via both interfaces; sharded set is broken upstream (GH2834)
+    assume(zarray.shards is None)
     new_data = data.draw(numpy_arrays(shapes=st.just(expected.shape), dtype=nparray.dtype))
     nparray[array_indexer] = new_data
     zarray.blocks[block_indexer] = new_data
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Extend the `test_block_indexing` Hypothesis property test to cover rectilinear chunk grids and sharded regular grids, and generalize the `block_indices` strategy in `zarr.testing.strategies` to build its array-space oracle from cumulative chunk offsets (`chunk_sizes` parameter) instead of a uniform chunk size.