From 07b6fb74e2ae32c57228b7b206fe97fe843c3aad Mon Sep 17 00:00:00 2001 From: Cipher Date: Wed, 11 Mar 2026 02:40:21 -0700 Subject: [PATCH 1/3] fix: apply drop_axes squeeze in partial decode path for sharding When reading sharded arrays with mixed integer/list indexing (e.g. arr[0:10, 0, [0, 1]]), the outer OrthogonalIndexer produces chunk selections that have been ix_()-transformed for orthogonal advanced indexing. Integer indices become single-element ranges (size-1 dims) via ix_() to enable NumPy orthogonal indexing. In CodecPipeline.read_batch(), the non-partial path correctly applies squeeze(axis=drop_axes) to remove those size-1 integer dimensions before writing to the output buffer. However, the partial decode path (used by ShardingCodec) was missing this squeeze step; this patch adds it. Fixes #3691 Also: Fix line length violation in test error message to comply with 100 character linting limit. --- src/zarr/core/codec_pipeline.py | 6 ++-- tests/test_codecs/test_sharding.py | 56 +++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py index fd557ac43e..eed49556d3 100644 --- a/src/zarr/core/codec_pipeline.py +++ b/src/zarr/core/codec_pipeline.py @@ -263,6 +263,8 @@ async def read_batch( chunk_array_batch, batch_info, strict=False ): if chunk_array is not None: + if drop_axes: + chunk_array = chunk_array.squeeze(axis=drop_axes) out[out_selection] = chunk_array else: out[out_selection] = fill_value_or_default(chunk_spec) @@ -285,7 +287,7 @@ async def read_batch( ): if chunk_array is not None: tmp = chunk_array[chunk_selection] - if drop_axes != (): + if drop_axes: tmp = tmp.squeeze(axis=drop_axes) out[out_selection] = tmp else: @@ -324,7 +326,7 @@ def _merge_chunk_array( else: chunk_value = value[out_selection] # handle missing singleton dimensions - if drop_axes != (): + if drop_axes: item = tuple( None # equivalent to np.newaxis if idx in drop_axes diff --git 
a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index d0e2d09b7c..d4370cc519 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -490,7 +490,8 @@ def test_invalid_shard_shape() -> None: with pytest.raises( ValueError, match=re.escape( - "The array's `chunk_shape` (got (16, 16)) needs to be divisible by the shard's inner `chunk_shape` (got (9,))." + "The array's `chunk_shape` (got (16, 16)) needs to be divisible " + "by the shard's inner `chunk_shape` (got (9,))." ), ): zarr.create_array( @@ -501,3 +502,56 @@ def test_invalid_shard_shape() -> None: dtype=np.dtype("uint8"), fill_value=0, ) + + +@pytest.mark.parametrize("store", ["local"], indirect=["store"]) +def test_sharding_mixed_integer_list_indexing(store: Store) -> None: + """Regression test for https://github.com/zarr-developers/zarr-python/issues/3691. + + Mixed integer/list indexing on sharded arrays should return the same + shape and data as on equivalent chunked arrays. 
+ """ + import numpy as np + + data = np.arange(200 * 100 * 10, dtype=np.uint8).reshape(200, 100, 10) + + chunked = zarr.create_array( + store, + name="chunked", + shape=(200, 100, 10), + dtype=np.uint8, + chunks=(200, 100, 1), + overwrite=True, + ) + chunked[:, :, :] = data + + sharded = zarr.create_array( + store, + name="sharded", + shape=(200, 100, 10), + dtype=np.uint8, + chunks=(200, 100, 1), + shards=(200, 100, 10), + overwrite=True, + ) + sharded[:, :, :] = data + + # Mixed integer + list indexing + c = chunked[0:10, 0, [0, 1]] + s = sharded[0:10, 0, [0, 1]] + assert c.shape == s.shape == (10, 2), ( + f"Expected (10, 2), got chunked={c.shape}, sharded={s.shape}" + ) + np.testing.assert_array_equal(c, s) + + # Multiple integer axes + c2 = chunked[0, 0, [0, 1, 2]] + s2 = sharded[0, 0, [0, 1, 2]] + assert c2.shape == s2.shape == (3,) + np.testing.assert_array_equal(c2, s2) + + # Slice + integer + slice + c3 = chunked[0:5, 1, 0:3] + s3 = sharded[0:5, 1, 0:3] + assert c3.shape == s3.shape == (5, 3) + np.testing.assert_array_equal(c3, s3) From 4f332c470301ccc4bcb465ffe76c8b302ae0bcbb Mon Sep 17 00:00:00 2001 From: Cipher Date: Wed, 11 Mar 2026 02:42:22 -0700 Subject: [PATCH 2/3] fix(mypy): add type ignore comments for dynamic array indexing in sharding test The test uses complex indexing patterns (mixed integer/list indices) that mypy's zarr.Array stubs don't recognize as valid. Add specific type ignore comments for [index] and [union-attr] errors to suppress false positives. 
--- tests/test_codecs/test_sharding.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index d4370cc519..e6822c005c 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -537,21 +537,21 @@ def test_sharding_mixed_integer_list_indexing(store: Store) -> None: sharded[:, :, :] = data # Mixed integer + list indexing - c = chunked[0:10, 0, [0, 1]] - s = sharded[0:10, 0, [0, 1]] - assert c.shape == s.shape == (10, 2), ( - f"Expected (10, 2), got chunked={c.shape}, sharded={s.shape}" + c = chunked[0:10, 0, [0, 1]] # type: ignore[index] + s = sharded[0:10, 0, [0, 1]] # type: ignore[index] + assert c.shape == s.shape == (10, 2), ( # type: ignore[index] + f"Expected (10, 2), got chunked={c.shape}, sharded={s.shape}" # type: ignore[union-attr] ) np.testing.assert_array_equal(c, s) # Multiple integer axes - c2 = chunked[0, 0, [0, 1, 2]] - s2 = sharded[0, 0, [0, 1, 2]] - assert c2.shape == s2.shape == (3,) + c2 = chunked[0, 0, [0, 1, 2]] # type: ignore[index] + s2 = sharded[0, 0, [0, 1, 2]] # type: ignore[index] + assert c2.shape == s2.shape == (3,) # type: ignore[union-attr] np.testing.assert_array_equal(c2, s2) # Slice + integer + slice - c3 = chunked[0:5, 1, 0:3] - s3 = sharded[0:5, 1, 0:3] - assert c3.shape == s3.shape == (5, 3) + c3 = chunked[0:5, 1, 0:3] # type: ignore[index] + s3 = sharded[0:5, 1, 0:3] # type: ignore[index] + assert c3.shape == s3.shape == (5, 3) # type: ignore[union-attr] np.testing.assert_array_equal(c3, s3) From 7d786453673bbab6f3785fa4d90fea85065f07ea Mon Sep 17 00:00:00 2001 From: Cipher Date: Wed, 11 Mar 2026 02:50:13 -0700 Subject: [PATCH 3/3] fix(mypy): correct type-ignore codes for union attribute access in sharding test - Line 542: Fix assert accessing .shape by changing from [index] to [union-attr] - Line 543: The f-string .shape access already carries type-ignore[union-attr] and is left unchanged - Lines 554-555: Remove 
unused type-ignore[index] comments on assignments The mypy errors were caused by indexing operations returning union types that include scalar types (int, float, etc.), which don't have a .shape attribute. The proper fix uses type-ignore[union-attr] for attribute access, not [index]. --- tests/test_codecs/test_sharding.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index e6822c005c..d7cbeb5bdb 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -539,7 +539,7 @@ def test_sharding_mixed_integer_list_indexing(store: Store) -> None: # Mixed integer + list indexing c = chunked[0:10, 0, [0, 1]] # type: ignore[index] s = sharded[0:10, 0, [0, 1]] # type: ignore[index] - assert c.shape == s.shape == (10, 2), ( # type: ignore[index] + assert c.shape == s.shape == (10, 2), ( # type: ignore[union-attr] f"Expected (10, 2), got chunked={c.shape}, sharded={s.shape}" # type: ignore[union-attr] ) np.testing.assert_array_equal(c, s) @@ -551,7 +551,7 @@ def test_sharding_mixed_integer_list_indexing(store: Store) -> None: np.testing.assert_array_equal(c2, s2) # Slice + integer + slice - c3 = chunked[0:5, 1, 0:3] # type: ignore[index] - s3 = sharded[0:5, 1, 0:3] # type: ignore[index] + c3 = chunked[0:5, 1, 0:3] + s3 = sharded[0:5, 1, 0:3] assert c3.shape == s3.shape == (5, 3) # type: ignore[union-attr] np.testing.assert_array_equal(c3, s3)