Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/zarr/storage/_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from zarr.core.buffer import Buffer, gpu
from zarr.core.buffer.core import default_buffer_prototype
from zarr.core.common import concurrent_map
from zarr.storage._utils import _normalize_byte_range_index
from zarr.storage._utils import _normalize_byte_range_index, _normalize_prefix

if TYPE_CHECKING:
from collections.abc import AsyncIterator, Iterable, MutableMapping
Expand Down Expand Up @@ -152,6 +152,7 @@ async def list(self) -> AsyncIterator[str]:
async def list_prefix(self, prefix: str) -> AsyncIterator[str]:
    # docstring inherited
    # Match on a directory-style prefix as produced by the normalization helper.
    dir_prefix = _normalize_prefix(prefix)
    # Snapshot the keys before iterating so the dict can be mutated in-place
    # while this generator is being consumed (e.g. in delete_prefix).
    snapshot = list(self._store_dict)
    for candidate in snapshot:
        if not candidate.startswith(dir_prefix):
            continue
        yield candidate
Expand Down
11 changes: 11 additions & 0 deletions src/zarr/storage/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,17 @@
from zarr.core.buffer import Buffer


def _normalize_prefix(prefix: str) -> str:
"""Normalize a store prefix to ensure it has a trailing slash.
This ensures that prefix matching uses directory-like semantics,
so that e.g. prefix "a" does not match keys under "a_extra/".
"""
if prefix != "" and not prefix.endswith("/"):
return prefix + "/"
return prefix


def normalize_path(path: str | bytes | Path | None) -> str:
if path is None:
result = ""
Expand Down
2 changes: 2 additions & 0 deletions src/zarr/storage/_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
SuffixByteRequest,
)
from zarr.core.buffer import Buffer, BufferPrototype
from zarr.storage._utils import _normalize_prefix

if TYPE_CHECKING:
from collections.abc import AsyncIterator, Iterable
Expand Down Expand Up @@ -261,6 +262,7 @@ async def list(self) -> AsyncIterator[str]:

async def list_prefix(self, prefix: str) -> AsyncIterator[str]:
    # docstring inherited
    # Match on a directory-style prefix as produced by the normalization helper.
    dir_prefix = _normalize_prefix(prefix)
    async for candidate in self.list():
        if not candidate.startswith(dir_prefix):
            continue
        yield candidate
Expand Down
43 changes: 30 additions & 13 deletions src/zarr/testing/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,23 +442,40 @@ async def test_list(self, store: S) -> None:
async def test_list_prefix(self, store: S) -> None:
    """
    Test that the `list_prefix` method works as intended. Given a prefix, it should return
    all the keys under that prefix, treating the prefix as a directory path.

    Each prefix is checked both with and without a trailing slash. The
    "a_extra/" key is there to catch plain string-prefix matching: listing "a"
    must not return it, and the partial name "a_e" must return nothing.
    """
    data = self.buffer_cls.from_bytes(b"")

    store_dict = {
        "zarr.json": data,
        "a/zarr.json": data,
        "a/b/zarr.json": data,
        "a/b/c/zarr.json": data,
        "a_extra/zarr.json": data,
    }
    await store._set_many(store_dict.items())
    all_keys = sorted(store_dict)

    a_keys = ["a/b/c/zarr.json", "a/b/zarr.json", "a/zarr.json"]
    ab_keys = ["a/b/c/zarr.json", "a/b/zarr.json"]

    # Maps each prefix to the sorted list of keys that list_prefix must yield.
    test_cases: dict[str, list[str]] = {
        "": all_keys,
        "a/": a_keys,
        "a/b/": ab_keys,
        "a/b/c/": ["a/b/c/zarr.json"],
        "a_extra/": ["a_extra/zarr.json"],
        "a": a_keys,
        "a/b": ab_keys,
        "a/b/c": ["a/b/c/zarr.json"],
        "a_extra": ["a_extra/zarr.json"],
        "a_e": [],
        "b": [],
        "b/": [],
    }

    for prefix, expected in test_cases.items():
        # Sort the observed keys: the comparison must not depend on listing order.
        observed = sorted(await _collect_aiterator(store.list_prefix(prefix)))
        assert observed == expected, f"list_prefix({prefix!r}): {observed} != {expected}"

async def test_list_empty_path(self, store: S) -> None:
"""
Expand Down
6 changes: 0 additions & 6 deletions tests/test_store/test_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,6 @@ def test_store_supports_writes(self, store: MemoryStore) -> None:
def test_store_supports_listing(self, store: MemoryStore) -> None:
    """Check that the store fixture reports a truthy ``supports_listing``."""
    assert store.supports_listing

async def test_list_prefix(self, store: MemoryStore) -> None:
    # Placeholder: asserts a constant, so it passes without ever calling the store.
    # NOTE(review): if the enclosing class inherits a test_list_prefix from a shared
    # test suite, this definition replaces it and that check never runs — confirm.
    assert True

@pytest.mark.parametrize("dtype", ["uint8", "float32", "int64"])
@pytest.mark.parametrize("zarr_format", [2, 3])
async def test_deterministic_size(
Expand Down Expand Up @@ -163,9 +160,6 @@ def test_store_supports_writes(self, store: GpuMemoryStore) -> None:
def test_store_supports_listing(self, store: GpuMemoryStore) -> None:
    """Check that the store fixture reports a truthy ``supports_listing``."""
    assert store.supports_listing

async def test_list_prefix(self, store: GpuMemoryStore) -> None:
    # Placeholder: asserts a constant, so it passes without ever calling the store.
    # NOTE(review): if the enclosing class inherits a test_list_prefix from a shared
    # test suite, this definition replaces it and that check never runs — confirm.
    assert True

def test_dict_reference(self, store: GpuMemoryStore) -> None:
store_dict: dict[str, Any] = {}
result = GpuMemoryStore(store_dict=store_dict)
Expand Down
Loading