diff --git a/.github/workflows/zarrs.yml b/.github/workflows/zarrs.yml new file mode 100644 index 0000000000..b17cfb3a3b --- /dev/null +++ b/.github/workflows/zarrs.yml @@ -0,0 +1,34 @@ +name: Zarrs bindings + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + fetch-depth: 0 # hatch-vcs needs tags to compute zarr's version + persist-credentials: false + - name: Install uv + uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 + with: + python-version: '3.12' + - name: Install Rust + uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable + - name: Run zarrs bindings tests + # the ubuntu runner image ships a Rust toolchain; the maturin build + # backend is fetched by uv on demand + run: uv run --group zarrs pytest tests/crud tests/zarrs -v diff --git a/.gitignore b/.gitignore index 3284865d6c..ae184fa731 100644 --- a/.gitignore +++ b/.gitignore @@ -94,3 +94,6 @@ zarr.egg-info/ # zarr-metadata package lockfile (a library, not an app) packages/zarr-metadata/uv.lock + +# zarrs-bindings Rust build artifacts +packages/zarrs-bindings/target/ diff --git a/changes/+zarrs-bindings.feature.md b/changes/+zarrs-bindings.feature.md new file mode 100644 index 0000000000..26216a71ae --- /dev/null +++ b/changes/+zarrs-bindings.feature.md @@ -0,0 +1,9 @@ +Added `zarr.crud`, an experimental backend-agnostic low-level functional API for +zarr hierarchy CRUD (`create_*`, `read_chunk`, `read_region`, `read_encoded_chunk`, +`write_chunk`, `delete_chunk`, `read_metadata`, `delete_node`, `list_children`). +Array routines take an explicit metadata document, enabling read-only views. 
+Operations delegate to a pluggable `CrudBackend`: a pure-Python reference backend +(the default) or the zarrs-accelerated backend in `zarr.zarrs`, backed by the Rust +[zarrs](https://zarrs.dev) crate via the in-repo `zarrs-bindings` PyO3 crate. +Select a backend with the `crud.backend` config key or a per-call `backend=` +argument. Build the zarrs backend for development with `uv sync --group zarrs`. diff --git a/docs/superpowers/plans/2026-06-12-zarrs-functional-api-phase1.md b/docs/superpowers/plans/2026-06-12-zarrs-functional-api-phase1.md new file mode 100644 index 0000000000..364de7cebc --- /dev/null +++ b/docs/superpowers/plans/2026-06-12-zarrs-functional-api-phase1.md @@ -0,0 +1,1796 @@ +# zarrs functional API (Phase 1) Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** A new in-repo PyO3 crate `zarrs-bindings` plus a `zarr.zarrs` subpackage exposing an async functional API (node lifecycle + whole-chunk I/O) that delegates to the Rust `zarrs` crate, working against any zarr-python `Store`. + +**Architecture:** Two layers. The Rust crate (`zarrs-bindings/`, maturin/PyO3 abi3-py312, native module `_zarrs_bindings`) is a thin binding over `zarrs` ≈0.23: functions take metadata as JSON strings, a store object, and a node path. The Python subpackage `src/zarr/zarrs/` owns the public API: dict metadata documents, `Store` adaptation (native `LocalStore` fast path + a generic sync-shim callback bridge), numpy conversion, and error translation. Spec: `docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md`. + +**Tech Stack:** Rust 1.91+ (1.96 installed), zarrs 0.23 (default features), pyo3 0.28 (abi3-py312), maturin build backend driven by uv (no maturin CLI needed), pytest with `asyncio_mode = "auto"`. 
+ +--- + +## Environment notes (read first) + +- **Python/pytest/mypy always via `uv run`** (user preference). +- **Build/refresh the extension:** `uv sync --group zarrs --reinstall-package zarrs-bindings`. Plain `uv run --group zarrs ...` does NOT reliably rebuild after Rust edits — always re-sync with `--reinstall-package zarrs-bindings` after touching `zarrs-bindings/`. +- **Fast Rust feedback:** `cargo check --manifest-path zarrs-bindings/Cargo.toml` (compiles without packaging a wheel). +- Builds need network access (crates.io for cargo, PyPI for maturin). The Claude Code sandbox on this host fails at bwrap init, so run build commands with the sandbox disabled. +- Pre-commit hooks (ruff format/check, mypy, codespell) run on `git commit`. If a hook modifies files, `git add` the changes and commit again. +- The Rust snippets below were written against verified zarrs 0.23.13 / zarrs_storage 0.4.3 signatures. If `cargo check` reports a mismatch (most likely candidates: the exact signature of `zarrs::node::node_exists`, the re-export path of `store_set_partial_many`, or `TryInto<StorePrefix> for &NodePath`), check https://docs.rs/zarrs/latest — the primitives all exist; only spelling may need adjustment. +- Docstrings use **markdown** (mkdocs), single backticks — not RST. 
+ +## File structure + +``` +zarrs-bindings/ # new Rust crate (own wheel: zarrs-bindings / _zarrs_bindings) + Cargo.toml + pyproject.toml # maturin backend + src/lib.rs # pymodule, exceptions, shared error helpers + src/store.rs # PyStore bridge + store resolution + src/node.rs # group/array creation, read_metadata, delete_node, list_children + src/chunk.rs # retrieve/store/erase chunk, retrieve_encoded_chunk +src/zarr/zarrs/ # new Python subpackage (public API) + __init__.py # import guard + re-exports + _bridge.py # StoreShim (sync adapter over async Store), resolve_store + _api.py # async functional API, numpy/JSON conversion, error translation +tests/zarrs/ # new test directory (skips when bindings missing) + __init__.py + conftest.py # store fixtures, array_metadata helper + test_bridge.py + test_node.py + test_chunk.py +.github/workflows/zarrs.yml # new CI job +pyproject.toml # modified: zarrs dependency group, uv source, sdist exclude +.gitignore # modified: zarrs-bindings/target/ +changes/+zarrs-bindings.feature.md +``` + +--- + +### Task 1: Rust crate scaffolding + uv wiring + +**Files:** +- Create: `zarrs-bindings/Cargo.toml` +- Create: `zarrs-bindings/pyproject.toml` +- Create: `zarrs-bindings/src/lib.rs` +- Modify: `pyproject.toml` (root) +- Modify: `.gitignore` + +- [ ] **Step 1: Create `zarrs-bindings/Cargo.toml`** + +```toml +[package] +name = "zarrs-bindings" +version = "0.1.0" +edition = "2024" +rust-version = "1.91" +publish = false + +[lib] +name = "_zarrs_bindings" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.28", features = ["abi3-py312"] } +serde_json = "1" +zarrs = "0.23" + +[profile.release] +lto = "thin" +``` + +- [ ] **Step 2: Create `zarrs-bindings/pyproject.toml`** + +```toml +[build-system] +requires = ["maturin>=1.7,<2"] +build-backend = "maturin" + +[project] +name = "zarrs-bindings" +version = "0.1.0" +description = "PyO3 bindings to the zarrs Rust crate, consumed by zarr.zarrs" +requires-python = ">=3.12" 
+license = "MIT" + +[tool.maturin] +module-name = "_zarrs_bindings" +strip = true +``` + +- [ ] **Step 3: Create `zarrs-bindings/src/lib.rs`** (exceptions + version only for now) + +```rust +use pyo3::exceptions::{PyRuntimeError, PyValueError}; +use pyo3::prelude::*; + +pyo3::create_exception!( + _zarrs_bindings, + NodeExistsError, + PyValueError, + "A node already exists at the given path." +); +pyo3::create_exception!( + _zarrs_bindings, + NodeNotFoundError, + PyValueError, + "No node was found at the given path." +); + +pub(crate) fn runtime_err(err: impl std::fmt::Display) -> PyErr { + PyRuntimeError::new_err(err.to_string()) +} + +pub(crate) fn value_err(err: impl std::fmt::Display) -> PyErr { + PyValueError::new_err(err.to_string()) +} + +#[pyfunction] +fn version() -> &'static str { + env!("CARGO_PKG_VERSION") +} + +#[pymodule] +fn _zarrs_bindings(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add("NodeExistsError", m.py().get_type::<NodeExistsError>())?; + m.add("NodeNotFoundError", m.py().get_type::<NodeNotFoundError>())?; + m.add_function(wrap_pyfunction!(version, m)?)?; + Ok(()) +} +``` + +- [ ] **Step 4: Wire into the root `pyproject.toml`** + +Add to the `[dependency-groups]` table (after the `dev` group): + +```toml +zarrs = [ + {include-group = "test"}, + "zarrs-bindings", +] +``` + +Add a new section at the end of the file: + +```toml +[tool.uv.sources] +zarrs-bindings = { path = "zarrs-bindings" } +``` + +Add `"/zarrs-bindings",` to the `exclude` list under `[tool.hatch.build.targets.sdist]`. + +- [ ] **Step 5: Add `zarrs-bindings/target/` to `.gitignore`** + +- [ ] **Step 6: Lock, build, smoke-test** + +Run: `cargo check --manifest-path zarrs-bindings/Cargo.toml` +Expected: compiles clean (first run downloads ~zarrs dependency tree). + +Run: `uv lock && uv sync --group zarrs` +Expected: lockfile updated; `zarrs-bindings` builds via maturin and installs. 
+ +Run: `uv run --group zarrs python -c "import _zarrs_bindings as z; print(z.version())"` +Expected: `0.1.0` + +- [ ] **Step 7: Commit** (include `zarrs-bindings/Cargo.lock`, which the build created) + +```bash +git add zarrs-bindings .gitignore pyproject.toml uv.lock +git commit -m "feat: scaffold zarrs-bindings PyO3 crate" +``` + +--- + +### Task 2: `zarr.zarrs` package skeleton + test scaffolding + +**Files:** +- Create: `src/zarr/zarrs/__init__.py` +- Create: `tests/zarrs/__init__.py` (empty) +- Create: `tests/zarrs/conftest.py` +- Test: `tests/zarrs/test_api.py` + +- [ ] **Step 1: Write the failing test** — `tests/zarrs/test_api.py` + +```python +from __future__ import annotations + + +def test_import() -> None: + import zarr.zarrs + + assert isinstance(zarr.zarrs.__version__, str) +``` + +- [ ] **Step 2: Create `tests/zarrs/__init__.py`** (empty file) **and `tests/zarrs/conftest.py`** + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pytest + +pytest.importorskip("_zarrs_bindings", reason="zarrs-bindings is not installed") + +import zarr +from zarr.storage import LocalStore, MemoryStore + +if TYPE_CHECKING: + from pathlib import Path + + from zarr.abc.store import Store + + +@pytest.fixture(params=["memory", "local"]) +async def store(request: pytest.FixtureRequest, tmp_path: Path) -> Store: + """A writable store: MemoryStore exercises the generic Python-callback bridge, + LocalStore exercises the native zarrs filesystem store.""" + if request.param == "memory": + return await MemoryStore.open() + return await LocalStore.open(root=tmp_path / "store") + + +def array_metadata(**kwargs: Any) -> dict[str, Any]: + """Build an array metadata document using zarr-python itself, so the + documents fed to zarrs always match what zarr-python would write.""" + params: dict[str, Any] = { + "shape": (8, 8), + "chunks": (4, 4), + "dtype": "uint16", + "zarr_format": 3, + } | kwargs + arr = 
zarr.create_array(store=MemoryStore(), **params) + doc = dict(arr.metadata.to_dict()) + if params["zarr_format"] == 2: + # v2 attributes live in .zattrs, not in the .zarray document + doc.pop("attributes", None) + return doc +``` + +- [ ] **Step 3: Run the test to verify it fails** + +Run: `uv run --group zarrs pytest tests/zarrs -v` +Expected: FAIL with `ModuleNotFoundError: No module named 'zarr.zarrs'` + +- [ ] **Step 4: Create `src/zarr/zarrs/__init__.py`** + +```python +""" +Low-level functional API for zarr hierarchies, backed by the Rust +[`zarrs`](https://zarrs.dev) crate. + +This subpackage is experimental. It requires the `zarrs-bindings` package +(in-repo Rust crate; install for development with `uv sync --group zarrs`). + +All array routines take an explicit metadata document (a `dict` matching the +`zarr.json` / `.zarray` document) rather than reading metadata from the store, +which makes read-only and virtual views possible. +""" + +try: + import _zarrs_bindings +except ImportError as e: + raise ImportError( + "zarr.zarrs requires the `zarrs-bindings` package, which is not installed. " + "It is built from the zarr-python repository: run `uv sync --group zarrs`." + ) from e + +__version__: str = _zarrs_bindings.version() + +__all__ = ["__version__"] +``` + +- [ ] **Step 5: Run the test to verify it passes** + +Run: `uv run --group zarrs pytest tests/zarrs -v` +Expected: 1 passed. Also verify the skip path works in the default env: `uv run pytest tests/zarrs -v` → all skipped/deselected with "zarrs-bindings is not installed" (the default group lacks the bindings). 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/zarr/zarrs tests/zarrs +git commit -m "feat: add zarr.zarrs package skeleton and test scaffolding" +``` + +--- + +### Task 3: StoreShim — sync bridge over async stores (pure Python, TDD) + +**Files:** +- Create: `src/zarr/zarrs/_bridge.py` +- Test: `tests/zarrs/test_bridge.py` + +- [ ] **Step 1: Write the failing tests** — `tests/zarrs/test_bridge.py` + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING + +from zarr.storage import LocalStore, MemoryStore +from zarr.zarrs._bridge import StoreShim, resolve_store + +if TYPE_CHECKING: + from pathlib import Path + + +def test_shim_get_set_delete() -> None: + shim = StoreShim(MemoryStore()) + assert shim.get("a/b") is None + shim.set("a/b", b"xyz") + assert shim.get("a/b") == b"xyz" + assert shim.get_range("a/b", 1, 1) == b"y" + assert shim.get_range("a/b", 1, None) == b"yz" + assert shim.get_suffix("a/b", 2) == b"yz" + assert shim.getsize("a/b") == 3 + assert shim.getsize("missing") is None + shim.delete("a/b") + assert shim.get("a/b") is None + + +def test_shim_listing() -> None: + shim = StoreShim(MemoryStore()) + shim.set("zarr.json", b"{}") + shim.set("a/zarr.json", b"{}") + shim.set("a/c/0/0", b"\x00") + assert shim.list() == ["a/c/0/0", "a/zarr.json", "zarr.json"] + assert shim.list_prefix("a/") == ["a/c/0/0", "a/zarr.json"] + assert shim.list_dir("a/") == (["a/zarr.json"], ["a/c/"]) + assert shim.list_dir("") == (["zarr.json"], ["a/"]) + assert shim.getsize_prefix("a/") == 3 + shim.delete_prefix("a/") + assert shim.list() == ["zarr.json"] + + +def test_resolve_store(tmp_path: Path) -> None: + local = LocalStore(tmp_path) + assert resolve_store(local) == {"filesystem": str(tmp_path)} + # read-only LocalStore must go through the shim so writes are rejected in Python + assert isinstance(resolve_store(LocalStore(tmp_path, read_only=True)), StoreShim) + assert isinstance(resolve_store(MemoryStore()), StoreShim) +``` + +- [ ] **Step 2: 
Run tests to verify they fail** + +Run: `uv run --group zarrs pytest tests/zarrs/test_bridge.py -v` +Expected: FAIL with `ModuleNotFoundError: No module named 'zarr.zarrs._bridge'` + +- [ ] **Step 3: Create `src/zarr/zarrs/_bridge.py`** + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING + +from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest +from zarr.core.buffer.core import default_buffer_prototype +from zarr.core.sync import _collect_aiterator, sync +from zarr.storage import LocalStore + +if TYPE_CHECKING: + from zarr.abc.store import Store + + +class StoreShim: + """ + Synchronous adapter over an async `Store`, called from Rust worker threads. + + Each method blocks the calling thread by submitting a coroutine to the zarr + event-loop thread (`zarr.core.sync`). Methods must never be called from the + zarr event-loop thread itself; the Rust bindings only call them from + `asyncio.to_thread` worker threads. + """ + + def __init__(self, store: Store) -> None: + self._store = store + self._prototype = default_buffer_prototype() + + def get(self, key: str) -> bytes | None: + buf = sync(self._store.get(key, prototype=self._prototype)) + return None if buf is None else buf.to_bytes() + + def get_range(self, key: str, offset: int, length: int | None) -> bytes | None: + byte_range = ( + RangeByteRequest(offset, offset + length) + if length is not None + else OffsetByteRequest(offset) + ) + buf = sync(self._store.get(key, prototype=self._prototype, byte_range=byte_range)) + return None if buf is None else buf.to_bytes() + + def get_suffix(self, key: str, suffix: int) -> bytes | None: + buf = sync( + self._store.get(key, prototype=self._prototype, byte_range=SuffixByteRequest(suffix)) + ) + return None if buf is None else buf.to_bytes() + + def set(self, key: str, value: bytes) -> None: + sync(self._store.set(key, self._prototype.buffer.from_bytes(value))) + + def delete(self, key: str) -> None: + 
sync(self._store.delete(key)) + + def delete_prefix(self, prefix: str) -> None: + sync(self._store.delete_dir(prefix.rstrip("/"))) + + def getsize(self, key: str) -> int | None: + try: + return sync(self._store.getsize(key)) + except FileNotFoundError: + return None + + def getsize_prefix(self, prefix: str) -> int: + return sync(self._store.getsize_prefix(prefix.rstrip("/"))) + + def list(self) -> list[str]: + return sorted(sync(_collect_aiterator(self._store.list()))) + + def list_prefix(self, prefix: str) -> list[str]: + return sorted(sync(_collect_aiterator(self._store.list_prefix(prefix)))) + + def list_dir(self, prefix: str) -> tuple[list[str], list[str]]: + """Return `(keys, prefixes)` directly under `prefix`, as zarrs expects: + full keys, and child prefixes ending in `/`.""" + stripped = prefix.rstrip("/") + children = sorted(sync(_collect_aiterator(self._store.list_dir(stripped)))) + keys: list[str] = [] + prefixes: list[str] = [] + for child in children: + full = f"{stripped}/{child}" if stripped else child + if sync(self._store.exists(full)): + keys.append(full) + else: + prefixes.append(full + "/") + return keys, prefixes + + +def resolve_store(store: Store) -> StoreShim | dict[str, str]: + """ + Convert a zarr `Store` into the representation `_zarrs_bindings` expects: + a config dict for stores with a native Rust implementation, otherwise a + `StoreShim` that Rust calls back into. + """ + if isinstance(store, LocalStore) and not store.read_only: + return {"filesystem": str(store.root)} + return StoreShim(store) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `uv run --group zarrs pytest tests/zarrs/test_bridge.py -v` +Expected: 3 passed. (If `list_dir`/`delete_dir`/`getsize_prefix` choke on the stripped prefix, check the `Store` ABC docstrings in `src/zarr/abc/store.py:348-501` — these methods take prefixes without trailing slashes.) 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/zarr/zarrs/_bridge.py tests/zarrs/test_bridge.py +git commit -m "feat: sync store bridge for zarrs bindings" +``` + +--- + +### Task 4: Rust store bridge + group creation, end to end + +**Files:** +- Create: `zarrs-bindings/src/store.rs` +- Create: `zarrs-bindings/src/node.rs` +- Modify: `zarrs-bindings/src/lib.rs` +- Create: `src/zarr/zarrs/_api.py` +- Modify: `src/zarr/zarrs/__init__.py` +- Test: `tests/zarrs/test_node.py` + +- [ ] **Step 1: Write the failing tests** — `tests/zarrs/test_node.py` + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pytest + +import zarr +from zarr.core.buffer.core import default_buffer_prototype +from zarr.zarrs import NodeExistsError, create_new_group, create_overwrite_group + +if TYPE_CHECKING: + from zarr.abc.store import Store + +GROUP_META: dict[str, Any] = { + "zarr_format": 3, + "node_type": "group", + "attributes": {"answer": 42}, +} + + +async def test_create_new_group(store: Store) -> None: + await create_new_group(GROUP_META, store, "foo") + group = zarr.open_group(store=store, path="foo", mode="r") + assert dict(group.attrs) == {"answer": 42} + + +async def test_create_new_group_at_root(store: Store) -> None: + await create_new_group(GROUP_META, store, "") + group = zarr.open_group(store=store, mode="r") + assert dict(group.attrs) == {"answer": 42} + + +async def test_create_new_group_existing_node(store: Store) -> None: + await create_new_group(GROUP_META, store, "foo") + with pytest.raises(NodeExistsError): + await create_new_group(GROUP_META, store, "foo") + + +async def test_create_overwrite_group(store: Store) -> None: + # an array and its chunks previously occupied the path; overwrite removes both + arr = zarr.create_array(store=store, name="foo", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + assert await store.exists("foo/c/0") + await create_overwrite_group(GROUP_META, store, "foo") + group = 
zarr.open_group(store=store, path="foo", mode="r") + assert dict(group.attrs) == {"answer": 42} + assert not await store.exists("foo/c/0") + assert await store.get("foo/zarr.json", prototype=default_buffer_prototype()) is not None +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `uv run --group zarrs pytest tests/zarrs/test_node.py -v` +Expected: FAIL with `ImportError: cannot import name 'NodeExistsError' from 'zarr.zarrs'` + +- [ ] **Step 3: Create `zarrs-bindings/src/store.rs`** + +```rust +use std::sync::Arc; + +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyDict}; +use zarrs::filesystem::FilesystemStore; +use zarrs::storage::{ + Bytes, ByteRange, ByteRangeIterator, ListableStorageTraits, MaybeBytes, MaybeBytesIterator, + OffsetBytesIterator, ReadableStorageTraits, ReadableWritableListableStorage, StorageError, + StoreKey, StoreKeys, StoreKeysPrefixes, StorePrefix, WritableStorageTraits, +}; + +/// A zarrs store backed by a Python `zarr.zarrs._bridge.StoreShim`. +/// +/// Every method attaches to the Python interpreter and calls the shim, which +/// blocks on the zarr event loop. Blocking waits in Python release the GIL, so +/// the loop thread can make progress while a Rust worker waits here. 
+pub(crate) struct PyStore(Py<PyAny>); + +fn py_err(err: PyErr) -> StorageError { + StorageError::Other(err.to_string()) +} + +fn invalid(err: impl std::fmt::Display) -> StorageError { + StorageError::Other(err.to_string()) +} + +impl PyStore { + fn get_with_range( + &self, + key: &StoreKey, + range: Option<&ByteRange>, + ) -> Result<MaybeBytes, StorageError> { + Python::attach(|py| { + let shim = self.0.bind(py); + let result = match range { + None => shim.call_method1("get", (key.as_str(),)), + Some(ByteRange::FromStart(offset, length)) => { + shim.call_method1("get_range", (key.as_str(), *offset, *length)) + } + Some(ByteRange::Suffix(suffix)) => { + shim.call_method1("get_suffix", (key.as_str(), *suffix)) + } + } + .map_err(py_err)?; + if result.is_none() { + Ok(None) + } else { + let bytes: Vec<u8> = result.extract().map_err(py_err)?; + Ok(Some(Bytes::from(bytes))) + } + }) + } +} + +impl ReadableStorageTraits for PyStore { + fn get(&self, key: &StoreKey) -> Result<MaybeBytes, StorageError> { + self.get_with_range(key, None) + } + + fn get_partial_many<'a>( + &'a self, + key: &StoreKey, + byte_ranges: ByteRangeIterator<'a>, + ) -> Result<MaybeBytesIterator<'a>, StorageError> { + let mut out = Vec::new(); + for byte_range in byte_ranges { + match self.get_with_range(key, Some(&byte_range))? { + Some(bytes) => out.push(Ok(bytes)), + None => return Ok(None), + } + } + Ok(Some(Box::new(out.into_iter()))) + } + + fn size_key(&self, key: &StoreKey) -> Result<Option<u64>, StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("getsize", (key.as_str(),)) + .map_err(py_err)? 
+ .extract() + .map_err(py_err) + }) + } + + fn supports_get_partial(&self) -> bool { + true + } +} + +impl WritableStorageTraits for PyStore { + fn set(&self, key: &StoreKey, value: Bytes) -> Result<(), StorageError> { + Python::attach(|py| { + let data = PyBytes::new(py, &value); + self.0 + .bind(py) + .call_method1("set", (key.as_str(), data)) + .map_err(py_err)?; + Ok(()) + }) + } + + fn set_partial_many( + &self, + key: &StoreKey, + offset_values: OffsetBytesIterator, + ) -> Result<(), StorageError> { + // read-modify-write fallback provided by zarrs + zarrs::storage::store_set_partial_many(self, key, offset_values) + } + + fn supports_set_partial(&self) -> bool { + false + } + + fn erase(&self, key: &StoreKey) -> Result<(), StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("delete", (key.as_str(),)) + .map_err(py_err)?; + Ok(()) + }) + } + + fn erase_prefix(&self, prefix: &StorePrefix) -> Result<(), StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("delete_prefix", (prefix.as_str(),)) + .map_err(py_err)?; + Ok(()) + }) + } +} + +impl ListableStorageTraits for PyStore { + fn list(&self) -> Result<StoreKeys, StorageError> { + Python::attach(|py| { + let keys: Vec<String> = self + .0 + .bind(py) + .call_method0("list") + .map_err(py_err)? + .extract() + .map_err(py_err)?; + keys.into_iter() + .map(|k| StoreKey::new(k).map_err(invalid)) + .collect() + }) + } + + fn list_prefix(&self, prefix: &StorePrefix) -> Result<StoreKeys, StorageError> { + Python::attach(|py| { + let keys: Vec<String> = self + .0 + .bind(py) + .call_method1("list_prefix", (prefix.as_str(),)) + .map_err(py_err)? + .extract() + .map_err(py_err)?; + keys.into_iter() + .map(|k| StoreKey::new(k).map_err(invalid)) + .collect() + }) + } + + fn list_dir(&self, prefix: &StorePrefix) -> Result<StoreKeysPrefixes, StorageError> { + Python::attach(|py| { + let (keys, prefixes): (Vec<String>, Vec<String>) = self + .0 + .bind(py) + .call_method1("list_dir", (prefix.as_str(),)) + .map_err(py_err)? 
+ .extract() + .map_err(py_err)?; + let keys = keys + .into_iter() + .map(|k| StoreKey::new(k).map_err(invalid)) + .collect::<Result<Vec<_>, StorageError>>()?; + let prefixes = prefixes + .into_iter() + .map(|p| StorePrefix::new(p).map_err(invalid)) + .collect::<Result<Vec<_>, StorageError>>()?; + Ok(StoreKeysPrefixes::new(keys, prefixes)) + }) + } + + fn size_prefix(&self, prefix: &StorePrefix) -> Result<u64, StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("getsize_prefix", (prefix.as_str(),)) + .map_err(py_err)? + .extract() + .map_err(py_err) + }) + } +} + +/// Convert the Python-side store representation (`zarr.zarrs._bridge.resolve_store` +/// output) into a zarrs storage handle. +pub(crate) fn resolve_store(obj: &Bound<'_, PyAny>) -> PyResult<ReadableWritableListableStorage> { + if let Ok(config) = obj.downcast::<PyDict>() { + if let Some(root) = config.get_item("filesystem")? { + let root: String = root.extract()?; + let store = + FilesystemStore::new(root).map_err(|e| PyValueError::new_err(e.to_string()))?; + return Ok(Arc::new(store)); + } + return Err(PyValueError::new_err("unrecognized store configuration")); + } + Ok(Arc::new(PyStore(obj.clone().unbind()))) +} +``` + +- [ ] **Step 4: Create `zarrs-bindings/src/node.rs`** (group functions only; later tasks extend this file) + +```rust +use pyo3::prelude::*; +use zarrs::group::Group; +use zarrs::metadata::GroupMetadata; +use zarrs::node::{node_exists, NodePath}; +use zarrs::storage::{ReadableWritableListableStorage, StorePrefix}; + +use crate::store::resolve_store; +use crate::{runtime_err, value_err, NodeExistsError}; + +pub(crate) fn parse_node_path(path: &str) -> PyResult<NodePath> { + NodePath::new(path).map_err(value_err) +} + +/// When a node exists at `node_path`: erase it (and everything under it) if +/// `overwrite`, otherwise raise `NodeExistsError`. +pub(crate) fn prepare_target( + storage: &ReadableWritableListableStorage, + node_path: &NodePath, + overwrite: bool, +) -> PyResult<()> { + if node_exists(storage, node_path).map_err(runtime_err)? 
{ + if !overwrite { + return Err(NodeExistsError::new_err(format!( + "a node already exists at path {}", + node_path.as_str() + ))); + } + let prefix: StorePrefix = node_path.try_into().map_err(value_err)?; + storage.erase_prefix(&prefix).map_err(runtime_err)?; + } + Ok(()) +} + +#[pyfunction] +pub(crate) fn create_group( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + overwrite: bool, +) -> PyResult<()> { + let storage = resolve_store(store)?; + let metadata = GroupMetadata::try_from(metadata_json.as_str()).map_err(value_err)?; + py.detach(move || { + let node_path = parse_node_path(&path)?; + prepare_target(&storage, &node_path, overwrite)?; + let group = Group::new_with_metadata(storage, &path, metadata).map_err(value_err)?; + group.store_metadata().map_err(runtime_err) + }) +} +``` + +- [ ] **Step 5: Register in `zarrs-bindings/src/lib.rs`** + +Add after the `use` lines: + +```rust +mod node; +mod store; +``` + +Add to the `#[pymodule]` body before `Ok(())`: + +```rust + m.add_function(wrap_pyfunction!(node::create_group, m)?)?; +``` + +- [ ] **Step 6: Compile** + +Run: `cargo check --manifest-path zarrs-bindings/Cargo.toml` +Expected: success. If `node_exists` or `try_into::()` signatures mismatch, fix per https://docs.rs/zarrs/latest/zarrs/node/ (the helpers exist; argument form may differ, e.g. `node_exists(&storage, &node_path)` vs a `&Arc` receiver). 
+ +- [ ] **Step 7: Create `src/zarr/zarrs/_api.py`** + +```python +from __future__ import annotations + +import asyncio +import json +from contextlib import contextmanager +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import _zarrs_bindings as _zb + +from zarr.errors import NodeNotFoundError +from zarr.zarrs._bridge import resolve_store + +if TYPE_CHECKING: + from collections.abc import Iterator, Mapping + + from zarr.abc.store import Store + from zarr.core.common import JSON + +NodeExistsError = _zb.NodeExistsError +"""Raised by `create_new_*` when a node already exists at the target path.""" + + +@dataclass(frozen=True, slots=True) +class ZarrsOptions: + """Options for zarrs-backed operations. + + Currently empty: fields (concurrency limits, checksum validation) arrive in + a later phase. Accepting it now keeps signatures stable. + """ + + +def _node_path(path: str) -> str: + """Convert a zarr-python node path (`""`, `"foo/bar"`) to a zarrs node path + (`"/"`, `"/foo/bar"`).""" + return f"/{path.strip('/')}" + + +@contextmanager +def _translate_errors() -> Iterator[None]: + try: + yield + except _zb.NodeNotFoundError as err: + raise NodeNotFoundError(str(err)) from err + + +async def create_new_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Create a group at `path` from a group metadata document. + + Raises `NodeExistsError` if any node already exists at `path`. 
+ """ + with _translate_errors(): + await asyncio.to_thread( + _zb.create_group, resolve_store(store), _node_path(path), json.dumps(metadata), False + ) + + +async def create_overwrite_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Create a group at `path`, deleting any existing node (and its children) first.""" + with _translate_errors(): + await asyncio.to_thread( + _zb.create_group, resolve_store(store), _node_path(path), json.dumps(metadata), True + ) +``` + +- [ ] **Step 8: Re-export from `src/zarr/zarrs/__init__.py`** + +Replace the `__version__`/`__all__` lines at the end with: + +```python +__version__: str = _zarrs_bindings.version() + +from zarr.zarrs._api import ( + NodeExistsError, + ZarrsOptions, + create_new_group, + create_overwrite_group, +) + +__all__ = [ + "NodeExistsError", + "ZarrsOptions", + "__version__", + "create_new_group", + "create_overwrite_group", +] +``` + +- [ ] **Step 9: Rebuild and run the tests** + +Run: `uv sync --group zarrs --reinstall-package zarrs-bindings` +Run: `uv run --group zarrs pytest tests/zarrs/test_node.py -v` +Expected: 8 passed (4 tests × 2 store params). The MemoryStore param proves the full Rust→Python callback bridge; LocalStore proves the native path. 
+ +- [ ] **Step 10: Commit** + +```bash +git add zarrs-bindings/src src/zarr/zarrs tests/zarrs/test_node.py +git commit -m "feat: zarrs store bridge and group creation" +``` + +--- + +### Task 5: Array creation + read_metadata + +**Files:** +- Modify: `zarrs-bindings/src/node.rs` +- Modify: `zarrs-bindings/src/lib.rs` +- Modify: `src/zarr/zarrs/_api.py`, `src/zarr/zarrs/__init__.py` +- Test: `tests/zarrs/test_node.py` + +- [ ] **Step 1: Add failing tests to `tests/zarrs/test_node.py`** + +Extend the imports: + +```python +import json + +import numpy as np + +from tests.zarrs.conftest import array_metadata +from zarr.errors import NodeNotFoundError +from zarr.zarrs import create_new_array, create_overwrite_array, read_metadata +``` + +(If `from tests.zarrs.conftest import ...` fails at collection, use a relative import `from .conftest import array_metadata` — `tests` is a package.) + +Add tests: + +```python +async def test_create_new_array(store: Store) -> None: + await create_new_array(array_metadata(), store, "arr") + arr = zarr.open_array(store=store, path="arr", mode="r") + assert arr.shape == (8, 8) + assert arr.chunks == (4, 4) + assert arr.dtype == np.dtype("uint16") + + +async def test_create_new_array_existing_node(store: Store) -> None: + await create_new_array(array_metadata(), store, "arr") + with pytest.raises(NodeExistsError): + await create_new_array(array_metadata(), store, "arr") + + +async def test_create_overwrite_array(store: Store) -> None: + zarr.create_group(store=store, path="arr") + await create_overwrite_array(array_metadata(), store, "arr") + arr = zarr.open_array(store=store, path="arr", mode="r") + assert arr.shape == (8, 8) + + +async def test_read_metadata_matches_stored_document(store: Store) -> None: + await create_new_array(array_metadata(), store, "arr") + observed = await read_metadata(store, "arr") + raw = await store.get("arr/zarr.json", prototype=default_buffer_prototype()) + assert raw is not None + assert observed == 
json.loads(raw.to_bytes()) + + +async def test_read_metadata_zarr_python_group(store: Store) -> None: + zarr.create_group(store=store, path="g", attributes={"a": 1}) + observed = await read_metadata(store, "g") + assert observed["node_type"] == "group" + assert observed["attributes"] == {"a": 1} + + +async def test_read_metadata_missing(store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await read_metadata(store, "nope") +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `uv run --group zarrs pytest tests/zarrs/test_node.py -v` +Expected: FAIL with `ImportError: cannot import name 'create_new_array'` + +- [ ] **Step 3: Add Rust functions to `zarrs-bindings/src/node.rs`** + +Extend the `use` block: + +```rust +use zarrs::array::Array; +use zarrs::metadata::ArrayMetadata; +use zarrs::node::Node; + +use crate::NodeNotFoundError; +``` + +Append: + +```rust +#[pyfunction] +pub(crate) fn create_array( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + overwrite: bool, +) -> PyResult<()> { + let storage = resolve_store(store)?; + let metadata = ArrayMetadata::try_from(metadata_json.as_str()).map_err(value_err)?; + py.detach(move || { + let node_path = parse_node_path(&path)?; + prepare_target(&storage, &node_path, overwrite)?; + let array = Array::new_with_metadata(storage, &path, metadata).map_err(value_err)?; + array.store_metadata().map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn read_metadata( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, +) -> PyResult<String> { + let storage = resolve_store(store)?; + py.detach(move || { + let node = Node::open(&storage, &path) + .map_err(|e| NodeNotFoundError::new_err(e.to_string()))?; + serde_json::to_string(node.metadata()).map_err(runtime_err) + }) +} +``` + +Register both in `lib.rs`: + +```rust + m.add_function(wrap_pyfunction!(node::create_array, m)?)?; + m.add_function(wrap_pyfunction!(node::read_metadata, m)?)?; +``` + +- [ ] **Step 4: Add Python 
wrappers to `src/zarr/zarrs/_api.py`** + +```python +async def create_new_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Create an array at `path` from a v2 or v3 array metadata document. + + Raises `NodeExistsError` if any node already exists at `path`. + """ + with _translate_errors(): + await asyncio.to_thread( + _zb.create_array, resolve_store(store), _node_path(path), json.dumps(metadata), False + ) + + +async def create_overwrite_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Create an array at `path`, deleting any existing node (and its children) first.""" + with _translate_errors(): + await asyncio.to_thread( + _zb.create_array, resolve_store(store), _node_path(path), json.dumps(metadata), True + ) + + +async def read_metadata( + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> dict[str, JSON]: + """Read the metadata document of the array or group at `path`. + + Raises `zarr.errors.NodeNotFoundError` if no node exists there. + """ + with _translate_errors(): + raw = await asyncio.to_thread(_zb.read_metadata, resolve_store(store), _node_path(path)) + result: dict[str, JSON] = json.loads(raw) + return result +``` + +Add `create_new_array`, `create_overwrite_array`, `read_metadata` to the `__init__.py` import and `__all__`. + +- [ ] **Step 5: Rebuild and test** + +Run: `cargo check --manifest-path zarrs-bindings/Cargo.toml` → success +Run: `uv sync --group zarrs --reinstall-package zarrs-bindings` +Run: `uv run --group zarrs pytest tests/zarrs/test_node.py -v` +Expected: all pass (20 = 10 tests × 2 stores). Note: `test_read_metadata_matches_stored_document` asserts zarrs round-trips the document zarrs itself wrote; if zarrs normalizes a field zarr-python emits differently (e.g. 
drops a `null` `dimension_names`), adjust the *fixture* (`array_metadata`) to drop the field, not the assertion. + +- [ ] **Step 6: Commit** + +```bash +git add zarrs-bindings/src src/zarr/zarrs tests/zarrs +git commit -m "feat: zarrs-backed array creation and metadata reads" +``` + +--- + +### Task 6: delete_node + list_children + +**Files:** +- Modify: `zarrs-bindings/src/node.rs`, `zarrs-bindings/src/lib.rs` +- Modify: `src/zarr/zarrs/_api.py`, `src/zarr/zarrs/__init__.py` +- Test: `tests/zarrs/test_node.py` + +- [ ] **Step 1: Add failing tests to `tests/zarrs/test_node.py`** + +```python +from zarr.zarrs import delete_node, list_children + + +async def test_delete_node(store: Store) -> None: + arr = zarr.create_array(store=store, name="doomed", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + await delete_node(store, "doomed") + assert not await store.exists("doomed/zarr.json") + assert not await store.exists("doomed/c/0") + + +async def test_delete_node_missing(store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await delete_node(store, "nope") + + +async def test_list_children(store: Store) -> None: + root = zarr.create_group(store=store) + root.create_group("sub_group", attributes={"kind": "group"}) + root.create_array("sub_array", shape=(4,), chunks=(2,), dtype="uint8") + children = await list_children(store, "") + by_path = dict(children) + assert set(by_path) == {"sub_group", "sub_array"} + assert by_path["sub_group"]["node_type"] == "group" + assert by_path["sub_array"]["node_type"] == "array" + + +async def test_list_children_missing(store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await list_children(store, "nope") +``` + +- [ ] **Step 2: Run to verify failure** — `uv run --group zarrs pytest tests/zarrs/test_node.py -v` → ImportError. 
+ +- [ ] **Step 3: Add Rust functions to `node.rs`** + +```rust +#[pyfunction] +pub(crate) fn delete_node( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, +) -> PyResult<()> { + let storage = resolve_store(store)?; + py.detach(move || { + let node_path = parse_node_path(&path)?; + if !node_exists(&storage, &node_path).map_err(runtime_err)? { + return Err(NodeNotFoundError::new_err(format!( + "no node found at path {}", + node_path.as_str() + ))); + } + let prefix: StorePrefix = (&node_path).try_into().map_err(value_err)?; + storage.erase_prefix(&prefix).map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn list_children( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, +) -> PyResult<Vec<(String, String)>> { + let storage = resolve_store(store)?; + py.detach(move || { + let group = Group::open(storage, &path) + .map_err(|e| NodeNotFoundError::new_err(e.to_string()))?; + let children = group.children(false).map_err(runtime_err)?; + children + .into_iter() + .map(|node| { + let metadata = serde_json::to_string(node.metadata()).map_err(runtime_err)?; + Ok((node.path().as_str().to_string(), metadata)) + }) + .collect() + }) +} +``` + +Register both in `lib.rs` as before. + +- [ ] **Step 4: Add Python wrappers to `_api.py`** + +```python +async def delete_node( + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Delete the node at `path`, including all keys and child nodes under it. + + Raises `zarr.errors.NodeNotFoundError` if no node exists there. Deleting the + root node (`path=""`) clears the entire store. + """ + with _translate_errors(): + await asyncio.to_thread(_zb.delete_node, resolve_store(store), _node_path(path)) + + +async def list_children( + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> list[tuple[str, dict[str, JSON]]]: + """List the direct children of the group at `path` as + `(path, metadata_document)` pairs. Paths are store-relative (no leading `/`). 
+ + Raises `zarr.errors.NodeNotFoundError` if no group exists at `path`. + """ + with _translate_errors(): + raw = await asyncio.to_thread(_zb.list_children, resolve_store(store), _node_path(path)) + return [(child_path.lstrip("/"), json.loads(doc)) for child_path, doc in raw] +``` + +Export both from `__init__.py`. + +- [ ] **Step 5: Rebuild and test** + +Run: `uv sync --group zarrs --reinstall-package zarrs-bindings && uv run --group zarrs pytest tests/zarrs/test_node.py -v` +Expected: all pass. + +- [ ] **Step 6: Commit** + +```bash +git add zarrs-bindings/src src/zarr/zarrs tests/zarrs +git commit -m "feat: zarrs-backed node deletion and child listing" +``` + +--- + +### Task 7: Whole-chunk I/O (decode/encode/raw/erase) + +**Files:** +- Create: `zarrs-bindings/src/chunk.rs` +- Modify: `zarrs-bindings/src/lib.rs` +- Modify: `src/zarr/zarrs/_api.py`, `src/zarr/zarrs/__init__.py` +- Test: `tests/zarrs/test_chunk.py` + +- [ ] **Step 1: Write the failing tests** — `tests/zarrs/test_chunk.py` + +```python +from __future__ import annotations + +import copy +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +import zarr +from tests.zarrs.conftest import array_metadata +from zarr.codecs import BloscCodec, GzipCodec, ZstdCodec +from zarr.core.buffer.core import default_buffer_prototype +from zarr.zarrs import ( + create_new_array, + decode_chunk, + encode_chunk, + erase_chunk, + read_encoded_chunk, +) + +if TYPE_CHECKING: + from zarr.abc.store import Store + + +def _filled( + store: Store, **kwargs: Any +) -> tuple[np.ndarray[Any, np.dtype[Any]], dict[str, Any]]: + """Create an 8x8 array named 'a' via zarr-python, fill it with a ramp, and + return (data, metadata_document).""" + params: dict[str, Any] = {"shape": (8, 8), "chunks": (4, 4), "dtype": "uint16"} | kwargs + arr = zarr.create_array(store=store, name="a", **params) + data = np.arange(64, dtype=params["dtype"]).reshape(8, 8) + arr[:, :] = data + doc = dict(arr.metadata.to_dict()) + if 
params.get("zarr_format") == 2: + # v2 attributes live in .zattrs, not in the .zarray document + doc.pop("attributes", None) + return data, doc + + +@pytest.mark.parametrize("dtype", ["uint8", "int32", "float64"]) +async def test_decode_chunk_differential(store: Store, dtype: str) -> None: + data, meta = _filled(store, dtype=dtype) + observed = await decode_chunk(meta, store, "a", (1, 0)) + np.testing.assert_array_equal(observed, data[4:8, 0:4]) + + +@pytest.mark.parametrize( + "compressors", [None, (GzipCodec(),), (ZstdCodec(),), (BloscCodec(cname="lz4"),)] +) +async def test_decode_chunk_codecs(store: Store, compressors: Any) -> None: + data, meta = _filled(store, compressors=compressors) + observed = await decode_chunk(meta, store, "a", (0, 1)) + np.testing.assert_array_equal(observed, data[0:4, 4:8]) + + +async def test_decode_chunk_v2(store: Store) -> None: + data, meta = _filled(store, zarr_format=2) + observed = await decode_chunk(meta, store, "a", (1, 1)) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_decode_chunk_sharding(store: Store) -> None: + # with sharding, the metadata chunk grid is the shard grid + data, meta = _filled(store, chunks=(2, 2), shards=(4, 4)) + observed = await decode_chunk(meta, store, "a", (1, 1)) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_decode_chunk_missing_returns_fill_value(store: Store) -> None: + arr = zarr.create_array( + store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16", fill_value=7 + ) + meta = dict(arr.metadata.to_dict()) + observed = await decode_chunk(meta, store, "a", (0, 0)) + np.testing.assert_array_equal(observed, np.full((4, 4), 7, dtype="uint16")) + + +async def test_decode_chunk_selection_not_implemented(store: Store) -> None: + _, meta = _filled(store) + with pytest.raises(NotImplementedError): + await decode_chunk(meta, store, "a", (0, 0), selection=(slice(0, 2), slice(0, 2))) + + +async def test_decode_chunk_metadata_view(store: 
Store) -> None: + # the read-only-view case: decode with a metadata document the store never saw + data, meta = _filled(store, dtype="uint16", compressors=None) + view = copy.deepcopy(meta) + view["data_type"] = "uint8" + view["shape"] = [8, 16] + view["chunk_grid"]["configuration"]["chunk_shape"] = [4, 8] + observed = await decode_chunk(view, store, "a", (1, 0)) + np.testing.assert_array_equal(observed, data[4:8, 0:4].view("uint8")) + + +async def test_encode_chunk_differential(store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a") + value = np.arange(16, dtype="uint16").reshape(4, 4) + await encode_chunk(meta, store, "a", (0, 1), value) + arr = zarr.open_array(store=store, path="a", mode="r") + np.testing.assert_array_equal(arr[0:4, 4:8], value) + + +async def test_encode_chunk_shape_mismatch(store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a") + with pytest.raises(ValueError, match="chunk shape"): + await encode_chunk(meta, store, "a", (0, 0), np.zeros((2, 2), dtype="uint16")) + + +async def test_read_encoded_chunk_matches_store(store: Store) -> None: + _, meta = _filled(store) + raw = await read_encoded_chunk(meta, store, "a", (0, 0)) + expected = await store.get("a/c/0/0", prototype=default_buffer_prototype()) + assert expected is not None + assert raw == expected.to_bytes() + + +async def test_read_encoded_chunk_missing_returns_none(store: Store) -> None: + arr = zarr.create_array(store=store, name="empty", shape=(8, 8), chunks=(4, 4), dtype="uint16") + meta = dict(arr.metadata.to_dict()) + assert await read_encoded_chunk(meta, store, "empty", (0, 0)) is None + + +async def test_erase_chunk(store: Store) -> None: + data, meta = _filled(store) + assert await store.exists("a/c/0/0") + await erase_chunk(meta, store, "a", (0, 0)) + assert not await store.exists("a/c/0/0") + arr = zarr.open_array(store=store, path="a", mode="r") + np.testing.assert_array_equal(arr[0:4, 0:4], np.zeros((4, 
4), dtype="uint16")) +``` + +- [ ] **Step 2: Run to verify failure** — `uv run --group zarrs pytest tests/zarrs/test_chunk.py -v` → ImportError. + +- [ ] **Step 3: Create `zarrs-bindings/src/chunk.rs`** + +```rust +use pyo3::exceptions::PyNotImplementedError; +use pyo3::prelude::*; +use pyo3::types::PyBytes; +use zarrs::array::{Array, ArrayBytes}; +use zarrs::metadata::ArrayMetadata; +use zarrs::storage::ReadableWritableListableStorage; + +use crate::store::resolve_store; +use crate::{runtime_err, value_err}; + +type DynArray = Array<dyn zarrs::storage::ReadableWritableListableStorageTraits>; + +/// Construct an Array view from an explicit metadata document, without +/// consulting the store for metadata. +fn array_view( + storage: ReadableWritableListableStorage, + path: &str, + metadata_json: &str, +) -> PyResult<DynArray> { + let metadata = ArrayMetadata::try_from(metadata_json).map_err(value_err)?; + Array::new_with_metadata(storage, path, metadata).map_err(value_err) +} + +#[pyfunction] +pub(crate) fn retrieve_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec<u64>, +) -> PyResult<Py<PyBytes>> { + let storage = resolve_store(store)?; + let data = py.detach(move || -> PyResult<Vec<u8>> { + let array = array_view(storage, &path, &metadata_json)?; + let bytes: ArrayBytes<'static> = + array.retrieve_chunk(&chunk_coords).map_err(runtime_err)?; + let fixed = bytes.into_fixed().map_err(|_| { + PyNotImplementedError::new_err("variable-length data types are not supported") + })?; + Ok(fixed.into_owned()) + })?; + Ok(PyBytes::new(py, &data).unbind()) +} + +#[pyfunction] +pub(crate) fn retrieve_encoded_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec<u64>, +) -> PyResult<Option<Py<PyBytes>>> { + let storage = resolve_store(store)?; + let data = py.detach(move || -> PyResult<Option<Vec<u8>>> { + let array = array_view(storage, &path, &metadata_json)?; + array + .retrieve_encoded_chunk(&chunk_coords) + .map_err(runtime_err) + })?; + Ok(data.map(|d| PyBytes::new(py, 
&d).unbind())) +} + +#[pyfunction] +pub(crate) fn store_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec<u64>, + data: Vec<u8>, +) -> PyResult<()> { + let storage = resolve_store(store)?; + py.detach(move || { + let array = array_view(storage, &path, &metadata_json)?; + array + .store_chunk(&chunk_coords, ArrayBytes::new_flen(data)) + .map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn erase_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec<u64>, +) -> PyResult<()> { + let storage = resolve_store(store)?; + py.detach(move || { + let array = array_view(storage, &path, &metadata_json)?; + array.erase_chunk(&chunk_coords).map_err(runtime_err) + }) +} +``` + +Register in `lib.rs`: add `mod chunk;` and + +```rust + m.add_function(wrap_pyfunction!(chunk::retrieve_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::retrieve_encoded_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::store_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::erase_chunk, m)?)?; +``` + +- [ ] **Step 4: Add Python wrappers to `_api.py`** + +Extend imports: + +```python +from typing import Any + +import numpy as np +import numpy.typing as npt +``` + +Add: + +```python +def _chunk_dtype_and_shape( + metadata: Mapping[str, JSON], +) -> tuple[np.dtype[Any], tuple[int, ...]]: + """Resolve the numpy dtype and chunk shape from a metadata document, using + zarr-python's own metadata parsing.""" + from zarr.core.metadata.v2 import ArrayV2Metadata + from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata + + if metadata.get("zarr_format") == 3: + meta3 = ArrayV3Metadata.from_dict(dict(metadata)) + grid = meta3.chunk_grid + if not isinstance(grid, RegularChunkGridMetadata): + raise NotImplementedError("only regular chunk grids are supported") + return meta3.data_type.to_native_dtype(), grid.chunk_shape + meta2 = 
ArrayV2Metadata.from_dict(dict(metadata)) + return meta2.dtype.to_native_dtype(), meta2.chunks + + +async def decode_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + selection: tuple[slice | int, ...] | None = None, + options: ZarrsOptions | None = None, +) -> np.ndarray[Any, np.dtype[Any]]: + """Read and decode the chunk at `chunk_coords` of the array described by + `metadata`, located at `path` in `store`. + + The metadata document is authoritative: it is not read from the store. + Missing chunks decode to the fill value. `selection` (a chunk-relative + subset) is not implemented yet. + """ + if selection is not None: + raise NotImplementedError("chunk subset selection is not implemented yet") + raw = await asyncio.to_thread( + _zb.retrieve_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(chunk_coords), + ) + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + return np.frombuffer(raw, dtype=dtype).reshape(chunk_shape) + + +async def read_encoded_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: ZarrsOptions | None = None, +) -> bytes | None: + """Read the raw, still-encoded bytes of the chunk at `chunk_coords`, or + `None` if the chunk does not exist. No codecs are applied.""" + result: bytes | None = await asyncio.to_thread( + _zb.retrieve_encoded_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(chunk_coords), + ) + return result + + +async def encode_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + value: npt.ArrayLike, + *, + options: ZarrsOptions | None = None, +) -> None: + """Encode `value` with the codecs in `metadata` and store it as the chunk + at `chunk_coords`. 
`value` must match the chunk shape exactly.""" + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + arr = np.ascontiguousarray(np.asarray(value, dtype=dtype)) + if arr.shape != chunk_shape: + raise ValueError(f"value shape {arr.shape} does not match chunk shape {chunk_shape}") + await asyncio.to_thread( + _zb.store_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(chunk_coords), + arr.tobytes(), + ) + + +async def erase_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: ZarrsOptions | None = None, +) -> None: + """Delete the chunk at `chunk_coords`. Deleting a missing chunk is a no-op.""" + await asyncio.to_thread( + _zb.erase_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(chunk_coords), + ) +``` + +Export `decode_chunk`, `read_encoded_chunk`, `encode_chunk`, `erase_chunk` from `__init__.py`. + +- [ ] **Step 5: Rebuild and test** + +Run: `cargo check --manifest-path zarrs-bindings/Cargo.toml` → success +Run: `uv sync --group zarrs --reinstall-package zarrs-bindings` +Run: `uv run --group zarrs pytest tests/zarrs/test_chunk.py -v` +Expected: all pass. Likely first-run issues and their fixes: + - v2 differential test fails on dtype byte order → constrain the v2 test to `dtype=" None` annotations, which the code above has). + +- [ ] **Step 3: Re-run the full zarrs suite** — `uv run --group zarrs pytest tests/zarrs -v` → all pass. + +- [ ] **Step 4: Verify the rest of the test suite is unaffected** + +Run: `uv run pytest tests/test_array.py tests/test_group.py -x -q` +Expected: pass (no production code outside `src/zarr/zarrs/` changed). 
+ +- [ ] **Step 5: Commit** + +```bash +git add -A +git commit -m "chore: lint fixes and changelog for zarr.zarrs" +``` + +--- + +### Task 9: CI workflow + +**Files:** +- Create: `.github/workflows/zarrs.yml` + +- [ ] **Step 1: Create `.github/workflows/zarrs.yml`** (action SHAs copied from `.github/workflows/test.yml` — keep them identical so dependabot groups them) + +```yaml +name: Zarrs bindings + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + fetch-depth: 0 # hatch-vcs needs tags to compute zarr's version + persist-credentials: false + - name: Install uv + uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 + with: + python-version: '3.12' + - name: Run zarrs bindings tests + # the ubuntu runner image ships a Rust toolchain; the maturin build + # backend is fetched by uv on demand + run: uv run --group zarrs pytest tests/zarrs -v +``` + +- [ ] **Step 2: Validate the workflow** + +Run: `uvx zizmor .github/workflows/zarrs.yml` +Expected: no findings (matches the repo's zizmor policy). + +- [ ] **Step 3: Commit** + +```bash +git add .github/workflows/zarrs.yml +git commit -m "ci: test job for zarrs bindings" +``` + +--- + +## Out of scope for this plan (later phases, per spec) + +- `decode_region` / `encode_region` and chunk-subset `selection` (Phase 2: zarrs `retrieve_array_subset` / `partial_decoder`). +- `ZarrsOptions` fields (concurrency, checksum validation, direct IO), obstore native path, benchmarks (Phase 3). +- Variable-length data types, non-regular chunk grids, fancy indexing. +- Publishing the `zarrs-bindings` wheel / a `zarr[zarrs]` extra on PyPI. 
diff --git a/docs/superpowers/plans/2026-06-15-crud-backend-abstraction.md b/docs/superpowers/plans/2026-06-15-crud-backend-abstraction.md new file mode 100644 index 0000000000..985f325990 --- /dev/null +++ b/docs/superpowers/plans/2026-06-15-crud-backend-abstraction.md @@ -0,0 +1,1698 @@ +# Backend-agnostic CRUD layer Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Turn the low-level functional CRUD API into a backend-agnostic `zarr.crud` package with a pure-Python reference backend and the existing zarrs bindings as a second, interchangeable backend. + +**Architecture:** A narrow async `CrudBackend` protocol (byte/metadata level) plus a shared `zarr.crud` facade that holds all backend-neutral logic (selection normalization, numpy assembly, dtype handling, `read_encoded_chunk` via `store.get`). Two backends conform: `ReferenceBackend` (pure Python, wraps zarr-python's own codec pipeline / indexer / metadata machinery) and `ZarrsBackend` (wraps `_zarrs_bindings`). A registry + `zarr.config` key `crud.backend` (default `"reference"`) selects one; every facade function also takes `backend=`. + +**Tech Stack:** Python 3.12+, numpy, zarr-python internals (`BatchedCodecPipeline`, `AsyncArray`, `save_metadata`, `ArrayConfig`/`ArraySpec`, chunk-key encoding), the existing `_zarrs_bindings` Rust extension (unchanged — no Rust build needed). + +Spec: `docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md`. + +--- + +## Environment notes (read first) + +- **Run python/pytest/mypy via `uv run`.** The zarrs backend needs the extension: `uv run --group zarrs pytest ...`. The reference backend works under plain `uv run pytest ...`. +- The Claude Code bash sandbox is broken on this host (`bwrap: loopback` error). Run commands with the sandbox **disabled**. 
+- **No Rust changes in this plan.** The `_zarrs_bindings` pyfunctions keep their existing names (`retrieve_chunk`, `store_chunk`, `erase_chunk`, `retrieve_array_subset`, `retrieve_encoded_chunk`, `create_array`, `create_group`, `read_metadata`, `delete_node`, `list_children`); `ZarrsBackend` adapts them to the contract's verb names. No `cargo` build or `uv sync --reinstall` is required, but the `zarrs` group must already be installed (`uv sync --group zarrs`) to run the zarrs-parametrized tests. +- Pre-commit hooks (ruff strict, mypy strict over `src`+`tests`, codespell) run on `git commit`. If a hook rewrites a file, `git add` and commit again. +- Docstrings use markdown (single backticks), not RST. +- pytest is configured with `asyncio_mode = "auto"` — async tests/fixtures need no decorator. + +## File structure + +``` +src/zarr/crud/ + __init__.py # public exports; registers the reference backend at import + _backend.py # CrudBackend Protocol + NodeExistsError + _registry.py # register_backend / get_backend + config default resolution + _reference.py # ReferenceBackend (pure Python) + _api.py # shared async facade (the public functions) + neutral helpers +src/zarr/zarrs/ + __init__.py # SHRINKS: version + register ZarrsBackend; no _api re-exports + _backend.py # ZarrsBackend (wraps _zarrs_bindings) — NEW + _bridge.py # unchanged + _api.py # DELETED +src/zarr/core/config.py # add "crud": {"backend": "reference"} +tests/crud/ + __init__.py + conftest.py # store fixture, backend fixture (reference+zarrs), metadata helpers + test_registry.py # registry + default + override + test_reference_backend.py # direct reference-backend smoke tests + test_crud.py # full differential suite, parametrized over backend x store +tests/zarrs/ + __init__.py # unchanged + conftest.py # unchanged (still used by test_bridge/test_cache) + test_bridge.py # unchanged + test_cache.py # imports updated to zarr.crud read_chunk/write_chunk, backend="zarrs" + test_node.py # DELETED (covered 
by tests/crud/test_crud.py) + test_chunk.py # DELETED (covered by tests/crud/test_crud.py) + test_api.py # DELETED (replaced by tests/crud import coverage) +changes/+zarrs-bindings.feature.md # reworded for zarr.crud +.github/workflows/zarrs.yml # run tests/crud tests/zarrs +``` + +--- + +### Task 1: `zarr.crud` skeleton — protocol, exceptions, registry, config + +**Files:** +- Create: `src/zarr/crud/__init__.py` +- Create: `src/zarr/crud/_backend.py` +- Create: `src/zarr/crud/_registry.py` +- Modify: `src/zarr/core/config.py` +- Create: `tests/crud/__init__.py` (empty) +- Test: `tests/crud/test_registry.py` + +- [ ] **Step 1: Write the failing test** — `tests/crud/test_registry.py` + +```python +from __future__ import annotations + +import pytest + +from zarr.crud import CrudBackend, NodeExistsError, get_backend, register_backend + + +def test_node_exists_error_is_value_error() -> None: + assert issubclass(NodeExistsError, ValueError) + + +def test_default_backend_is_reference() -> None: + # the reference backend is registered at import and is the configured default + be = get_backend() + assert be is get_backend("reference") + + +def test_get_unknown_backend_raises() -> None: + with pytest.raises(KeyError, match="no CRUD backend"): + get_backend("does-not-exist") + + +def test_register_and_resolve_instance() -> None: + class Dummy: + pass + + dummy = Dummy() + register_backend("dummy-test", dummy) # type: ignore[arg-type] + try: + assert get_backend("dummy-test") is dummy + finally: + from zarr.crud import _registry + + _registry._BACKENDS.pop("dummy-test", None) + + +def test_protocol_is_runtime_checkable() -> None: + # ReferenceBackend (registered as "reference") structurally satisfies the protocol + assert isinstance(get_backend("reference"), CrudBackend) +``` + +- [ ] **Step 2: Run it to verify failure** + +Run: `uv run pytest tests/crud/test_registry.py -v` +Expected: FAIL — `ModuleNotFoundError: No module named 'zarr.crud'` + +- [ ] **Step 3: Create 
`tests/crud/__init__.py`** (empty file) + +- [ ] **Step 4: Create `src/zarr/crud/_backend.py`** + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol, runtime_checkable + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + from zarr.abc.store import Store + from zarr.core.common import JSON + + +class NodeExistsError(ValueError): + """Raised when a node already exists at a path and overwrite was not requested.""" + + +@runtime_checkable +class CrudBackend(Protocol): + """The byte/metadata-level contract a CRUD backend must implement. + + Methods take neutral types: the metadata document as a `dict`, a zarr + `Store`, and plain zarr paths (`""`, `"foo/bar"`). They return raw bytes, + parsed JSON documents, or `None`. The shared `zarr.crud` facade builds the + numpy- and selection-level API on top of these. + + `create_*` raise `zarr.crud.NodeExistsError` when a node exists and + `overwrite` is false. `read_metadata`/`delete_node`/`list_children` raise + `zarr.errors.NodeNotFoundError` when the target is missing. + """ + + async def create_array( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: ... + + async def create_group( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: ... + + async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: ... + + async def read_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> bytes: ... + + async def read_subset( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + start: Sequence[int], + shape: Sequence[int], + ) -> bytes: ... + + async def write_chunk( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + coords: tuple[int, ...], + data: bytes, + ) -> None: ... + + async def delete_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] 
+ ) -> None: ... + + async def delete_node(self, store: Store, path: str) -> None: ... + + async def list_children( + self, store: Store, path: str + ) -> list[tuple[str, dict[str, JSON]]]: ... +``` + +- [ ] **Step 5: Create `src/zarr/crud/_registry.py`** + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING + +from zarr.core.config import config + +if TYPE_CHECKING: + from zarr.crud._backend import CrudBackend + +_BACKENDS: dict[str, CrudBackend] = {} + + +def register_backend(name: str, backend: CrudBackend) -> None: + """Register a CRUD backend instance under `name`.""" + _BACKENDS[name] = backend + + +def get_backend(name: str | None = None) -> CrudBackend: + """Resolve a backend by name, or the configured default when `name` is None. + + Selecting `"zarrs"` imports `zarr.zarrs` if needed so it can self-register. + """ + if name is None: + name = config.get("crud.backend") + if name not in _BACKENDS and name == "zarrs": + import zarr.zarrs # noqa: F401 (import registers the zarrs backend) + if name not in _BACKENDS: + raise KeyError( + f"no CRUD backend registered as {name!r}; registered: {sorted(_BACKENDS)}" + ) + return _BACKENDS[name] +``` + +- [ ] **Step 6: Create `src/zarr/crud/__init__.py`** (reference backend is added in Task 2; for now register nothing) + +```python +""" +Backend-agnostic low-level functional CRUD API for zarr hierarchies. + +The public functions delegate byte- and metadata-level work to a `CrudBackend`. +Two backends ship: a pure-Python reference backend (the default) and a +zarrs-accelerated backend (`zarr.zarrs`, requires the `zarrs-bindings` +extension). Select one with the `crud.backend` config key or a per-call +`backend=` argument. + +Array routines take an explicit metadata document (a `dict` matching the +`zarr.json` / `.zarray` document) rather than reading it from the store, which +makes read-only and virtual views possible. 
+""" + +from zarr.crud._backend import CrudBackend, NodeExistsError +from zarr.crud._registry import get_backend, register_backend + +__all__ = [ + "CrudBackend", + "NodeExistsError", + "get_backend", + "register_backend", +] +``` + +- [ ] **Step 7: Add the config default** — `src/zarr/core/config.py` + +Find the defaults mapping passed to the `Config(...)` constructor (it contains the `"codec_pipeline"` key). Add a sibling entry: + +```python + "crud": {"backend": "reference"}, +``` + +Run to confirm it loads: `uv run python -c "from zarr.core.config import config; print(config.get('crud.backend'))"` +Expected: `reference` + +- [ ] **Step 8: Run the test (note: `test_default_backend_is_reference` and the protocol test still fail — reference backend arrives in Task 2)** + +Run: `uv run pytest tests/crud/test_registry.py -v` +Expected: `test_node_exists_error_is_value_error`, `test_get_unknown_backend_raises`, `test_register_and_resolve_instance` PASS; `test_default_backend_is_reference` and `test_protocol_is_runtime_checkable` FAIL with `KeyError: no CRUD backend registered as 'reference'`. That is expected at this task boundary; they pass after Task 2. + +- [ ] **Step 9: Commit** + +```bash +git add src/zarr/crud/_backend.py src/zarr/crud/_registry.py src/zarr/crud/__init__.py src/zarr/core/config.py tests/crud/__init__.py tests/crud/test_registry.py +git commit -m "feat: zarr.crud skeleton — CrudBackend protocol, registry, config" +``` + +End every commit body in this plan with: +``` +Co-Authored-By: Claude Fable 5 +``` + +--- + +### Task 2: `ReferenceBackend` (pure Python) + +**Files:** +- Create: `src/zarr/crud/_reference.py` +- Modify: `src/zarr/crud/__init__.py` +- Test: `tests/crud/test_reference_backend.py` + +All snippets below are verified against the installed zarr-python. 
+ +- [ ] **Step 1: Write the failing test** — `tests/crud/test_reference_backend.py` + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +import zarr +from zarr.crud import NodeExistsError, get_backend +from zarr.errors import NodeNotFoundError +from zarr.storage import MemoryStore + +if TYPE_CHECKING: + pass + +import pytest + + +def _array_meta() -> dict: + arr = zarr.create_array(store=MemoryStore(), shape=(8, 8), chunks=(4, 4), dtype="uint16") + return dict(arr.metadata.to_dict()) + + +async def test_reference_round_trip_chunk() -> None: + be = get_backend("reference") + store = MemoryStore() + meta = _array_meta() + await be.create_array(store, "a", meta, overwrite=False) + value = np.arange(16, dtype="uint16").reshape(4, 4) + await be.write_chunk(store, "a", meta, (0, 1), value.tobytes()) + raw = await be.read_chunk(store, "a", meta, (0, 1)) + np.testing.assert_array_equal(np.frombuffer(raw, dtype="uint16").reshape(4, 4), value) + + +async def test_reference_read_subset_spans_chunks() -> None: + be = get_backend("reference") + store = MemoryStore() + arr = zarr.create_array(store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16") + data = np.arange(64, dtype="uint16").reshape(8, 8) + arr[:, :] = data + meta = dict(arr.metadata.to_dict()) + raw = await be.read_subset(store, "a", meta, (2, 1), (5, 4)) + np.testing.assert_array_equal( + np.frombuffer(raw, dtype="uint16").reshape(5, 4), data[2:7, 1:5] + ) + + +async def test_reference_create_exists_raises() -> None: + be = get_backend("reference") + store = MemoryStore() + meta = _array_meta() + await be.create_array(store, "a", meta, overwrite=False) + with pytest.raises(NodeExistsError): + await be.create_array(store, "a", meta, overwrite=False) + + +async def test_reference_read_metadata_missing_raises() -> None: + be = get_backend("reference") + with pytest.raises(NodeNotFoundError): + await be.read_metadata(MemoryStore(), "nope") +``` + +- [ 
] **Step 2: Run it to verify failure** + +Run: `uv run pytest tests/crud/test_reference_backend.py -v` +Expected: FAIL — `KeyError: no CRUD backend registered as 'reference'` + +- [ ] **Step 3: Create `src/zarr/crud/_reference.py`** + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np + +from zarr.core.array import AsyncArray, create_codec_pipeline +from zarr.core.array_spec import ArrayConfig, ArraySpec +from zarr.core.buffer.core import NDBuffer, default_buffer_prototype +from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON +from zarr.core.group import GroupMetadata +from zarr.core.metadata.io import save_metadata +from zarr.core.metadata.v2 import ArrayV2Metadata +from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata +from zarr.crud._backend import NodeExistsError +from zarr.errors import NodeNotFoundError +from zarr.storage._common import StorePath + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + from zarr.abc.store import Store + from zarr.core.common import JSON + + +def _parse_array_metadata( + metadata: Mapping[str, JSON], +) -> ArrayV3Metadata | ArrayV2Metadata: + """Parse a metadata document into a v2 or v3 array metadata object.""" + data = dict(metadata) + if data.get("zarr_format") == 3: + return ArrayV3Metadata.from_dict(data) + return ArrayV2Metadata.from_dict(data) + + +def _native_dtype(meta_obj: ArrayV3Metadata | ArrayV2Metadata) -> np.dtype[Any]: + """Numpy dtype in native byte order (zarrs and the facade assume native).""" + return meta_obj.dtype.to_native_dtype().newbyteorder("=") + + +def _chunk_shape(meta_obj: ArrayV3Metadata | ArrayV2Metadata) -> tuple[int, ...]: + if isinstance(meta_obj, ArrayV3Metadata): + grid = meta_obj.chunk_grid + if not isinstance(grid, RegularChunkGridMetadata): + raise NotImplementedError("only regular chunk grids are supported") + return tuple(grid.chunk_shape) + return tuple(meta_obj.chunks) + + 
+def _array_spec( + meta_obj: ArrayV3Metadata | ArrayV2Metadata, shape: tuple[int, ...] +) -> ArraySpec: + return ArraySpec( + shape=shape, + dtype=meta_obj.dtype, + fill_value=meta_obj.fill_value, + config=ArrayConfig.from_dict({}), + prototype=default_buffer_prototype(), + ) + + +def _meta_key(path: str, zarr_format: int) -> str: + fname = ZARR_JSON if zarr_format == 3 else ZARRAY_JSON + p = path.strip("/") + return f"{p}/{fname}" if p else fname + + +class ReferenceBackend: + """Pure-Python CRUD backend wrapping zarr-python's own machinery. + + Constructs no high-level `Array` for chunk operations (it drives the codec + pipeline directly); it does reuse `AsyncArray.getitem` for multi-chunk + subset reads, which is exactly the `BasicIndexer` + codec-pipeline read path. + """ + + async def create_array( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + meta_obj = _parse_array_metadata(metadata) + await self._create(store, path, meta_obj, meta_obj.zarr_format, overwrite=overwrite) + + async def create_group( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + meta_obj = GroupMetadata.from_dict(dict(metadata)) + await self._create(store, path, meta_obj, meta_obj.zarr_format, overwrite=overwrite) + + async def _create( + self, store: Store, path: str, meta_obj: Any, zarr_format: int, *, overwrite: bool + ) -> None: + sp = StorePath(store, path.strip("/")) + proto = default_buffer_prototype() + if overwrite: + await store.delete_dir(path.strip("/")) + else: + key = _meta_key(path, zarr_format) + if await store.get(key, prototype=proto) is not None: + raise NodeExistsError(f"a node already exists at path {path!r}") + await save_metadata(sp, meta_obj, ensure_parents=True) + + async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: + from zarr.core._json import buffer_to_json_object + + proto = default_buffer_prototype() + p = path.strip("/") + sp = 
StorePath(store, p) + buf = await (sp / ZARR_JSON).get(prototype=proto) + if buf is not None: + return buffer_to_json_object(buf) + buf2 = await (sp / ZARRAY_JSON).get(prototype=proto) + if buf2 is not None: + doc = buffer_to_json_object(buf2) + zattrs = await (sp / ZATTRS_JSON).get(prototype=proto) + if zattrs is not None: + doc["attributes"] = buffer_to_json_object(zattrs) + return doc + raise NodeNotFoundError(f"no node found at path {path!r}") + + async def read_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> bytes: + meta_obj = _parse_array_metadata(metadata) + shape = _chunk_shape(meta_obj) + np_dtype = _native_dtype(meta_obj) + sp = StorePath(store, path.strip("/")) + chunk_key = meta_obj.encode_chunk_key(coords) + buf = await (sp / chunk_key).get(prototype=default_buffer_prototype()) + if buf is None: + arr = np.full(shape, meta_obj.fill_value, dtype=np_dtype) + else: + pipeline = create_codec_pipeline(meta_obj) + spec = _array_spec(meta_obj, shape) + decoded = list(await pipeline.decode_batch([(buf, spec)])) + arr = np.asarray(decoded[0].as_numpy_array(), dtype=np_dtype) + return np.ascontiguousarray(arr).tobytes() + + async def read_subset( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + start: Sequence[int], + shape: Sequence[int], + ) -> bytes: + meta_obj = _parse_array_metadata(metadata) + np_dtype = _native_dtype(meta_obj) + async_arr = AsyncArray(metadata=meta_obj, store_path=StorePath(store, path.strip("/"))) + selection = tuple(slice(s, s + length) for s, length in zip(start, shape, strict=True)) + result = await async_arr.getitem(selection) + return np.ascontiguousarray(np.asarray(result, dtype=np_dtype)).tobytes() + + async def write_chunk( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + coords: tuple[int, ...], + data: bytes, + ) -> None: + meta_obj = _parse_array_metadata(metadata) + shape = _chunk_shape(meta_obj) + np_dtype = 
_native_dtype(meta_obj) + sp = StorePath(store, path.strip("/")) + chunk_key = meta_obj.encode_chunk_key(coords) + arr = np.frombuffer(data, dtype=np_dtype).reshape(shape) + pipeline = create_codec_pipeline(meta_obj) + spec = _array_spec(meta_obj, shape) + encoded = list(await pipeline.encode_batch([(NDBuffer.from_ndarray_like(arr), spec)])) + buf = encoded[0] + if buf is None: + await (sp / chunk_key).delete() + else: + await (sp / chunk_key).set(buf) + + async def delete_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> None: + meta_obj = _parse_array_metadata(metadata) + sp = StorePath(store, path.strip("/")) + await (sp / meta_obj.encode_chunk_key(coords)).delete() + + async def delete_node(self, store: Store, path: str) -> None: + proto = default_buffer_prototype() + p = path.strip("/") + sp = StorePath(store, p) + present = ( + await (sp / ZARR_JSON).get(prototype=proto) is not None + or await (sp / ZARRAY_JSON).get(prototype=proto) is not None + ) + if not present: + raise NodeNotFoundError(f"no node found at path {path!r}") + await store.delete_dir(p) + + async def list_children( + self, store: Store, path: str + ) -> list[tuple[str, dict[str, JSON]]]: + proto = default_buffer_prototype() + p = path.strip("/") + sp = StorePath(store, p) + if ( + await (sp / ZARR_JSON).get(prototype=proto) is None + and await (sp / ZARRAY_JSON).get(prototype=proto) is None + ): + raise NodeNotFoundError(f"no node found at path {path!r}") + prefix = f"{p}/" if p else "" + children: list[tuple[str, dict[str, JSON]]] = [] + async for name in store.list_dir(prefix): + child_path = f"{p}/{name}" if p else name + child_sp = StorePath(store, child_path) + if ( + await (child_sp / ZARR_JSON).get(prototype=proto) is not None + or await (child_sp / ZARRAY_JSON).get(prototype=proto) is not None + ): + children.append((name, await self.read_metadata(store, child_path))) + return children +``` + +Notes for the implementer: +- 
`decode_batch`/`encode_batch` are async and return iterables — wrap in `list(...)`. +- `ArraySpec.dtype` is the `ZDType` object (`meta_obj.dtype`), **not** a numpy dtype. +- `_native_dtype` only relabels the dtype as native byte order; the byte swap happens when decoded data is converted to it, + so both backends return identical bytes through the facade (the facade reads them with a native dtype). +- `AsyncArray(metadata=meta_obj, store_path=...)` constructs from an explicit + document without reading the store. + +- [ ] **Step 4: Register the reference backend** — append to `src/zarr/crud/__init__.py` (after the imports, before `__all__`) + +```python +from zarr.crud._reference import ReferenceBackend + +register_backend("reference", ReferenceBackend()) +``` + +and add `"ReferenceBackend"` to `__all__`. + +- [ ] **Step 5: Run the tests** + +Run: `uv run pytest tests/crud/test_reference_backend.py tests/crud/test_registry.py -v` +Expected: all PASS (the two previously-failing registry tests now pass too). + +- [ ] **Step 6: Commit** + +```bash +git add src/zarr/crud/_reference.py src/zarr/crud/__init__.py tests/crud/test_reference_backend.py +git commit -m "feat: pure-Python ReferenceBackend for zarr.crud" +``` + +--- + +### Task 3: shared facade `zarr.crud._api` + differential suite (reference backend) + +**Files:** +- Create: `src/zarr/crud/_api.py` +- Modify: `src/zarr/crud/__init__.py` +- Create: `tests/crud/conftest.py` +- Test: `tests/crud/test_crud.py` + +- [ ] **Step 1: Create `tests/crud/conftest.py`** + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +import zarr +from zarr.storage import LocalStore, MemoryStore + +if TYPE_CHECKING: + from collections.abc import AsyncIterator + from pathlib import Path + + from zarr.abc.store import Store + + +def _zarrs_available() -> bool: + try: + import _zarrs_bindings # noqa: F401 + except ImportError: + return False + return True + + +@pytest.fixture( + params=[ + "reference", + pytest.param( + "zarrs", + 
marks=pytest.mark.skipif( + not _zarrs_available(), reason="zarrs-bindings is not installed" + ), + ), + ] +) +def backend(request: pytest.FixtureRequest) -> str: + """A CRUD backend name. The zarrs param is skipped when the extension is absent.""" + import zarr.crud # noqa: F401 (ensures reference is registered) + + if request.param == "zarrs": + import zarr.zarrs # noqa: F401 (registers the zarrs backend) + return request.param + + +@pytest.fixture(params=["memory", "local"]) +async def store(request: pytest.FixtureRequest, tmp_path: Path) -> AsyncIterator[Store]: + if request.param == "memory": + s: Store = await MemoryStore.open() + else: + s = await LocalStore.open(root=tmp_path / "store") + try: + yield s + finally: + s.close() + + +def array_metadata(**kwargs: Any) -> dict[str, Any]: + """An array metadata document built via zarr-python itself.""" + params: dict[str, Any] = { + "shape": (8, 8), + "chunks": (4, 4), + "dtype": "uint16", + "zarr_format": 3, + } | kwargs + arr = zarr.create_array(store=MemoryStore(), **params) + doc = dict(arr.metadata.to_dict()) + if params["zarr_format"] == 2: + doc.pop("attributes", None) + return doc + + +def filled(store: Store, **kwargs: Any) -> tuple[np.ndarray[Any, np.dtype[Any]], dict[str, Any]]: + """Create an 8x8 array 'a', fill it with a ramp, return (data, metadata).""" + params: dict[str, Any] = {"shape": (8, 8), "chunks": (4, 4), "dtype": "uint16"} | kwargs + arr = zarr.create_array(store=store, name="a", **params) + data = np.arange(64, dtype=params["dtype"]).reshape(8, 8) + arr[:, :] = data + doc = dict(arr.metadata.to_dict()) + if params.get("zarr_format") == 2: + doc.pop("attributes", None) + return data, doc +``` + +- [ ] **Step 2: Write the failing test** — `tests/crud/test_crud.py` + +```python +from __future__ import annotations + +import copy +import json +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +import zarr +from tests.crud.conftest import array_metadata, filled +from 
zarr.codecs import BloscCodec, GzipCodec, ZstdCodec +from zarr.core.buffer.core import default_buffer_prototype +from zarr.crud import ( + NodeExistsError, + create_new_array, + create_new_group, + create_overwrite_array, + create_overwrite_group, + delete_chunk, + delete_node, + list_children, + read_chunk, + read_encoded_chunk, + read_metadata, + read_region, + write_chunk, +) +from zarr.errors import NodeNotFoundError + +if TYPE_CHECKING: + from zarr.abc.store import Store + +GROUP_META: dict[str, Any] = {"zarr_format": 3, "node_type": "group", "attributes": {"answer": 42}} + + +# --- node lifecycle --- + +async def test_create_new_group(backend: str, store: Store) -> None: + await create_new_group(GROUP_META, store, "foo", backend=backend) + assert dict(zarr.open_group(store=store, path="foo", mode="r").attrs) == {"answer": 42} + + +async def test_create_new_group_existing_raises(backend: str, store: Store) -> None: + await create_new_group(GROUP_META, store, "foo", backend=backend) + with pytest.raises(NodeExistsError): + await create_new_group(GROUP_META, store, "foo", backend=backend) + + +async def test_create_overwrite_group_replaces_array(backend: str, store: Store) -> None: + arr = zarr.create_array(store=store, name="foo", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + await create_overwrite_group(GROUP_META, store, "foo", backend=backend) + assert dict(zarr.open_group(store=store, path="foo", mode="r").attrs) == {"answer": 42} + assert not await store.exists("foo/c/0") + + +async def test_create_new_array(backend: str, store: Store) -> None: + await create_new_array(array_metadata(), store, "arr", backend=backend) + a = zarr.open_array(store=store, path="arr", mode="r") + assert a.shape == (8, 8) + assert a.dtype == np.dtype("uint16") + + +async def test_create_new_array_v2(backend: str, store: Store) -> None: + await create_new_array(array_metadata(zarr_format=2), store, "arr", backend=backend) + assert zarr.open_array(store=store, path="arr", 
mode="r").metadata.zarr_format == 2 + + +async def test_create_overwrite_array(backend: str, store: Store) -> None: + zarr.create_group(store=store, path="arr") + await create_overwrite_array(array_metadata(), store, "arr", backend=backend) + assert zarr.open_array(store=store, path="arr", mode="r").shape == (8, 8) + + +async def test_read_metadata(backend: str, store: Store) -> None: + await create_new_array(array_metadata(), store, "arr", backend=backend) + observed = await read_metadata(store, "arr", backend=backend) + raw = await store.get("arr/zarr.json", prototype=default_buffer_prototype()) + assert raw is not None + assert observed == json.loads(raw.to_bytes()) + + +async def test_read_metadata_missing(backend: str, store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await read_metadata(store, "nope", backend=backend) + + +async def test_delete_node(backend: str, store: Store) -> None: + arr = zarr.create_array(store=store, name="doomed", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + await delete_node(store, "doomed", backend=backend) + assert not await store.exists("doomed/zarr.json") + assert not await store.exists("doomed/c/0") + + +async def test_delete_node_missing(backend: str, store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await delete_node(store, "nope", backend=backend) + + +async def test_list_children(backend: str, store: Store) -> None: + root = zarr.create_group(store=store) + root.create_group("sub_group", attributes={"kind": "group"}) + root.create_array("sub_array", shape=(4,), chunks=(2,), dtype="uint8") + by_path = dict(await list_children(store, "", backend=backend)) + assert set(by_path) == {"sub_group", "sub_array"} + assert by_path["sub_group"]["node_type"] == "group" + assert by_path["sub_array"]["node_type"] == "array" + assert not any(p.startswith("/") for p in by_path) + + +# --- chunk I/O --- + +@pytest.mark.parametrize("dtype", ["uint8", "int32", "float64", "u2"]) +async def 
test_read_chunk_differential(backend: str, store: Store, dtype: str) -> None: + data, meta = filled(store, dtype=dtype) + observed = await read_chunk(meta, store, "a", (1, 0), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 0:4]) + + +@pytest.mark.parametrize( + "compressors", [None, (GzipCodec(),), (ZstdCodec(),), (BloscCodec(cname="lz4"),)] +) +async def test_read_chunk_codecs(backend: str, store: Store, compressors: Any) -> None: + data, meta = filled(store, compressors=compressors) + observed = await read_chunk(meta, store, "a", (0, 1), backend=backend) + np.testing.assert_array_equal(observed, data[0:4, 4:8]) + + +async def test_read_chunk_v2(backend: str, store: Store) -> None: + data, meta = filled(store, dtype="<u2", zarr_format=2) + observed = await read_chunk(meta, store, "a", (1, 0), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 0:4]) + + +async def test_read_chunk_sharding(backend: str, store: Store) -> None: + data, meta = filled(store, chunks=(2, 2), shards=(4, 4)) + observed = await read_chunk(meta, store, "a", (1, 1), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_read_chunk_missing_is_fill(backend: str, store: Store) -> None: + arr = zarr.create_array( + store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16", fill_value=7 + ) + meta = dict(arr.metadata.to_dict()) + observed = await read_chunk(meta, store, "a", (0, 0), backend=backend) + np.testing.assert_array_equal(observed, np.full((4, 4), 7, dtype="uint16")) + + +async def test_read_chunk_metadata_view(backend: str, store: Store) -> None: + data, meta = filled(store, dtype="uint16", compressors=None) + view = copy.deepcopy(meta) + view["data_type"] = "uint8" + view["shape"] = [8, 16] + view["chunk_grid"]["configuration"]["chunk_shape"] = [4, 8] + observed = await read_chunk(view, store, "a", (1, 0), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 0:4].view("uint8")) + + +async def test_read_chunk_readonly(backend: str, store: Store) -> None: + _, meta = filled(store) + observed = await read_chunk(meta, store, "a", (0, 0), backend=backend) + assert not observed.flags.writeable + + +async def 
test_write_chunk_differential(backend: str, store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a", backend=backend) + value = np.arange(16, dtype="uint16").reshape(4, 4) + await write_chunk(meta, store, "a", (0, 1), value, backend=backend) + np.testing.assert_array_equal(zarr.open_array(store=store, path="a", mode="r")[0:4, 4:8], value) + + +async def test_write_chunk_shape_mismatch(backend: str, store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a", backend=backend) + with pytest.raises(ValueError, match="chunk shape"): + await write_chunk(meta, store, "a", (0, 0), np.zeros((2, 2), dtype="uint16"), backend=backend) + + +async def test_delete_chunk(backend: str, store: Store) -> None: + data, meta = filled(store) + assert await store.exists("a/c/0/0") + await delete_chunk(meta, store, "a", (0, 0), backend=backend) + assert not await store.exists("a/c/0/0") + + +async def test_read_encoded_chunk_matches_store(backend: str, store: Store) -> None: + _, meta = filled(store) + raw = await read_encoded_chunk(meta, store, "a", (0, 0), backend=backend) + expected = await store.get("a/c/0/0", prototype=default_buffer_prototype()) + assert expected is not None + assert raw == expected.to_bytes() + + +async def test_read_encoded_chunk_missing_is_none(backend: str, store: Store) -> None: + arr = zarr.create_array(store=store, name="e", shape=(8, 8), chunks=(4, 4), dtype="uint16") + meta = dict(arr.metadata.to_dict()) + assert await read_encoded_chunk(meta, store, "e", (0, 0), backend=backend) is None + + +# --- region I/O --- + +SELECTIONS: list[Any] = [ + (slice(None), slice(None)), + (slice(2, 7), slice(1, 5)), + (slice(None), 3), + (5, slice(None)), + (3, 4), + (slice(1, 8, 2), slice(None)), + (slice(None), slice(6, 1, -2)), + (slice(-3, None), slice(None, -1)), + ..., + (..., slice(2, 4)), + (slice(0, 0), slice(None)), + (slice(2, 6),), +] + + +@pytest.mark.parametrize("sel", SELECTIONS) +async def 
test_read_region_differential(backend: str, store: Store, sel: Any) -> None: + data, meta = filled(store) + observed = await read_region(meta, store, "a", sel, backend=backend) + np.testing.assert_array_equal(observed, data[sel]) + + +async def test_read_region_sharding(backend: str, store: Store) -> None: + data, meta = filled(store, chunks=(2, 2), shards=(4, 4)) + observed = await read_region(meta, store, "a", (slice(1, 7), slice(3, 8)), backend=backend) + np.testing.assert_array_equal(observed, data[1:7, 3:8]) + + +async def test_read_region_too_many_indices(backend: str, store: Store) -> None: + _, meta = filled(store) + with pytest.raises(IndexError, match="too many indices"): + await read_region(meta, store, "a", (0, 0, 0), backend=backend) + + +async def test_read_region_fancy_rejected(backend: str, store: Store) -> None: + _, meta = filled(store) + with pytest.raises(TypeError, match="only integers, slices"): + await read_region(meta, store, "a", ([0, 1], slice(None)), backend=backend) # type: ignore[arg-type] +``` + +- [ ] **Step 3: Run it to verify failure** + +Run: `uv run pytest tests/crud/test_crud.py -q` +Expected: collection error — `ImportError: cannot import name 'read_chunk' from 'zarr.crud'` + +- [ ] **Step 4: Create `src/zarr/crud/_api.py`** + +```python +from __future__ import annotations + +import operator +import types +from collections.abc import Sequence +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, cast + +import numpy as np + +from zarr.core.buffer.core import default_buffer_prototype +from zarr.crud._registry import get_backend + +if TYPE_CHECKING: + from collections.abc import Mapping + + import numpy.typing as npt + + from zarr.abc.store import Store + from zarr.core.common import JSON + from zarr.core.metadata.v2 import ArrayV2Metadata + from zarr.core.metadata.v3 import ArrayV3Metadata + from zarr.crud._backend import CrudBackend + + +@dataclass(frozen=True, slots=True) +class CrudOptions: + """Options 
for CRUD operations. + + Currently empty: fields (concurrency limits, checksum validation) arrive in + a later phase. Accepting it now keeps signatures stable. + """ + + +BasicIndex = int | slice | types.EllipsisType +BasicSelection = BasicIndex | tuple[BasicIndex, ...] + + +def _resolve_backend(backend: CrudBackend | str | None) -> CrudBackend: + if backend is None or isinstance(backend, str): + return get_backend(backend) + return backend + + +def _parse_array_metadata( + metadata: Mapping[str, JSON], +) -> ArrayV3Metadata | ArrayV2Metadata: + from zarr.core.metadata.v2 import ArrayV2Metadata + from zarr.core.metadata.v3 import ArrayV3Metadata + + data = dict(metadata) + if data.get("zarr_format") == 3: + return ArrayV3Metadata.from_dict(data) + return ArrayV2Metadata.from_dict(data) + + +def _chunk_dtype_and_shape( + metadata: Mapping[str, JSON], +) -> tuple[np.dtype[Any], tuple[int, ...]]: + """Resolve native-byte-order numpy dtype and regular chunk shape. + + Backends decode to (and encode from) the native in-memory representation, + applying any byte-order codec themselves, so the dtype is coerced to native. 
+ """ + from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata + + meta_obj = _parse_array_metadata(metadata) + if isinstance(meta_obj, ArrayV3Metadata): + grid = meta_obj.chunk_grid + if not isinstance(grid, RegularChunkGridMetadata): + raise NotImplementedError("only regular chunk grids are supported") + chunk_shape = tuple(grid.chunk_shape) + else: + chunk_shape = tuple(meta_obj.chunks) + return meta_obj.dtype.to_native_dtype().newbyteorder("="), chunk_shape + + +def _array_shape(metadata: Mapping[str, JSON]) -> tuple[int, ...]: + shape = metadata.get("shape") + if not isinstance(shape, Sequence) or isinstance(shape, str): + raise TypeError("metadata document has no valid 'shape'") + result: list[int] = [] + for s in shape: + if not isinstance(s, (int, float)): + raise TypeError(f"shape element {s!r} is not a number") + if isinstance(s, float) and not s.is_integer(): + raise TypeError(f"shape element {s!r} is not an integer") + result.append(int(s)) + return tuple(result) + + +def _chunk_key(metadata: Mapping[str, JSON], path: str, coords: tuple[int, ...]) -> str: + meta_obj = _parse_array_metadata(metadata) + rel = meta_obj.encode_chunk_key(coords) + p = path.strip("/") + return f"{p}/{rel}" if p else rel + + +def _normalize_selection( + selection: BasicSelection, shape: tuple[int, ...] +) -> tuple[list[int], list[int], tuple[slice | int, ...]]: + """Normalize a numpy basic-indexing selection to a step-1 bounding box. + + Returns `(start, bounding_shape, post_index)`: the box to fetch and the + numpy index to apply to it (strides, reversals, integer-axis removal). Only + integers, slices, and `Ellipsis` are supported; fancy indexing raises. 
+ """ + sel_tuple = selection if isinstance(selection, tuple) else (selection,) + + n_ellipsis = sum(1 for s in sel_tuple if s is Ellipsis) + if n_ellipsis > 1: + raise IndexError("an index can only have a single ellipsis ('...')") + if n_ellipsis == 1: + i = sel_tuple.index(Ellipsis) + n_fill = len(shape) - (len(sel_tuple) - 1) + if n_fill < 0: + raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional") + sel_tuple = sel_tuple[:i] + (slice(None),) * n_fill + sel_tuple[i + 1 :] + if len(sel_tuple) > len(shape): + raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional") + sel_tuple = sel_tuple + (slice(None),) * (len(shape) - len(sel_tuple)) + + starts: list[int] = [] + lengths: list[int] = [] + post: list[slice | int] = [] + for dim, (sel, size) in enumerate(zip(sel_tuple, shape, strict=True)): + if isinstance(sel, slice): + start, stop, step = sel.indices(size) + n = len(range(start, stop, step)) + if n == 0: + starts.append(0) + lengths.append(0) + post.append(slice(None)) + elif step > 0: + last = start + (n - 1) * step + starts.append(start) + lengths.append(last - start + 1) + post.append(slice(None, None, step)) + else: + last = start + (n - 1) * step + starts.append(last) + lengths.append(start - last + 1) + post.append(slice(None, None, step)) + else: + assert not isinstance(sel, types.EllipsisType), "Ellipsis already expanded above" + try: + idx = operator.index(sel) + except TypeError: + raise TypeError( + "unsupported selection element " + f"{sel!r}: only integers, slices, and Ellipsis are supported" + ) from None + if idx < 0: + idx += size + if not 0 <= idx < size: + raise IndexError(f"index {sel} is out of bounds for axis {dim} with size {size}") + starts.append(idx) + lengths.append(1) + post.append(0) + return starts, lengths, tuple(post) + + +# --- node lifecycle --- + +async def create_new_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = 
None, + backend: CrudBackend | str | None = None, +) -> None: + """Create a group from a group metadata document. Raises `NodeExistsError` + if a node already exists at `path`. Not atomic against concurrent writers.""" + await _resolve_backend(backend).create_group(store, path, metadata, overwrite=False) + + +async def create_overwrite_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create a group, deleting any existing node (and children) first. Not + atomic against concurrent writers.""" + await _resolve_backend(backend).create_group(store, path, metadata, overwrite=True) + + +async def create_new_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create an array from a v2 or v3 metadata document. Raises + `NodeExistsError` if a node already exists. Not atomic against concurrent + writers.""" + await _resolve_backend(backend).create_array(store, path, metadata, overwrite=False) + + +async def create_overwrite_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create an array, deleting any existing node (and children) first. Not + atomic against concurrent writers.""" + await _resolve_backend(backend).create_array(store, path, metadata, overwrite=True) + + +async def read_metadata( + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> dict[str, JSON]: + """Read the metadata document of the array or group at `path`. 
Raises + `zarr.errors.NodeNotFoundError` if no node exists there.""" + return await _resolve_backend(backend).read_metadata(store, path) + + +async def delete_node( + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Delete the node at `path` and everything under it. Raises + `zarr.errors.NodeNotFoundError` if absent. `path=""` clears the store.""" + await _resolve_backend(backend).delete_node(store, path) + + +async def list_children( + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> list[tuple[str, dict[str, JSON]]]: + """List the direct children of the group at `path` as + `(path, metadata_document)` pairs (store-relative, no leading `/`). Raises + `zarr.errors.NodeNotFoundError` if no group exists there.""" + return await _resolve_backend(backend).list_children(store, path) + + +# --- chunk I/O --- + +async def read_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> np.ndarray[Any, np.dtype[Any]]: + """Read and decode the whole chunk at `chunk_coords`. The metadata document + is authoritative; missing chunks decode to the fill value. The result is a + read-only view (`.copy()` for a writable array).""" + be = _resolve_backend(backend) + raw = await be.read_chunk(store, path, metadata, tuple(chunk_coords)) + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + return np.frombuffer(raw, dtype=dtype).reshape(chunk_shape) + + +async def read_encoded_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> bytes | None: + """Read the raw, still-encoded bytes of the chunk at `chunk_coords`, or + `None` if absent. 
Pure store I/O (`store.get` on the chunk key): the + `backend` argument is accepted for signature uniformity but unused.""" + key = _chunk_key(metadata, path, tuple(chunk_coords)) + buf = await store.get(key, prototype=default_buffer_prototype()) + return None if buf is None else buf.to_bytes() + + +async def write_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + value: npt.ArrayLike, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Encode `value` with the codecs in `metadata` and store it as the chunk at + `chunk_coords`. `value` must match the chunk shape exactly.""" + be = _resolve_backend(backend) + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + arr = np.ascontiguousarray(np.asarray(value, dtype=dtype)) + if arr.shape != chunk_shape: + raise ValueError(f"value shape {arr.shape} does not match chunk shape {chunk_shape}") + await be.write_chunk(store, path, metadata, tuple(chunk_coords), arr.tobytes()) + + +async def delete_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Delete the chunk at `chunk_coords`. Deleting a missing chunk is a no-op.""" + await _resolve_backend(backend).delete_chunk(store, path, metadata, tuple(chunk_coords)) + + +# --- region I/O --- + +async def read_region( + metadata: Mapping[str, JSON], + store: Store, + path: str, + selection: BasicSelection, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> np.ndarray[Any, np.dtype[Any]]: + """Read and decode a region given by a numpy basic-indexing `selection` + (integers, slices with steps, `Ellipsis`). One backend call fetches the + step-1 bounding box; strides/reversals/integer-axis removal are applied as + numpy views. Missing chunks decode to the fill value. 
Fancy indexing raises + `TypeError`. The result is a read-only view. + + Note: a `slice(0, N, step)` reads `O(N)` bytes even though `O(N / step)` are + returned; for sparse selections over large arrays prefer `read_chunk`.""" + be = _resolve_backend(backend) + dtype, _ = _chunk_dtype_and_shape(metadata) + shape = _array_shape(metadata) + starts, lengths, post_index = _normalize_selection(selection, shape) + if 0 in lengths: + block = np.empty(lengths, dtype=dtype) + block.flags.writeable = False + else: + raw = await be.read_subset(store, path, metadata, tuple(starts), tuple(lengths)) + block = np.frombuffer(raw, dtype=dtype).reshape(lengths) + return cast("np.ndarray[Any, np.dtype[Any]]", block[post_index]) +``` + +Note: `BackendArg` is a documentation alias only; use the literal +`CrudBackend | str | None` annotations as written above. + +- [ ] **Step 5: Export the facade from `src/zarr/crud/__init__.py`** + +Add to the imports and `__all__` (keep `__all__` sorted): + +```python +from zarr.crud._api import ( + CrudOptions, + create_new_array, + create_new_group, + create_overwrite_array, + create_overwrite_group, + delete_chunk, + delete_node, + list_children, + read_chunk, + read_encoded_chunk, + read_metadata, + read_region, + write_chunk, +) +``` + +Final `__all__`: + +```python +__all__ = [ + "CrudBackend", + "CrudOptions", + "NodeExistsError", + "ReferenceBackend", + "create_new_array", + "create_new_group", + "create_overwrite_array", + "create_overwrite_group", + "delete_chunk", + "delete_node", + "get_backend", + "list_children", + "read_chunk", + "read_encoded_chunk", + "read_metadata", + "read_region", + "register_backend", + "write_chunk", +] +``` + +- [ ] **Step 6: Run the suite against the reference backend** + +Run: `uv run pytest tests/crud/test_crud.py -q` +Expected: all PASS. The `backend` fixture's `zarrs` param is skipped (no `--group zarrs`), so every test runs once on `reference` × {memory, local}. 
If `test_read_chunk_differential[>u2-...]` fails, the byte-order coercion in `_reference._native_dtype` / `_chunk_dtype_and_shape` is wrong — both must end in `.newbyteorder("=")`; do not weaken the assertion. + +- [ ] **Step 7: Commit** + +```bash +git add src/zarr/crud/_api.py src/zarr/crud/__init__.py tests/crud/conftest.py tests/crud/test_crud.py +git commit -m "feat: zarr.crud shared facade + differential suite (reference backend)" +``` + +--- + +### Task 4: `ZarrsBackend` + shrink `zarr.zarrs` + migrate zarrs tests + +**Files:** +- Create: `src/zarr/zarrs/_backend.py` +- Modify: `src/zarr/zarrs/__init__.py` +- Delete: `src/zarr/zarrs/_api.py` +- Delete: `tests/zarrs/test_node.py`, `tests/zarrs/test_chunk.py`, `tests/zarrs/test_api.py` +- Modify: `tests/zarrs/test_cache.py` + +- [ ] **Step 1: Create `src/zarr/zarrs/_backend.py`** + +```python +from __future__ import annotations + +import asyncio +import json +from contextlib import contextmanager +from typing import TYPE_CHECKING, cast + +import _zarrs_bindings as _zb + +from zarr.crud import NodeExistsError +from zarr.errors import NodeNotFoundError +from zarr.zarrs._bridge import resolve_store + +if TYPE_CHECKING: + from collections.abc import Iterator, Mapping, Sequence + + from zarr.abc.store import Store + from zarr.core.common import JSON + + +def _node_path(path: str) -> str: + """Convert a zarr path (`""`, `"foo/bar"`) to a zarrs node path (`"/"`, + `"/foo/bar"`).""" + return f"/{path.strip('/')}" + + +@contextmanager +def _translate_errors() -> Iterator[None]: + try: + yield + except _zb.NodeNotFoundError as err: + raise NodeNotFoundError(str(err)) from err + except _zb.NodeExistsError as err: + raise NodeExistsError(str(err)) from err + + +class ZarrsBackend: + """CRUD backend backed by the Rust `zarrs` crate via `_zarrs_bindings`. 
+ + Owns the zarrs-specific plumbing: JSON-serializing the metadata document, + the `/`-prefixed node-path form, store resolution, offloading the blocking + Rust calls to a worker thread, and translating binding exceptions to the + canonical `zarr.crud` / `zarr.errors` types. + """ + + async def create_array( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + with _translate_errors(): + await asyncio.to_thread( + _zb.create_array, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + overwrite, + ) + + async def create_group( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + with _translate_errors(): + await asyncio.to_thread( + _zb.create_group, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + overwrite, + ) + + async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: + with _translate_errors(): + raw = await asyncio.to_thread(_zb.read_metadata, resolve_store(store), _node_path(path)) + return cast("dict[str, JSON]", json.loads(raw)) + + async def read_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] 
+ ) -> bytes: + return await asyncio.to_thread( + _zb.retrieve_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(coords), + ) + + async def read_subset( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + start: Sequence[int], + shape: Sequence[int], + ) -> bytes: + return await asyncio.to_thread( + _zb.retrieve_array_subset, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(start), + list(shape), + ) + + async def write_chunk( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + coords: tuple[int, ...], + data: bytes, + ) -> None: + await asyncio.to_thread( + _zb.store_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(coords), + data, + ) + + async def delete_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> None: + await asyncio.to_thread( + _zb.erase_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(coords), + ) + + async def delete_node(self, store: Store, path: str) -> None: + with _translate_errors(): + await asyncio.to_thread(_zb.delete_node, resolve_store(store), _node_path(path)) + + async def list_children( + self, store: Store, path: str + ) -> list[tuple[str, dict[str, JSON]]]: + with _translate_errors(): + raw: list[tuple[str, str]] = await asyncio.to_thread( + _zb.list_children, resolve_store(store), _node_path(path) + ) + return [ + (child_path.lstrip("/"), cast("dict[str, JSON]", json.loads(doc))) + for child_path, doc in raw + ] +``` + +- [ ] **Step 2: Rewrite `src/zarr/zarrs/__init__.py`** + +```python +""" +The zarrs CRUD backend for `zarr.crud`, backed by the Rust +[`zarrs`](https://zarrs.dev) crate. + +Importing this module registers the `"zarrs"` backend. Requires the +`zarrs-bindings` extension (in-repo Rust crate; `uv sync --group zarrs`). Select +it with `zarr.config.set({"crud.backend": "zarrs"})` or per call via +`backend="zarrs"`. 
+""" + +try: + import _zarrs_bindings +except ImportError as e: + raise ImportError( + "zarr.zarrs requires the `zarrs-bindings` package, which is not installed. " + "It is built from the zarr-python repository: run `uv sync --group zarrs`." + ) from e + +from zarr.crud import register_backend +from zarr.zarrs._backend import ZarrsBackend + +__version__: str = _zarrs_bindings.version() + +register_backend("zarrs", ZarrsBackend()) + +__all__ = ["ZarrsBackend", "__version__"] +``` + +- [ ] **Step 3: Delete the moved module and obsolete tests** + +```bash +git rm src/zarr/zarrs/_api.py tests/zarrs/test_node.py tests/zarrs/test_chunk.py tests/zarrs/test_api.py +``` + +- [ ] **Step 4: Update `tests/zarrs/test_cache.py`** — change imports from the old `zarr.zarrs` functions to the `zarr.crud` facade with the zarrs backend. + +Replace the import block: + +```python +from zarr.zarrs import decode_chunk, encode_chunk +``` + +with: + +```python +from zarr.crud import read_chunk, write_chunk +``` + +Then in that file replace every `decode_chunk(` call with `read_chunk(` and every `encode_chunk(` call with `write_chunk(`, adding `backend="zarrs"` as the final keyword argument to each so they exercise the cached zarrs path. For example: + +```python + await read_chunk(meta, store, "a", (0, 0), backend="zarrs") +... + await write_chunk(meta, store, "a", (0, 0), new, backend="zarrs") +``` + +The cache assertions (`zb.array_cache_len()` / `zb.clear_array_cache()`) and the `import _zarrs_bindings as zb` line are unchanged. The module-level `pytest.importorskip("_zarrs_bindings", ...)` stays. + +- [ ] **Step 5: Add the zarrs param coverage — already wired** + +`tests/crud/conftest.py` already parametrizes `backend` over `["reference", "zarrs"]` with the zarrs case skipped when the extension is missing. No change needed; running with `--group zarrs` now exercises it. 
+ +- [ ] **Step 6: Run everything with the zarrs extension** + +Run: `uv run --group zarrs pytest tests/crud tests/zarrs -q` +Expected: all PASS. `tests/crud/test_crud.py` now runs each test on both `reference` and `zarrs` × {memory, local}; `tests/zarrs/test_cache.py` and `test_bridge.py` pass. If a differential test passes on `reference` but fails on `zarrs` (or vice versa), the two backends disagree — investigate the backend, never weaken the assertion. + +- [ ] **Step 7: Run without the extension (reference-only path stays green)** + +Run: `uv run pytest tests/crud -q` +Expected: all PASS, zarrs params skipped. (`tests/zarrs` is not collectable without the extension; that's fine — its module-level `importorskip` skips it.) + +- [ ] **Step 8: Commit** + +```bash +git add src/zarr/zarrs tests/zarrs +git commit -m "feat: ZarrsBackend conforms to CrudBackend; zarr.zarrs is now a backend" +``` + +--- + +### Task 5: changelog, CI, and final verification + +**Files:** +- Modify: `changes/+zarrs-bindings.feature.md` +- Modify: `.github/workflows/zarrs.yml` + +- [ ] **Step 1: Reword the changelog fragment** — overwrite `changes/+zarrs-bindings.feature.md` + +```markdown +Added `zarr.crud`, an experimental backend-agnostic low-level functional API for +zarr hierarchy CRUD (`create_*`, `read_chunk`, `read_region`, `read_encoded_chunk`, +`write_chunk`, `delete_chunk`, `read_metadata`, `delete_node`, `list_children`). +Array routines take an explicit metadata document, enabling read-only views. +Operations delegate to a pluggable `CrudBackend`: a pure-Python reference backend +(the default) or the zarrs-accelerated backend in `zarr.zarrs`, backed by the Rust +[zarrs](https://zarrs.dev) crate via the in-repo `zarrs-bindings` PyO3 crate. +Select a backend with the `crud.backend` config key or a per-call `backend=` +argument. Build the zarrs backend for development with `uv sync --group zarrs`. 
+``` + +- [ ] **Step 2: Update the CI test command** — `.github/workflows/zarrs.yml` + +Change the test step's `run:` from: + +```yaml + run: uv run --group zarrs pytest tests/zarrs -v +``` + +to: + +```yaml + run: uv run --group zarrs pytest tests/crud tests/zarrs -v +``` + +Validate: `uvx zizmor .github/workflows/zarrs.yml` → no findings. + +- [ ] **Step 3: Lint and type-check the new code** + +Run: `uv run --group dev ruff format src/zarr/crud src/zarr/zarrs tests/crud tests/zarrs` +Run: `uv run --group dev ruff check --fix src/zarr/crud src/zarr/zarrs tests/crud tests/zarrs` +Run: `uv run --group dev --group zarrs mypy src/zarr/crud src/zarr/zarrs tests/crud tests/zarrs` +Expected: all clean. (mypy is strict; the facade and backends are fully annotated.) + +- [ ] **Step 4: Full suites, both with and without the extension** + +Run: `uv run --group zarrs pytest tests/crud tests/zarrs -q` → all pass +Run: `uv run pytest tests/crud -q` → all pass (zarrs skipped) +Run (regression — the rest of zarr-python is untouched): `uv run pytest tests/test_array.py tests/test_group.py -q` → pass + +- [ ] **Step 5: Commit** + +```bash +git add changes/+zarrs-bindings.feature.md .github/workflows/zarrs.yml +git commit -m "docs/ci: zarr.crud changelog and CI coverage" +``` + +--- + +## Out of scope (per spec) + +- Wiring `zarr.crud` under zarr-python's `Array`/`Group` classes. +- Entrypoint-based backend discovery (registration is explicit/import-time). +- A write-side region operation (`write_region`). +- Renaming the Rust `_zarrs_bindings` pyfunctions (private; adapted by `ZarrsBackend`). +- `CrudOptions` fields (concurrency, checksums) — still a placeholder. 
diff --git a/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md new file mode 100644 index 0000000000..22cb0b5785 --- /dev/null +++ b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md @@ -0,0 +1,210 @@ +# zarrs-backed low-level functional API for zarr-python + +Date: 2026-06-11 +Status: approved +Branch: `zarrs-bindings` + +## Goal + +Give zarr-python a low-level, functional API for zarr hierarchy CRUD whose +implementation delegates to the Rust [`zarrs`](https://docs.rs/zarrs) crate via +new PyO3 bindings. Every array routine takes a metadata document as an explicit +parameter, so callers can operate on read-only or virtual views of arrays +(e.g. decode a chunk with metadata the store never saw, or read a chunk as raw +bytes without decoding). + +Non-goals for this work: rewiring zarr-python's `Array`/`Group` classes or the +codec-pipeline registry through this API (possible later), fancy +(non-slice) indexing, and use of zarrs's experimental async feature. + +## Background + +- zarr-python is pure Python (hatchling). Its `Store` ABC + (`src/zarr/abc/store.py`) is async; metadata classes live under + `src/zarr/core/metadata/`. +- The Rust `zarrs` crate (~0.23) supports exactly the metadata-driven shape we + need: `Array::new_with_metadata(storage, path, metadata)` and + `Group::new_with_metadata(...)` construct nodes from a metadata document + without touching the store; `store_metadata()` persists separately. Chunk and + region I/O: `retrieve_chunk`, `retrieve_encoded_chunk` (raw bytes), + `retrieve_array_subset`, `partial_decoder` (sharding-aware), and the + corresponding `store_*` methods. `ArrayMetadata`/`GroupMetadata` parse + directly from JSON strings (v2 or v3; v2 converts internally). +- The existing `zarrs` PyPI package (github.com/zarrs/zarrs-python) exposes only + a codec pipeline (`CodecPipelineImpl`) and supports only a fixed set of + native stores. 
It cannot provide the API designed here, but its build setup + (maturin, PyO3 abi3, tokio/rayon) is the reference for ours. + +## Architecture + +Two distributions in this repo, hard boundary between them: + +1. **Rust crate `zarrs-bindings`** under `packages/` (`packages/zarrs-bindings/`, + alongside the existing `zarr-metadata` subpackage), + built with maturin (PyO3, `abi3-py312`), publishing wheel `zarrs-bindings` + with native module `_zarrs_bindings`. It is a thin, mechanical binding over + `zarrs`: functions/pyclasses take metadata as a **JSON string**, a + store-config object, a node path, and return bytes / numpy arrays. It knows + nothing about zarr-python except the store sniffing described below. +2. **Python subpackage `zarr.zarrs`** in zarr-python: the public functional + API. Owns conversion between zarr-python types (`dict` metadata documents, + `zarr.abc.store.Store`, numpy arrays) and the binding layer, plus + validation, ergonomics, and error translation. Imports `_zarrs_bindings` + lazily and raises a helpful `ImportError` naming the `zarr[zarrs]` extra if + it is missing. + +zarr-python's own wheel remains pure Python; `zarrs-bindings` becomes an +optional dependency (`zarr[zarrs]`). + +## Public API (`zarr.zarrs`) + +All functions are `async def`. Parameters: + +- `metadata`: `dict[str, JSON]` — the literal metadata document (`zarr.json`, + or v2 `.zarray`/`.zgroup` equivalents). Never read from the store by the + array routines. +- `store`: `zarr.abc.store.Store`. +- `path`: node path within the store (str, `""` = root). +- `chunk_coords`: `tuple[int, ...]` grid coordinates. +- `selection`: numpy-style basic indexing — integers, slices (including steps; strided/reversed selections fetch the step-1 bounding box in one call and apply numpy views), and `Ellipsis`. Fancy indexing (integer/boolean arrays) and `np.newaxis` are not supported. 
+- `options`: every function also accepts keyword-only + `options: ZarrsOptions | None = None` (omitted from the signatures below for + brevity) — a dataclass holding concurrency limits and checksum validation + flags. Defaults are applied when omitted; in Phase 1 the dataclass exists + but carries only defaults (fields become meaningful in Phase 3). + +```python +# node lifecycle +async def create_new_group(metadata, store, path) -> None # error if node exists +async def create_overwrite_group(metadata, store, path) -> None +async def create_new_array(metadata, store, path) -> None +async def create_overwrite_array(metadata, store, path) -> None +async def read_metadata(store, path) -> dict[str, JSON] # array or group doc +async def delete_node(store, path) -> None +async def list_children(store, path) -> list[tuple[str, dict]] # (path, metadata) + +# chunk-level I/O +async def decode_chunk(metadata, store, path, chunk_coords, *, selection=None) -> np.ndarray +async def read_encoded_chunk(metadata, store, path, chunk_coords) -> bytes | None +async def encode_chunk(metadata, store, path, chunk_coords, value) -> None +async def erase_chunk(metadata, store, path, chunk_coords) -> None + +# region-level I/O (selection in array coordinates, may span chunks) +async def decode_region(metadata, store, path, selection) -> np.ndarray +async def encode_region(metadata, store, path, selection, value) -> None +``` + +Mapping to zarrs primitives: + +| API function | zarrs primitive | +|---|---| +| `create_new_group` / `create_overwrite_group` | `Group::new_with_metadata` + `store_metadata` (existence check first for `new`) | +| `create_new_array` / `create_overwrite_array` | `Array::new_with_metadata` + `store_metadata` | +| `read_metadata` | `Array::open` / `Group::open` metadata retrieval | +| `delete_node` | `erase_metadata` + chunk erasure / prefix delete | +| `list_children` | `Group::children` / `traverse` | +| `decode_chunk` (no selection) | `retrieve_chunk` | +| 
`decode_chunk` (selection) | `partial_decoder(chunk).partial_decode` (sharding-aware) | +| `read_encoded_chunk` | `retrieve_encoded_chunk` | +| `encode_chunk` | `store_chunk` | +| `erase_chunk` | `erase_chunk` | +| `decode_region` | `retrieve_array_subset` | +| `encode_region` | `store_array_subset` | + +## Store bridge + +A Rust-side `StoreConfig` resolver, tried in priority order: + +1. `zarr.storage.LocalStore` → native `zarrs_filesystem` store. +2. obstore-backed `ObjectStore` → `zarrs_object_store` (Phase 3). +3. **Anything else** → generic `PyStore`: a Rust struct implementing + `ReadableStorageTraits` / `WritableStorageTraits` / + `ListableStorageTraits` over a Python callback object. + +The callback path: the async API function wraps the user's `Store` in a small +sync Python shim whose methods submit coroutines to zarr-python's existing +sync event-loop thread (`zarr.core.sync`, +`asyncio.run_coroutine_threadsafe(...)` + blocking result). Rust calls the +shim while holding no locks of its own. This makes any conformant `Store` +(Memory, Zip, Logging, Wrapper, user-defined) work without Rust knowing its +type. Deadlock safety relies on the existing invariant that code running on +the zarr sync loop never blocks on these Rust entry points. + +## Sync/async seam + +The public API is async to match zarr-python conventions. Internally each +function calls a blocking Rust entry point via `asyncio.to_thread`; the Rust +side releases the GIL during I/O and compute (reacquiring it only inside +`PyStore` callbacks). zarrs's experimental async feature is not used. + +## Array construction cache + +`Array::new_with_metadata` (serde-parsing the metadata document and building the +codec chain) is the dominant per-call cost on the native path — measured at +~20µs for a bytes-only array up to ~80µs for sharded+blosc, against single-digit +µs of actual chunk I/O on a warm filesystem. 
To amortize it across the common +"open one array, then do many chunk operations" pattern, the chunk/region +routines memoize the constructed `Array` in a process-global LRU cache +(capacity 128) keyed on `(filesystem root, node path, metadata JSON)`. + +This is safe because a zarrs `Array` caches no chunk data — it is metadata plus +codec chain plus a storage handle — so every read/write still goes through to +the store, and a correctly-keyed hit is behaviorally identical to a fresh build. +The key must include all three components: the same document at a different path +or store is a different array. Only native filesystem stores are cached; the +generic `PyStore` callback path has no stable cross-call identity to key on and +is left uncached (a future change may cache it if a store can supply a stable +value-based token). No invalidation hook is needed: delete/overwrite with +different metadata yields a different key, and an entry for a deleted-and-rebuilt +array with identical metadata stays valid because reads go through to the store. +A poisoned cache mutex is recovered rather than propagated, so the cache can +never wedge array I/O. Measured win: 14–20% faster per repeated call on a local +store, free on every hit. + +## Error handling + +The binding layer raises a small set of typed exceptions defined in one place: +`NodeExistsError`, `NodeNotFoundError`, and `ValueError` subclasses for +metadata-parse failures. In Phase 1 the translation surface is deliberately +small: `zarr.zarrs` re-raises the bindings' `NodeNotFoundError` as +`zarr.errors.NodeNotFoundError`; `NodeExistsError` is exposed as +`zarr.zarrs.NodeExistsError`. Exceptions raised by Python store callbacks are +flattened to a `RuntimeError` carrying the original message — the original +exception type and traceback are lost crossing the Rust boundary. Faithful +propagation of store-callback exceptions (and richer mapping onto +`zarr.errors` types) is deferred to a later phase. 
+ +## Testing + +`tests/zarrs/`, module-level skip when `_zarrs_bindings` is not importable. + +- **Differential tests** are the core: every operation checked against + zarr-python's own implementation on the same store — write with zarr-python, + read with zarrs, and vice versa; metadata documents produced by both must + round-trip. +- Parametrized over: `MemoryStore` (exercises generic bridge) and `LocalStore` + (native path); zarr formats v2 and v3; a codec matrix including + `sharding_indexed`. +- Read-only-view tests: decode a chunk using a metadata dict not present in + the store; `read_encoded_chunk` returns bytes identical to `store.get`. +- A CI job builds the crate with `maturin develop` and runs `tests/zarrs/`. + Existing CI jobs are untouched (the suite skips without the extension). + +## Phasing + +1. **Phase 1**: crate scaffolding (maturin, CI build), store bridge (native + LocalStore + generic PyStore), node lifecycle functions, whole-chunk + `decode_chunk` / `read_encoded_chunk` / `encode_chunk` / `erase_chunk`. +2. **Phase 2**: `decode_region` (read side of region I/O) is implemented on + this branch. `encode_region` and chunk-subset `selection` for `decode_chunk` + via partial decoders remain Phase 2. +3. **Phase 3**: `ZarrsOptions` surface (concurrency, checksum validation, + direct IO), obstore native path, benchmarks vs. the pure-Python pipeline. + +## Naming decisions + +- Python API: `zarr.zarrs`. +- Rust crate / PyPI distribution: `zarrs-bindings` (PyPI name `zarrs` is taken + by the existing project); native module `_zarrs_bindings`. +- Function names follow the requested `create_new_*` / `create_overwrite_*` + pattern; reads are `decode_*` / `read_*`, writes `encode_*`. 
diff --git a/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md b/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md new file mode 100644 index 0000000000..bf8043513b --- /dev/null +++ b/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md @@ -0,0 +1,259 @@ +# Backend-agnostic CRUD layer for zarr-python + +Date: 2026-06-15 +Status: approved +Branch: `zarrs-bindings` + +## Goal + +Turn the low-level functional CRUD API (introduced as `zarr.zarrs` earlier on +this branch) into a backend-agnostic layer, with the Rust zarrs bindings as one +of several interchangeable implementations. Define the CRUD contract abstractly, +provide a pure-Python reference backend (no Rust required), and make the zarrs +bindings conform to the same contract. + +This validates the abstraction by having two real backends agree with each other +and with zarr-python, and it gives users a no-Rust fallback. + +Non-goals for this change (deliberately deferred): + +- Wiring the CRUD layer under zarr-python's own `Array`/`Group` classes. +- Entrypoint-based backend discovery (this change uses explicit import-time + registration). +- A write-side region operation (`write_region`) remains future work. + +## Background + +The current `zarr.zarrs._api` is a flat module of 13 async functions that +delegate to the `_zarrs_bindings` Rust extension. It already separates two +concerns that this design formalizes into a hard boundary: + +- **Backend-neutral glue:** `_normalize_selection`, `_array_shape`, + `_chunk_dtype_and_shape`, numpy assembly (`np.frombuffer`/reshape/strided + views), native-dtype coercion, options handling, error translation. +- **Genuinely zarrs-specific work:** producing/consuming raw chunk bytes, + reading array subsets as bytes, writing metadata documents — all via + `_zarrs_bindings` and the `_bridge.StoreShim`/`resolve_store` plumbing. + +The public surface (`zarr.zarrs.decode_region`, etc.) 
is unreleased on this +branch, so it can move without backward-compatibility constraints. + +zarr-python already contains everything a pure-Python backend needs: +`BatchedCodecPipeline` (`src/zarr/core/codec_pipeline.py`), `BasicIndexer` +(`src/zarr/core/indexing.py`), `save_metadata` (`src/zarr/core/metadata/io.py`), +metadata parsing (`ArrayV3Metadata.from_dict` / `ArrayV2Metadata.from_dict`), +and chunk-key encoding (`src/zarr/core/chunk_key_encodings.py`). + +## Architecture + +Two packages with a hard boundary. + +### `zarr.crud` (new, backend-neutral) + +- `_backend.py` — the `CrudBackend` `Protocol` (the narrow byte/metadata + contract below) plus the canonical exceptions. +- `_api.py` — the shared async facade: the 13 public functions moved out of + `zarr.zarrs`, holding all backend-neutral logic. Each function resolves a + backend (from the `backend` argument or the registry default) and calls its + byte/metadata methods, then does selection normalization, dtype handling, and + numpy assembly. +- `_reference.py` — `ReferenceBackend`, pure Python, wrapping zarr-python's own + codec/indexing/metadata machinery. Always importable; the default backend. +- `_registry.py` — `register_backend(name, backend)`, `get_backend(name)`, and + the config-driven default resolution. +- `__init__.py` — re-exports the facade functions, `CrudBackend`, + `CrudOptions`, the exceptions, and `register_backend`. + +### `zarr.zarrs` (shrinks to the zarrs provider) + +- `_backend.py` — `ZarrsBackend`, implementing `CrudBackend` by wrapping + `_zarrs_bindings`. Owns the zarrs-isms that move out of the facade: + `json.dumps` of the metadata dict, the `/`-prefixed zarrs node-path form + (formerly `_node_path`), `_bridge.resolve_store`, and translation of + `_zarrs_bindings` exceptions into the `zarr.crud` canonical exceptions. +- `_bridge.py` — unchanged (`StoreShim`, `resolve_store`). +- the Rust crate `zarrs-bindings/` and the construction cache — unchanged.
+- registers itself as backend `"zarrs"` at import time. + +## The `CrudBackend` contract + +Narrow, byte/metadata level. Methods pass neutral types — the metadata document +as a `dict`, the zarr `Store`, and plain zarr paths (`""`, `"foo/bar"`) — and +return raw bytes / JSON-as-dict / `None`. Each backend serializes and bridges as +it needs. + +```python +class CrudBackend(Protocol): + async def create_array(self, store, path, metadata, *, overwrite: bool) -> None: ... + async def create_group(self, store, path, metadata, *, overwrite: bool) -> None: ... + async def read_metadata(self, store, path) -> dict[str, JSON]: ... + async def read_chunk(self, store, path, metadata, coords) -> bytes: ... + async def read_subset(self, store, path, metadata, start, shape) -> bytes: ... + async def write_chunk(self, store, path, metadata, coords, data: bytes) -> None: ... + async def delete_chunk(self, store, path, metadata, coords) -> None: ... + async def delete_node(self, store, path) -> None: ... + async def list_children(self, store, path) -> list[tuple[str, dict[str, JSON]]]: ... +``` + +Nine methods. `read_encoded_chunk` is deliberately **not** a backend method — +see below; it is a backend-independent facade helper over `store.get`. + +Byte conventions: `read_chunk`/`read_subset` return C-contiguous raw bytes in +host-native byte order (regardless of the endianness declared in the metadata) +for the requested chunk / step-1 bounding box; +`write_chunk` takes the same. `read_metadata`/`list_children` return parsed JSON +documents as dicts. + +### Two read-addressing axes (no overlap) + +Reads are addressed in one of two coordinate spaces, and the two never overlap: + +- **Chunk-grid coordinates** — `read_chunk(coords)` / `read_encoded_chunk(coords)` + return a whole chunk addressed by its grid position.
`read_chunk` (a backend + method) decodes and returns the *full* chunk shape, including the fill-padded + overhang of edge chunks; `read_encoded_chunk` (a facade helper, not a backend + method) returns the raw stored bytes or `None`. These pair with `write_chunk` / + `delete_chunk`, which are chunk-grid-addressed backend methods. +- **Array-element coordinates** — `read_subset(start, shape)` returns an + arbitrary box in array space, which generally spans multiple chunks and is + clipped to the array bounds. The facade's `read_region(selection)` normalizes + a numpy selection to a step-1 bounding box and calls it. + +`read_chunk` takes no `selection` parameter. A sub-region *within* a single +chunk is simply a `read_region` whose bounding box lies inside one chunk; the +backend already decodes only the overlapping chunk(s) (sharding-aware in the +zarrs backend), so a chunk-relative partial-read needs no separate API. The +`Store.get(key, byte_range=)` analogue is therefore `read_region` over a +single-chunk box, not a parameter on `read_chunk`; `read_subset` itself has no +single-`get` analogue — it is closer to "`get_partial_values` across many keys, +stitched into one array." + +### `read_encoded_chunk` is facade-level, not a backend method + +Reading a chunk's raw stored bytes is just `store.get(chunk_key)`, and the chunk +key is computable from the metadata document alone via zarr-python's +`chunk_key_encoding` — no decoding, no codec pipeline, nothing backend-specific. +Both backends would implement it identically. So the facade implements +`read_encoded_chunk(store, path, metadata, coords)` directly as: encode the chunk +key from the metadata, `store.get` it, return the bytes or `None`. It works the +same regardless of which backend (or none) is selected, which is correct since it +is pure store I/O. Under sharding the chunk key holds the whole shard blob, and +this returns exactly that raw object. 
+ +This raw read can also be *expressed* through `read_chunk` by supplying a view +metadata document (`data_type: uint8` + a single `bytes` codec, identity decode) +— a nice demonstration that the read-only-view mechanism is general — but that +route requires knowing the encoded byte length up front to set the chunk shape (a +`store.getsize` round-trip) and would synthesize a fill-valued array for a missing +chunk instead of returning `None`. So `store.get` is the correct implementation +for fetching stored bytes; the view trick is the general tool for *reinterpreting* +decoded data under a different dtype/shape, which `read_chunk` already supports. + +## Method naming + +Both the public facade and the backend contract use a single, consistent verb +set: **create / read / write / delete / list**. No `decode`/`encode`/`retrieve`/ +`store`/`erase` synonyms. + +Public facade (`zarr.crud`): + +| Function | Verb | Notes | +|---|---|---| +| `create_new_group` / `create_overwrite_group` | create | node lifecycle | +| `create_new_array` / `create_overwrite_array` | create | node lifecycle | +| `read_metadata` | read | array or group document | +| `read_chunk` | read | decoded chunk → `ndarray` | +| `read_encoded_chunk` | read | raw stored bytes, no decode (facade-only, `store.get`) | +| `read_region` | read | numpy basic-indexing selection → `ndarray` | +| `write_chunk` | write | encode + store a chunk | +| `delete_chunk` | delete | remove one chunk | +| `delete_node` | delete | remove a node + descendants | +| `list_children` | list | direct children of a group | + +Facade → backend mapping for the byte-level methods: `read_chunk` → +`backend.read_chunk`, `read_region` → `backend.read_subset` (the facade +normalizes the selection to a step-1 bounding box `(start, shape)`), +`write_chunk` → `backend.write_chunk`, `delete_chunk` → `backend.delete_chunk`. +`read_encoded_chunk` maps to no backend method — the facade serves it from +`store.get`. 
The two distinct names `read_region` (selection-based, public) and +`read_subset` (bounding-box bytes, backend) are intentional: they have different +signatures and the facade is the adapter between them. + +## Facade / backend split + +What stays in the `zarr.crud` facade (written once, backend-neutral): + +- selection normalization (`_normalize_selection`), shape/dtype resolution + (`_array_shape`, `_chunk_dtype_and_shape`), native-dtype coercion; +- numpy assembly: `np.frombuffer(...).reshape(...)` and the strided/reversed/ + integer-axis post-index views; read-only result guarantee; +- the empty-selection short circuit (no backend call); +- `read_encoded_chunk`: encode the chunk key from the metadata and `store.get` + it (no backend involved); +- `ZarrsOptions` acceptance (still a placeholder) and backend resolution. + +What moves into each backend: + +- `ZarrsBackend`: `json.dumps`, the `/`-prefixed node-path form, + `resolve_store`, calling `_zarrs_bindings`, exception translation. +- `ReferenceBackend`: `ArrayV3Metadata.from_dict`/`ArrayV2Metadata.from_dict`, + building a `BatchedCodecPipeline` and `ChunkGrid`/`BasicIndexer`, assembling + `batch_info` and calling `codec_pipeline.read`/`write`, `save_metadata`, + `store.delete_dir`, and `list_dir` + per-child metadata reads. + +## Backend selection + +- A registry in `zarr.crud._registry`: `register_backend(name, backend)`, + `get_backend(name) -> CrudBackend`. +- A `zarr.config` key `crud.backend`, default `"reference"`. The pure-Python + backend always works and is predictable; `"zarrs"` opts into the accelerator + and is registered when `zarr.zarrs` is imported. +- Every facade function accepts `backend: CrudBackend | str | None = None`. + `None` → registry default; a string → registry lookup; an instance → used + directly. This enables side-by-side testing of backends. 
+ +## Error handling + +`zarr.crud` defines the canonical exceptions: reuse +`zarr.errors.NodeNotFoundError`, and keep a `NodeExistsError` (exposed as +`zarr.crud.NodeExistsError`). Each backend raises these directly: + +- `ReferenceBackend` raises them at the point of detection. +- `ZarrsBackend` translates `_zarrs_bindings.NodeExistsError` / + `_zarrs_bindings.NodeNotFoundError` into the canonical types. + +The facade therefore no longer needs the `_translate_errors` shim. Phase-1 +fidelity limits (store-callback exceptions flattened to `RuntimeError` across the +Rust boundary) are unchanged for the zarrs backend; the reference backend +surfaces native exceptions directly. + +## Testing + +- Shared differential suite moves to `tests/crud/`, parametrized over + `backend ∈ {reference, zarrs}` × `store ∈ {memory, local}`. Each test writes + with zarr-python and reads through the facade (and vice versa), so the two + backends are checked against zarr-python *and*, transitively, against each + other. The zarrs-parametrized cases skip when `_zarrs_bindings` is not + installed (xdist-safe module-level `importorskip` in a zarrs-only conftest + helper, or a skip marker on the zarrs param). +- Zarrs-only tests stay in `tests/zarrs/`: the construction cache + (`test_cache.py`) and the store bridge (`test_bridge.py`). +- A focused `tests/crud/test_registry.py`: default resolution, `register_backend`, + string vs instance `backend=` override. +- `uv run --group zarrs pytest tests/crud tests/zarrs` is the full local check; + `uv run pytest tests/crud` (no zarrs group) must pass with the reference + backend alone and skip the zarrs params. + +## Migration notes + +- Move the 13 functions and the neutral helpers from `zarr.zarrs._api` into + `zarr.crud._api`; delete `zarr.zarrs._api`. No aliases in `zarr.zarrs`. 
+- Rename to the consistent verb set in the move (no compatibility aliases, since + the surface is unreleased): `decode_chunk` → `read_chunk`, `decode_region` → + `read_region`, `encode_chunk` → `write_chunk`, `erase_chunk` → `delete_chunk`. + `read_metadata`, `read_encoded_chunk`, `delete_node`, `list_children`, and the + `create_*` functions keep their names. +- `zarr.zarrs.__init__` exports only what is needed to register and identify the + zarrs backend: it exports `ZarrsBackend` and registers it as `"zarrs"` on import. +- The changelog fragment is updated to describe `zarr.crud` as the public CRUD + surface with pluggable backends, and `zarr.zarrs` as the zarrs backend. +- The CI job continues to build the crate and now runs `tests/crud tests/zarrs`. diff --git a/packages/zarrs-bindings/Cargo.lock b/packages/zarrs-bindings/Cargo.lock new file mode 100644 index 0000000000..9c9a91f203 --- /dev/null +++ b/packages/zarrs-bindings/Cargo.lock @@ -0,0 +1,1831 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "async-lock" +version = "3.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" +dependencies = [ + "event-listener", + "event-listener-strategy", + "pin-project-lite", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "auto_impl" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffdcb70bdbc4d478427380519163274ac86e52916e10f0a8889adf0f96d3fee7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = 
"1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" + +[[package]] +name = "blosc-src" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9046dd58971db0226346fde214143d16a6eb12f535b5320d0ea94fcea420631" +dependencies = [ + "cc", + "libz-sys", + "lz4-sys", + "snappy_src", + "zstd-sys", +] + +[[package]] +name = "blusc" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4e0c17eaa785d2673fe58c22fc817946c2330ed47f3d9f79835d65950d32a45" +dependencies = [ + "flate2", + "lz4_flex", + "pkg-config", + "snap", + "zstd", +] + +[[package]] +name = "bumpalo" +version = "3.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" + +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "cc" +version = "1.2.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "convert_case" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "crc32c" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = 
"0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "derive_more" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn", + "unicode-xid", +] + +[[package]] +name = "either" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" +dependencies = [ + "event-listener", + "pin-project-lite", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 5.3.0", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" 
+version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "bytemuck", + "cfg-if", + "crunchy", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.1.5", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", + "serde", + "serde_core", +] + +[[package]] +name = "inventory" +version = 
"0.3.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b" +dependencies = [ + "rustversion", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2025f20d7a4fa7785846e7b63d10a76d3f1cee98ee5cb79ea59703f95e42162" +dependencies = [ + "cfg-if", + "futures-util", + "wasm-bindgen", +] + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libz-sys" +version = "1.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85bc9657773828b90eeb625adff10eeac83cc21bbfd8e23a03eaa8a33c9e28d9" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "link-cplusplus" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7f78c730aaa7d0b9336a299029ea49f9ee53b0ed06e9202e8cb7db9bae7b8c82" +dependencies = [ + "cc", +] + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" + +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + +[[package]] +name = "lru" +version = "0.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39" +dependencies = [ + "hashbrown 0.16.1", +] + +[[package]] +name = "lz4-sys" +version = "1.11.1+lz4-1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "lz4_flex" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90071f8077f8e40adfc4b7fe9cd495ce316263f19e75c2211eeff3fdf475a3d9" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "matrixmultiply" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "memchr" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "moka" +version = "0.12.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" +dependencies = [ + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "equivalent", + "parking_lot", + "portable-atomic", + "smallvec", + "tagptr", + "uuid", +] + +[[package]] +name = "monostate" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb4cc965c89dd0615a9e822ff8002f7633d2466143d51bd58693e4b2c75aabad" +dependencies = [ + "monostate-impl", + "serde", + "serde_core", +] + +[[package]] +name = "monostate-impl" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23f5b99488110875b5904839d396c2cdfaf241ff6622638acb879cc7effad5de" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ndarray" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "portable-atomic", + "portable-atomic-util", + "rawpointer", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + 
"num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "bytemuck", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pathdiff" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "portable-atomic-util" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "positioned-io" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ec4b80060f033312b99b6874025d9503d2af87aef2dd4c516e253fbfcdada7" +dependencies = [ + "byteorder", + "libc", 
+ "winapi", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.28.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" +dependencies = [ + "libc", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", +] + +[[package]] +name = "pyo3-build-config" +version = "0.28.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e" +dependencies = [ + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.28.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.28.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.28.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quick_cache" +version = "0.6.23" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a3db184a8b66cfe87f0263a1de147a6b554c864d1767c6f7fa4eb0e5497b565" +dependencies = [ + "ahash", + "equivalent", + "hashbrown 0.16.1", + "parking_lot", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "rayon_iter_concurrent_limit" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d09ee01023de07fa073ce14c37cbe0a9e099c6b0b60a29cf4af6d04d9553fed7" +dependencies = [ + "rayon", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" + +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90" + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "snappy_src" +version = "0.2.5+snappy.1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e1432067a55bcfb1fd522d2aca6537a4fcea32bba87ea86921226d14f9bad53" +dependencies = [ + "cc", + "link-cplusplus", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + +[[package]] +name = "target-lexicon" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tinyvec" +version = "1.11.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-segmentation" +version = "1.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "unsafe_cell_slice" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6659959f702dcdaad77bd6e42a9409a32ceccc06943ec93c8a4306be00eb6cf1" + +[[package]] +name = "uuid" +version = "1.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a254a4b10c19a76f09a27640e7ffbf9bc30bf67e16a3bf28aaefa4920fe81563" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a40fc75b0ec6f3746ceb10d36f53a93dcd68a93b11b6445983945d79eba0dc" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "908f34bd9b9ce3d4caf07b72dfab63d61504d156856c6bd3cd87fa350cf3985b" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7acbf7616c27b194bbb550bf77ed0c2c3e5b7fd1260a93082b95fb7f47959b92" +dependencies = [ + "unicode-ident", +] + 
+[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] 
+name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + 
"wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zarrs" +version = "0.23.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8132307b8fc041fd21f68c7987103fb6e038b11f9838c16ec43b798f5480ccf5" +dependencies = [ + "async-lock", + "base64", + "blosc-src", + "blusc", + "bytemuck", + "bytes", + "crc32c", + "derive_more", + "flate2", + "getrandom 0.3.4", + "half", + "inventory", + "itertools", + "itoa", + "libz-sys", + "log", + "lru 0.16.4", + "moka", + "ndarray", + "num", + "num-complex", + "paste", + "quick_cache", + "rayon", + "rayon_iter_concurrent_limit", + "serde", + "serde_json", + "thiserror", + "thread_local", + "unsafe_cell_slice", + "uuid", + "zarrs_chunk_grid", + "zarrs_chunk_key_encoding", + "zarrs_codec", + "zarrs_data_type", + "zarrs_filesystem", + "zarrs_metadata", + "zarrs_metadata_ext", + "zarrs_plugin", + "zarrs_storage", + "zstd", +] + +[[package]] +name = "zarrs-bindings" +version = "0.1.0" +dependencies = [ + "lru 0.12.5", + "pyo3", + "serde_json", + "zarrs", +] + +[[package]] +name = "zarrs_chunk_grid" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cf67386fd96a0336cd3e5ab5ca6cb14e0e05aee80f1acae8c4d3cf562a8bb65" +dependencies = [ + "derive_more", + "inventory", + "itertools", + "rayon", + "thiserror", + "tinyvec", + "zarrs_metadata", + "zarrs_plugin", +] + +[[package]] +name = "zarrs_chunk_key_encoding" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9040e7feaa92d1904d492acd0cd91b97214f1791c5b5738e6c05b2ca4145a382" +dependencies = [ + "derive_more", + 
"inventory", + "zarrs_metadata", + "zarrs_plugin", + "zarrs_storage", +] + +[[package]] +name = "zarrs_codec" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "383a129a6a0cbb2c80cdba23809e5cab85159756464b7d0f112468a495c128da" +dependencies = [ + "async-trait", + "bytemuck", + "derive_more", + "futures", + "inventory", + "itertools", + "rayon", + "thiserror", + "unsafe_cell_slice", + "zarrs_chunk_grid", + "zarrs_data_type", + "zarrs_metadata", + "zarrs_plugin", + "zarrs_storage", +] + +[[package]] +name = "zarrs_data_type" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc7c594c9363278fcd9db4c205514f009944206eb093ea7ad40b85f50009f31" +dependencies = [ + "derive_more", + "half", + "inventory", + "num", + "paste", + "serde", + "serde_json", + "thiserror", + "zarrs_metadata", + "zarrs_plugin", +] + +[[package]] +name = "zarrs_filesystem" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "270efeb0181651aee5460b3232f2fc83e91bd646cefe75001d1c8f9a4f3abf81" +dependencies = [ + "bytes", + "derive_more", + "itertools", + "libc", + "page_size", + "pathdiff", + "positioned-io", + "thiserror", + "walkdir", + "zarrs_storage", +] + +[[package]] +name = "zarrs_metadata" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d60c4c363a8a302d7babb3c29017850a7b4e0af6ca5f9ba2946263a185b62fea" +dependencies = [ + "derive_more", + "half", + "monostate", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "zarrs_metadata_ext" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2048e07848ca99c7450518e0584929300b1b6a3cf442f18b26ffd3520814bd5b" +dependencies = [ + "derive_more", + "monostate", + "num", + "serde", + "serde_json", + "serde_repr", + "thiserror", + "zarrs_metadata", +] + +[[package]] +name = "zarrs_plugin" +version = "0.4.1" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cbe0ed432aee86856f70ca33be36eaf4a0dae21ab730750d9280a7ca1e95046" +dependencies = [ + "paste", + "regex", + "serde_json", + "thiserror", +] + +[[package]] +name = "zarrs_storage" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d098796d2ed4cf94896569615101e0432e870a7665396da5cc32300fb68f7c1" +dependencies = [ + "auto_impl", + "bytes", + "derive_more", + "itertools", + "thiserror", + "unsafe_cell_slice", +] + +[[package]] +name = "zerocopy" +version = "0.8.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/packages/zarrs-bindings/Cargo.toml 
b/packages/zarrs-bindings/Cargo.toml new file mode 100644 index 0000000000..ab06ef3517 --- /dev/null +++ b/packages/zarrs-bindings/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "zarrs-bindings" +version = "0.1.0" +edition = "2024" +rust-version = "1.91" +publish = false +license = "MIT" +description = "PyO3 bindings to the zarrs Rust crate, consumed by zarr.zarrs" + +[lib] +name = "_zarrs_bindings" +crate-type = ["cdylib"] + +[dependencies] +lru = "0.12" +pyo3 = { version = "0.28", features = ["abi3-py312"] } +serde_json = "1" +zarrs = "0.23" + +[profile.release] +lto = "thin" diff --git a/packages/zarrs-bindings/pyproject.toml b/packages/zarrs-bindings/pyproject.toml new file mode 100644 index 0000000000..4212a64b56 --- /dev/null +++ b/packages/zarrs-bindings/pyproject.toml @@ -0,0 +1,14 @@ +[build-system] +requires = ["maturin>=1.7,<2"] +build-backend = "maturin" + +[project] +name = "zarrs-bindings" +dynamic = ["version"] +description = "PyO3 bindings to the zarrs Rust crate, consumed by zarr.zarrs" +requires-python = ">=3.12" +license = "MIT" + +[tool.maturin] +module-name = "_zarrs_bindings" +strip = true diff --git a/packages/zarrs-bindings/src/chunk.rs b/packages/zarrs-bindings/src/chunk.rs new file mode 100644 index 0000000000..556995924b --- /dev/null +++ b/packages/zarrs-bindings/src/chunk.rs @@ -0,0 +1,168 @@ +use std::num::NonZeroUsize; +use std::sync::{Arc, Mutex, OnceLock}; + +use lru::LruCache; +use pyo3::exceptions::PyNotImplementedError; +use pyo3::prelude::*; +use pyo3::types::PyBytes; +use zarrs::array::{Array, ArrayBytes, ArraySubset}; +use zarrs::metadata::ArrayMetadata; +use zarrs::storage::ReadableWritableListableStorage; + +use crate::store::resolve_store_with_key; +use crate::{runtime_err, value_err}; + +type DynArray = Array; + +/// Cache of constructed Arrays keyed by (filesystem root, node path, metadata +/// JSON). Only native filesystem stores are cached (see `resolve_store_with_key`). 
+/// Bounded by an LRU; entries hold only a filesystem path + codec chain, no data. +type CacheKey = (String, String, String); +static ARRAY_CACHE: OnceLock>>> = OnceLock::new(); + +fn array_cache() -> &'static Mutex>> { + ARRAY_CACHE.get_or_init(|| Mutex::new(LruCache::new(NonZeroUsize::new(128).unwrap()))) +} + +/// Acquire the array cache lock, recovering gracefully from a poisoned mutex +/// (e.g. a thread panicked while holding it). The worst case is a stale or +/// partially-updated cache entry — far preferable to wedging all array I/O. +fn lock_cache() -> std::sync::MutexGuard<'static, LruCache>> { + array_cache().lock().unwrap_or_else(|e| e.into_inner()) +} + +fn build_array( + storage: ReadableWritableListableStorage, + path: &str, + metadata_json: &str, +) -> PyResult { + let metadata = ArrayMetadata::try_from(metadata_json).map_err(value_err)?; + Array::new_with_metadata(storage, path, metadata).map_err(value_err) +} + +/// Construct (or fetch from cache) an Array view from an explicit metadata +/// document, without consulting the store for metadata. When `cache_key` is +/// `Some(root)` the result is memoized on (root, path, metadata_json). 
+fn array_view( + storage: ReadableWritableListableStorage, + cache_key: Option, + path: &str, + metadata_json: &str, +) -> PyResult> { + if let Some(root) = cache_key { + let key = (root, path.to_string(), metadata_json.to_string()); + if let Some(array) = lock_cache().get(&key).cloned() { + return Ok(array); + } + let array = Arc::new(build_array(storage, path, metadata_json)?); + lock_cache().put(key, Arc::clone(&array)); + Ok(array) + } else { + Ok(Arc::new(build_array(storage, path, metadata_json)?)) + } +} + +#[pyfunction] +pub(crate) fn array_cache_len() -> usize { + lock_cache().len() +} + +#[pyfunction] +pub(crate) fn clear_array_cache() { + lock_cache().clear(); +} + +#[pyfunction] +pub(crate) fn retrieve_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec, +) -> PyResult> { + let (storage, cache_key) = resolve_store_with_key(store)?; + let data = py.detach(move || -> PyResult> { + let array = array_view(storage, cache_key, &path, &metadata_json)?; + let bytes: ArrayBytes<'static> = + array.retrieve_chunk(&chunk_coords).map_err(runtime_err)?; + let fixed = bytes.into_fixed().map_err(|_| { + PyNotImplementedError::new_err("variable-length data types are not supported") + })?; + Ok(fixed.into_owned()) + })?; + Ok(PyBytes::new(py, &data).unbind()) +} + +#[pyfunction] +pub(crate) fn retrieve_encoded_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec, +) -> PyResult>> { + let (storage, cache_key) = resolve_store_with_key(store)?; + let data = py.detach(move || -> PyResult>> { + let array = array_view(storage, cache_key, &path, &metadata_json)?; + array + .retrieve_encoded_chunk(&chunk_coords) + .map_err(runtime_err) + })?; + Ok(data.map(|d| PyBytes::new(py, &d).unbind())) +} + +#[pyfunction] +pub(crate) fn store_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec, + data: Vec, 
+) -> PyResult<()> { + let (storage, cache_key) = resolve_store_with_key(store)?; + py.detach(move || { + let array = array_view(storage, cache_key, &path, &metadata_json)?; + array + .store_chunk(&chunk_coords, ArrayBytes::new_flen(data)) + .map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn erase_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec, +) -> PyResult<()> { + let (storage, cache_key) = resolve_store_with_key(store)?; + py.detach(move || { + let array = array_view(storage, cache_key, &path, &metadata_json)?; + array.erase_chunk(&chunk_coords).map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn retrieve_array_subset( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + start: Vec, + shape: Vec, +) -> PyResult> { + let (storage, cache_key) = resolve_store_with_key(store)?; + let data = py.detach(move || -> PyResult> { + let array = array_view(storage, cache_key, &path, &metadata_json)?; + let subset = ArraySubset::new_with_start_shape(start, shape).map_err(value_err)?; + let bytes: ArrayBytes<'static> = + array.retrieve_array_subset(&subset).map_err(runtime_err)?; + let fixed = bytes.into_fixed().map_err(|_| { + PyNotImplementedError::new_err("variable-length data types are not supported") + })?; + Ok(fixed.into_owned()) + })?; + Ok(PyBytes::new(py, &data).unbind()) +} diff --git a/packages/zarrs-bindings/src/lib.rs b/packages/zarrs-bindings/src/lib.rs new file mode 100644 index 0000000000..61f947480f --- /dev/null +++ b/packages/zarrs-bindings/src/lib.rs @@ -0,0 +1,52 @@ +use pyo3::exceptions::{PyRuntimeError, PyValueError}; +use pyo3::prelude::*; + +mod chunk; +mod node; +mod store; + +pyo3::create_exception!( + _zarrs_bindings, + NodeExistsError, + PyValueError, + "A node already exists at the given path." +); +pyo3::create_exception!( + _zarrs_bindings, + NodeNotFoundError, + PyValueError, + "No node was found at the given path." 
+); + +pub(crate) fn runtime_err(err: impl std::fmt::Display) -> PyErr { + PyRuntimeError::new_err(err.to_string()) +} + +pub(crate) fn value_err(err: impl std::fmt::Display) -> PyErr { + PyValueError::new_err(err.to_string()) +} + +#[pyfunction] +fn version() -> &'static str { + env!("CARGO_PKG_VERSION") +} + +#[pymodule] +fn _zarrs_bindings(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add("NodeExistsError", m.py().get_type::())?; + m.add("NodeNotFoundError", m.py().get_type::())?; + m.add_function(wrap_pyfunction!(version, m)?)?; + m.add_function(wrap_pyfunction!(node::create_array, m)?)?; + m.add_function(wrap_pyfunction!(node::create_group, m)?)?; + m.add_function(wrap_pyfunction!(node::delete_node, m)?)?; + m.add_function(wrap_pyfunction!(node::list_children, m)?)?; + m.add_function(wrap_pyfunction!(node::read_metadata, m)?)?; + m.add_function(wrap_pyfunction!(chunk::retrieve_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::retrieve_encoded_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::store_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::erase_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::retrieve_array_subset, m)?)?; + m.add_function(wrap_pyfunction!(chunk::array_cache_len, m)?)?; + m.add_function(wrap_pyfunction!(chunk::clear_array_cache, m)?)?; + Ok(()) +} diff --git a/packages/zarrs-bindings/src/node.rs b/packages/zarrs-bindings/src/node.rs new file mode 100644 index 0000000000..35a057ab31 --- /dev/null +++ b/packages/zarrs-bindings/src/node.rs @@ -0,0 +1,122 @@ +use pyo3::prelude::*; +use zarrs::array::Array; +use zarrs::group::Group; +use zarrs::metadata::{ArrayMetadata, GroupMetadata}; +use zarrs::node::{Node, NodePath, node_exists}; +use zarrs::storage::{ReadableWritableListableStorage, StorePrefix}; + +use crate::store::resolve_store; +use crate::{NodeExistsError, NodeNotFoundError, runtime_err, value_err}; + +/// `path` arguments throughout this module are zarrs node paths, e.g. 
"/" or +/// "/foo/bar" (already normalized by the Python layer's `_node_path`). +pub(crate) fn parse_node_path(path: &str) -> PyResult { + NodePath::new(path).map_err(value_err) +} + +/// When a node exists at `node_path`: erase it (and everything under it) if +/// `overwrite`, otherwise raise `NodeExistsError`. +pub(crate) fn prepare_target( + storage: &ReadableWritableListableStorage, + node_path: &NodePath, + overwrite: bool, +) -> PyResult<()> { + if node_exists(storage, node_path).map_err(runtime_err)? { + if !overwrite { + return Err(NodeExistsError::new_err(format!( + "a node already exists at path {}", + node_path.as_str() + ))); + } + let prefix: StorePrefix = node_path.try_into().map_err(value_err)?; + storage.erase_prefix(&prefix).map_err(runtime_err)?; + } + Ok(()) +} + +#[pyfunction] +pub(crate) fn create_group( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + overwrite: bool, +) -> PyResult<()> { + let storage = resolve_store(store)?; + let metadata = GroupMetadata::try_from(metadata_json.as_str()).map_err(value_err)?; + py.detach(move || { + let node_path = parse_node_path(&path)?; + prepare_target(&storage, &node_path, overwrite)?; + let group = Group::new_with_metadata(storage, &path, metadata).map_err(value_err)?; + group.store_metadata().map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn create_array( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + overwrite: bool, +) -> PyResult<()> { + let storage = resolve_store(store)?; + let metadata = ArrayMetadata::try_from(metadata_json.as_str()).map_err(value_err)?; + py.detach(move || { + let node_path = parse_node_path(&path)?; + prepare_target(&storage, &node_path, overwrite)?; + let array = Array::new_with_metadata(storage, &path, metadata).map_err(value_err)?; + array.store_metadata().map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn read_metadata( + py: Python<'_>, + store: &Bound<'_, PyAny>, + 
path: String, +) -> PyResult { + let storage = resolve_store(store)?; + py.detach(move || { + let node = + Node::open(&storage, &path).map_err(|e| NodeNotFoundError::new_err(e.to_string()))?; + serde_json::to_string(node.metadata()).map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn delete_node(py: Python<'_>, store: &Bound<'_, PyAny>, path: String) -> PyResult<()> { + let storage = resolve_store(store)?; + py.detach(move || { + let node_path = parse_node_path(&path)?; + if !node_exists(&storage, &node_path).map_err(runtime_err)? { + return Err(NodeNotFoundError::new_err(format!( + "no node found at path {}", + node_path.as_str() + ))); + } + let prefix: StorePrefix = (&node_path).try_into().map_err(value_err)?; + storage.erase_prefix(&prefix).map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn list_children( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, +) -> PyResult> { + let storage = resolve_store(store)?; + py.detach(move || { + let group = + Group::open(storage, &path).map_err(|e| NodeNotFoundError::new_err(e.to_string()))?; + let children = group.children(false).map_err(runtime_err)?; + children + .into_iter() + .map(|node| { + let metadata = serde_json::to_string(node.metadata()).map_err(runtime_err)?; + Ok((node.path().as_str().to_string(), metadata)) + }) + .collect() + }) +} diff --git a/packages/zarrs-bindings/src/store.rs b/packages/zarrs-bindings/src/store.rs new file mode 100644 index 0000000000..c58de37387 --- /dev/null +++ b/packages/zarrs-bindings/src/store.rs @@ -0,0 +1,225 @@ +use std::sync::Arc; + +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyDict}; +use zarrs::filesystem::FilesystemStore; +use zarrs::storage::byte_range::{ByteRange, ByteRangeIterator}; +use zarrs::storage::{ + Bytes, ListableStorageTraits, MaybeBytes, MaybeBytesIterator, OffsetBytesIterator, + ReadableStorageTraits, ReadableWritableListableStorage, StorageError, StoreKey, StoreKeys, + 
StoreKeysPrefixes, StorePrefix, WritableStorageTraits, +}; + +/// A zarrs store backed by a Python `zarr.zarrs._bridge.StoreShim`. +/// +/// Every method attaches to the Python interpreter and calls the shim, which +/// blocks on the zarr event loop. Blocking waits in Python release the GIL, so +/// the loop thread can make progress while a Rust worker waits here. +pub(crate) struct PyStore(Py); + +fn py_err(err: PyErr) -> StorageError { + StorageError::Other(err.to_string()) +} + +fn invalid(err: impl std::fmt::Display) -> StorageError { + StorageError::Other(err.to_string()) +} + +impl PyStore { + fn get_with_range( + &self, + key: &StoreKey, + range: Option<&ByteRange>, + ) -> Result { + Python::attach(|py| { + let shim = self.0.bind(py); + let result = match range { + None => shim.call_method1("get", (key.as_str(),)), + Some(ByteRange::FromStart(offset, length)) => { + shim.call_method1("get_range", (key.as_str(), *offset, *length)) + } + Some(ByteRange::Suffix(suffix)) => { + shim.call_method1("get_suffix", (key.as_str(), *suffix)) + } + } + .map_err(py_err)?; + if result.is_none() { + Ok(None) + } else { + let bytes: Vec = result.extract().map_err(py_err)?; + Ok(Some(Bytes::from(bytes))) + } + }) + } +} + +impl ReadableStorageTraits for PyStore { + fn get(&self, key: &StoreKey) -> Result { + self.get_with_range(key, None) + } + + fn get_partial_many<'a>( + &'a self, + key: &StoreKey, + byte_ranges: ByteRangeIterator<'a>, + ) -> Result, StorageError> { + let mut out = Vec::new(); + for byte_range in byte_ranges { + match self.get_with_range(key, Some(&byte_range))? { + Some(bytes) => out.push(Ok(bytes)), + None => return Ok(None), + } + } + Ok(Some(Box::new(out.into_iter()))) + } + + fn size_key(&self, key: &StoreKey) -> Result, StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("getsize", (key.as_str(),)) + .map_err(py_err)? 
+ .extract() + .map_err(py_err) + }) + } + + fn supports_get_partial(&self) -> bool { + true + } +} + +impl WritableStorageTraits for PyStore { + fn set(&self, key: &StoreKey, value: Bytes) -> Result<(), StorageError> { + Python::attach(|py| { + let data = PyBytes::new(py, &value); + self.0 + .bind(py) + .call_method1("set", (key.as_str(), data)) + .map_err(py_err)?; + Ok(()) + }) + } + + fn set_partial_many( + &self, + key: &StoreKey, + offset_values: OffsetBytesIterator, + ) -> Result<(), StorageError> { + // read-modify-write fallback provided by zarrs + zarrs::storage::store_set_partial_many(self, key, offset_values) + } + + fn supports_set_partial(&self) -> bool { + false + } + + fn erase(&self, key: &StoreKey) -> Result<(), StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("delete", (key.as_str(),)) + .map_err(py_err)?; + Ok(()) + }) + } + + fn erase_prefix(&self, prefix: &StorePrefix) -> Result<(), StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("delete_prefix", (prefix.as_str(),)) + .map_err(py_err)?; + Ok(()) + }) + } +} + +impl ListableStorageTraits for PyStore { + fn list(&self) -> Result { + Python::attach(|py| { + let keys: Vec = self + .0 + .bind(py) + .call_method0("list") + .map_err(py_err)? + .extract() + .map_err(py_err)?; + keys.into_iter() + .map(|k| StoreKey::new(k).map_err(invalid)) + .collect() + }) + } + + fn list_prefix(&self, prefix: &StorePrefix) -> Result { + Python::attach(|py| { + let keys: Vec = self + .0 + .bind(py) + .call_method1("list_prefix", (prefix.as_str(),)) + .map_err(py_err)? + .extract() + .map_err(py_err)?; + keys.into_iter() + .map(|k| StoreKey::new(k).map_err(invalid)) + .collect() + }) + } + + fn list_dir(&self, prefix: &StorePrefix) -> Result { + Python::attach(|py| { + let (keys, prefixes): (Vec, Vec) = self + .0 + .bind(py) + .call_method1("list_dir", (prefix.as_str(),)) + .map_err(py_err)? 
+ .extract() + .map_err(py_err)?; + let keys = keys + .into_iter() + .map(|k| StoreKey::new(k).map_err(invalid)) + .collect::, StorageError>>()?; + let prefixes = prefixes + .into_iter() + .map(|p| StorePrefix::new(p).map_err(invalid)) + .collect::, StorageError>>()?; + Ok(StoreKeysPrefixes::new(keys, prefixes)) + }) + } + + fn size_prefix(&self, prefix: &StorePrefix) -> Result { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("getsize_prefix", (prefix.as_str(),)) + .map_err(py_err)? + .extract() + .map_err(py_err) + }) + } +} + +/// Like `resolve_store`, but also returns a cache key for the constructed +/// storage: `Some(root)` for native filesystem stores (which are safe to key an +/// Array cache on), `None` for the generic Python-callback path (uncached). +pub(crate) fn resolve_store_with_key( + obj: &Bound<'_, PyAny>, +) -> PyResult<(ReadableWritableListableStorage, Option)> { + if let Ok(config) = obj.cast::() { + if let Some(root) = config.get_item("filesystem")? { + let root: String = root.extract()?; + let store = + FilesystemStore::new(&root).map_err(|e| PyValueError::new_err(e.to_string()))?; + return Ok((Arc::new(store), Some(root))); + } + return Err(PyValueError::new_err("unrecognized store configuration")); + } + Ok((Arc::new(PyStore(obj.clone().unbind())), None)) +} + +/// Convert the Python-side store representation (`zarr.zarrs._bridge.resolve_store` +/// output) into a zarrs storage handle. 
+pub(crate) fn resolve_store(obj: &Bound<'_, PyAny>) -> PyResult { + Ok(resolve_store_with_key(obj)?.0) +} diff --git a/pyproject.toml b/pyproject.toml index 9b372192e9..4a61b749f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,7 @@ exclude = [ "/.github", "/bench", "/docs", + "/packages/zarrs-bindings", ] [project] @@ -146,6 +147,10 @@ dev = [ "universal-pathlib", "mypy==2.1.0", ] +zarrs = [ + {include-group = "test"}, + "zarrs-bindings", +] [tool.coverage.report] exclude_also = [ @@ -448,6 +453,7 @@ addopts = [ "--doctest-modules", "--ignore=tests/test_regression/scripts", "--ignore=src/zarr/_cli", + "--ignore=src/zarr/zarrs", ] filterwarnings = [ "error", @@ -503,3 +509,6 @@ ignore-words-list = "astroid" [project.entry-points.pytest11] zarr = "zarr.testing" + +[tool.uv.sources] +zarrs-bindings = { path = "packages/zarrs-bindings" } diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 7dcbc78e31..288f56de69 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -107,6 +107,7 @@ def enable_gpu(self) -> ConfigSet: "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", "batch_size": 1, }, + "crud": {"backend": "reference"}, "codecs": { "blosc": "zarr.codecs.blosc.BloscCodec", "gzip": "zarr.codecs.gzip.GzipCodec", diff --git a/src/zarr/crud/__init__.py b/src/zarr/crud/__init__.py new file mode 100644 index 0000000000..33b01668b2 --- /dev/null +++ b/src/zarr/crud/__init__.py @@ -0,0 +1,55 @@ +""" +Backend-agnostic low-level functional CRUD API for zarr hierarchies. + +The public functions delegate byte- and metadata-level work to a `CrudBackend`. +Two backends ship: a pure-Python reference backend (the default) and a +zarrs-accelerated backend (`zarr.zarrs`, requires the `zarrs-bindings` +extension). Select one with the `crud.backend` config key or a per-call +`backend=` argument. 
+ +Array routines take an explicit metadata document (a `dict` matching the +`zarr.json` / `.zarray` document) rather than reading it from the store, which +makes read-only and virtual views possible. +""" + +from zarr.crud._api import ( + CrudOptions, + create_new_array, + create_new_group, + create_overwrite_array, + create_overwrite_group, + delete_chunk, + delete_node, + list_children, + read_chunk, + read_encoded_chunk, + read_metadata, + read_region, + write_chunk, +) +from zarr.crud._backend import CrudBackend, NodeExistsError +from zarr.crud._reference import ReferenceBackend +from zarr.crud._registry import get_backend, register_backend + +register_backend("reference", ReferenceBackend()) + +__all__ = [ + "CrudBackend", + "CrudOptions", + "NodeExistsError", + "ReferenceBackend", + "create_new_array", + "create_new_group", + "create_overwrite_array", + "create_overwrite_group", + "delete_chunk", + "delete_node", + "get_backend", + "list_children", + "read_chunk", + "read_encoded_chunk", + "read_metadata", + "read_region", + "register_backend", + "write_chunk", +] diff --git a/src/zarr/crud/_api.py b/src/zarr/crud/_api.py new file mode 100644 index 0000000000..91aeef5007 --- /dev/null +++ b/src/zarr/crud/_api.py @@ -0,0 +1,344 @@ +from __future__ import annotations + +import operator +import types +from collections.abc import Sequence +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, cast + +import numpy as np + +from zarr.core.buffer.core import default_buffer_prototype +from zarr.crud._common import parse_array_metadata +from zarr.crud._registry import get_backend + +if TYPE_CHECKING: + from collections.abc import Mapping + + import numpy.typing as npt + + from zarr.abc.store import Store + from zarr.core.common import JSON + from zarr.crud._backend import CrudBackend + + +@dataclass(frozen=True, slots=True) +class CrudOptions: + """Options for CRUD operations. 
+ + Currently empty: fields (concurrency limits, checksum validation) arrive in + a later phase. Accepting it now keeps signatures stable. + """ + + +BasicIndex = int | slice | types.EllipsisType +BasicSelection = BasicIndex | tuple[BasicIndex, ...] + + +def _resolve_backend(backend: CrudBackend | str | None) -> CrudBackend: + if backend is None or isinstance(backend, str): + return get_backend(backend) + return backend + + +def _chunk_dtype_and_shape( + metadata: Mapping[str, JSON], +) -> tuple[np.dtype[Any], tuple[int, ...]]: + """Resolve native-byte-order numpy dtype and regular chunk shape. + + Backends decode to (and encode from) the native in-memory representation, + applying any byte-order codec themselves, so the dtype is coerced to native. + """ + from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata + + meta_obj = parse_array_metadata(metadata) + if isinstance(meta_obj, ArrayV3Metadata): + grid = meta_obj.chunk_grid + if not isinstance(grid, RegularChunkGridMetadata): + raise NotImplementedError("only regular chunk grids are supported") + chunk_shape = tuple(grid.chunk_shape) + else: + chunk_shape = tuple(meta_obj.chunks) + return meta_obj.dtype.to_native_dtype().newbyteorder("="), chunk_shape + + +def _array_shape(metadata: Mapping[str, JSON]) -> tuple[int, ...]: + shape = metadata.get("shape") + if not isinstance(shape, Sequence) or isinstance(shape, str): + raise TypeError("metadata document has no valid 'shape'") + result: list[int] = [] + for s in shape: + if not isinstance(s, (int, float)): + raise TypeError(f"shape element {s!r} is not a number") + if isinstance(s, float) and not s.is_integer(): + raise TypeError(f"shape element {s!r} is not an integer") + result.append(int(s)) + return tuple(result) + + +def _chunk_key(metadata: Mapping[str, JSON], path: str, coords: tuple[int, ...]) -> str: + meta_obj = parse_array_metadata(metadata) + rel = meta_obj.encode_chunk_key(coords) + p = path.strip("/") + return f"{p}/{rel}" if p 
else rel + + +def _normalize_selection( + selection: BasicSelection, shape: tuple[int, ...] +) -> tuple[list[int], list[int], tuple[slice | int, ...]]: + """Normalize a numpy basic-indexing selection to a step-1 bounding box. + + Returns `(start, bounding_shape, post_index)`: the box to fetch and the + numpy index to apply to it (strides, reversals, integer-axis removal). Only + integers, slices, and `Ellipsis` are supported; fancy indexing raises. + """ + sel_tuple = selection if isinstance(selection, tuple) else (selection,) + + n_ellipsis = sum(1 for s in sel_tuple if s is Ellipsis) + if n_ellipsis > 1: + raise IndexError("an index can only have a single ellipsis ('...')") + if n_ellipsis == 1: + i = sel_tuple.index(Ellipsis) + n_fill = len(shape) - (len(sel_tuple) - 1) + if n_fill < 0: + raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional") + sel_tuple = sel_tuple[:i] + (slice(None),) * n_fill + sel_tuple[i + 1 :] + if len(sel_tuple) > len(shape): + raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional") + sel_tuple = sel_tuple + (slice(None),) * (len(shape) - len(sel_tuple)) + + starts: list[int] = [] + lengths: list[int] = [] + post: list[slice | int] = [] + for dim, (sel, size) in enumerate(zip(sel_tuple, shape, strict=True)): + if isinstance(sel, slice): + start, stop, step = sel.indices(size) + n = len(range(start, stop, step)) + if n == 0: + starts.append(0) + lengths.append(0) + post.append(slice(None)) + elif step > 0: + last = start + (n - 1) * step + starts.append(start) + lengths.append(last - start + 1) + post.append(slice(None, None, step)) + else: + last = start + (n - 1) * step + starts.append(last) + lengths.append(start - last + 1) + post.append(slice(None, None, step)) + else: + assert not isinstance(sel, types.EllipsisType), "Ellipsis already expanded above" + try: + idx = operator.index(sel) + except TypeError: + raise TypeError( + "unsupported selection element " + f"{sel!r}: only 
integers, slices, and Ellipsis are supported" + ) from None + if idx < 0: + idx += size + if not 0 <= idx < size: + raise IndexError(f"index {sel} is out of bounds for axis {dim} with size {size}") + starts.append(idx) + lengths.append(1) + post.append(0) + return starts, lengths, tuple(post) + + +# --- node lifecycle --- + + +async def create_new_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create a group from a group metadata document. Raises `NodeExistsError` + if a node already exists at `path`. Not atomic against concurrent writers.""" + await _resolve_backend(backend).create_group(store, path, metadata, overwrite=False) + + +async def create_overwrite_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create a group, deleting any existing node (and children) first. Not + atomic against concurrent writers.""" + await _resolve_backend(backend).create_group(store, path, metadata, overwrite=True) + + +async def create_new_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create an array from a v2 or v3 metadata document. Raises + `NodeExistsError` if a node already exists. Not atomic against concurrent + writers.""" + await _resolve_backend(backend).create_array(store, path, metadata, overwrite=False) + + +async def create_overwrite_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create an array, deleting any existing node (and children) first. 
Not + atomic against concurrent writers.""" + await _resolve_backend(backend).create_array(store, path, metadata, overwrite=True) + + +async def read_metadata( + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> dict[str, JSON]: + """Read the metadata document of the array or group at `path`. Raises + `zarr.errors.NodeNotFoundError` if no node exists there.""" + return await _resolve_backend(backend).read_metadata(store, path) + + +async def delete_node( + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Delete the node at `path` and everything under it. Raises + `zarr.errors.NodeNotFoundError` if absent. `path=""` clears the store.""" + await _resolve_backend(backend).delete_node(store, path) + + +async def list_children( + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> list[tuple[str, dict[str, JSON]]]: + """List the direct children of the group at `path` as + `(path, metadata_document)` pairs (store-relative, no leading `/`). Raises + `zarr.errors.NodeNotFoundError` if no group exists there.""" + return await _resolve_backend(backend).list_children(store, path) + + +# --- chunk I/O --- + + +async def read_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> np.ndarray[Any, np.dtype[Any]]: + """Read and decode the whole chunk at `chunk_coords`. The metadata document + is authoritative; missing chunks decode to the fill value. 
The result is a + read-only view (`.copy()` for a writable array).""" + be = _resolve_backend(backend) + raw = await be.read_chunk(store, path, metadata, tuple(chunk_coords)) + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + return np.frombuffer(raw, dtype=dtype).reshape(chunk_shape) + + +async def read_encoded_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> bytes | None: + """Read the raw, still-encoded bytes of the chunk at `chunk_coords`, or + `None` if absent. Pure store I/O (`store.get` on the chunk key): the + `backend` argument is accepted for signature uniformity but unused.""" + key = _chunk_key(metadata, path, tuple(chunk_coords)) + buf = await store.get(key, prototype=default_buffer_prototype()) + return None if buf is None else buf.to_bytes() + + +async def write_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + value: npt.ArrayLike, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Encode `value` with the codecs in `metadata` and store it as the chunk at + `chunk_coords`. `value` must match the chunk shape exactly.""" + be = _resolve_backend(backend) + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + arr = np.ascontiguousarray(np.asarray(value, dtype=dtype)) + if arr.shape != chunk_shape: + raise ValueError(f"value shape {arr.shape} does not match chunk shape {chunk_shape}") + await be.write_chunk(store, path, metadata, tuple(chunk_coords), arr.tobytes()) + + +async def delete_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Delete the chunk at `chunk_coords`. 
async def read_region(
    metadata: Mapping[str, JSON],
    store: Store,
    path: str,
    selection: BasicSelection,
    *,
    options: CrudOptions | None = None,
    backend: CrudBackend | str | None = None,
) -> np.ndarray[Any, np.dtype[Any]]:
    """Read and decode the region addressed by a numpy basic-indexing `selection`.

    `selection` may contain integers, slices (including stepped ones) and
    `Ellipsis`; anything else raises `TypeError`. The step-1 bounding box of the
    selection is fetched with a single backend call, and strides, reversals and
    integer-axis removal are then applied to it as numpy views. Missing chunks
    decode to the fill value. The result is a read-only view.

    Note: a `slice(0, N, step)` reads `O(N)` bytes even though `O(N / step)` are
    returned; for sparse selections over large arrays prefer `read_chunk`.
    """
    resolved = _resolve_backend(backend)
    np_dtype = _chunk_dtype_and_shape(metadata)[0]
    origin, extent, view_index = _normalize_selection(selection, _array_shape(metadata))
    if all(extent):
        # Every axis is non-empty (also true for a zero-dimensional array):
        # fetch the whole bounding box in one call.
        payload = await resolved.read_subset(
            store, path, metadata, tuple(origin), tuple(extent)
        )
        box = np.frombuffer(payload, dtype=np_dtype).reshape(extent)
    else:
        # An empty selection needs no I/O; build an empty read-only array so
        # the result has the same flags as the fetched case.
        box = np.empty(extent, dtype=np_dtype)
        box.flags.writeable = False
    return cast("np.ndarray[Any, np.dtype[Any]]", box[view_index])
CrudBackend(Protocol): + """The byte/metadata-level contract a CRUD backend must implement. + + Methods take neutral types: the metadata document as a `dict`, a zarr + `Store`, and plain zarr paths (`""`, `"foo/bar"`). They return raw bytes, + parsed JSON documents, or `None`. The shared `zarr.crud` facade builds the + numpy- and selection-level API on top of these. + + `create_*` raise `zarr.crud.NodeExistsError` when a node exists and + `overwrite` is false. `read_metadata`/`delete_node`/`list_children` raise + `zarr.errors.NodeNotFoundError` when the target is missing. + + Note: because this protocol is `runtime_checkable`, `isinstance` checks only + verify that the method names exist, not their signatures or that they are + async. Static type checking (mypy) is the authoritative conformance check. + + `read_chunk` and `read_subset` must return immutable `bytes` (not + `bytearray`): the facade wraps them with `numpy.frombuffer`, which yields a + read-only array only for immutable buffers. + """ + + async def create_array( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: ... + + async def create_group( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: ... + + async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: ... + + async def read_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> bytes: ... + + async def read_subset( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + start: Sequence[int], + shape: Sequence[int], + ) -> bytes: ... + + async def write_chunk( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + coords: tuple[int, ...], + data: bytes, + ) -> None: ... + + async def delete_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> None: ... + + async def delete_node(self, store: Store, path: str) -> None: ... 
def parse_array_metadata(
    metadata: Mapping[str, JSON],
) -> ArrayV3Metadata | ArrayV2Metadata:
    """Build an array metadata object from a metadata document.

    A document whose ``"zarr_format"`` equals 3 is parsed as v3; every other
    document is handed to the v2 parser. The input mapping is copied first, so
    it is never mutated.
    """
    document = dict(metadata)
    parser = ArrayV3Metadata if document.get("zarr_format") == 3 else ArrayV2Metadata
    return parser.from_dict(document)
_native_dtype(meta_obj: ArrayV3Metadata | ArrayV2Metadata) -> np.dtype[Any]: + """Numpy dtype in native byte order (zarrs and the facade assume native).""" + return meta_obj.dtype.to_native_dtype().newbyteorder("=") + + +def _chunk_shape(meta_obj: ArrayV3Metadata | ArrayV2Metadata) -> tuple[int, ...]: + if isinstance(meta_obj, ArrayV3Metadata): + grid = meta_obj.chunk_grid + if not isinstance(grid, RegularChunkGridMetadata): + raise NotImplementedError("only regular chunk grids are supported") + return tuple(grid.chunk_shape) + return tuple(meta_obj.chunks) + + +def _array_spec(meta_obj: ArrayV3Metadata | ArrayV2Metadata, shape: tuple[int, ...]) -> ArraySpec: + order = meta_obj.order if isinstance(meta_obj, ArrayV2Metadata) else "C" + return ArraySpec( + shape=shape, + dtype=meta_obj.dtype, + fill_value=meta_obj.fill_value, + config=ArrayConfig.from_dict({"order": order}), + prototype=default_buffer_prototype(), + ) + + +def _is_all_fill_value( + arr: np.ndarray[Any, np.dtype[Any]], fill_value: Any, dtype: np.dtype[Any] +) -> bool: + """Whether every element of `arr` equals the fill value (NaN-aware for floats).""" + if fill_value is None: + return False + fill = np.asarray(fill_value, dtype=dtype) + if np.issubdtype(dtype, np.floating) or np.issubdtype(dtype, np.complexfloating): + return bool(np.array_equal(arr, np.broadcast_to(fill, arr.shape), equal_nan=True)) + return bool(np.all(arr == fill)) + + +class ReferenceBackend: + """Pure-Python CRUD backend wrapping zarr-python's own machinery. + + Constructs no high-level `Array` for chunk operations (it drives the codec + pipeline directly); it does reuse `AsyncArray.getitem` for multi-chunk + subset reads, which is exactly the `BasicIndexer` + codec-pipeline read path. 
+ """ + + async def _node_exists(self, store: Store, path: str) -> bool: + proto = default_buffer_prototype() + sp = StorePath(store, path.strip("/")) + for meta_key in (ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON): + if await (sp / meta_key).get(prototype=proto) is not None: + return True + return False + + async def create_array( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + meta_obj = parse_array_metadata(metadata) + await self._create(store, path, meta_obj, overwrite=overwrite) + + async def create_group( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + meta_obj = GroupMetadata.from_dict(dict(metadata)) + await self._create(store, path, meta_obj, overwrite=overwrite) + + async def _create(self, store: Store, path: str, meta_obj: Any, *, overwrite: bool) -> None: + sp = StorePath(store, path.strip("/")) + if overwrite: + await store.delete_dir(path.strip("/")) + elif await self._node_exists(store, path): + raise NodeExistsError(f"a node already exists at path {path!r}") + await save_metadata(sp, meta_obj, ensure_parents=True) + + async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: + from zarr.core._json import buffer_to_json_object + + proto = default_buffer_prototype() + sp = StorePath(store, path.strip("/")) + buf = await (sp / ZARR_JSON).get(prototype=proto) + if buf is not None: + return buffer_to_json_object(buf) + for meta_key in (ZARRAY_JSON, ZGROUP_JSON): + b = await (sp / meta_key).get(prototype=proto) + if b is not None: + doc = buffer_to_json_object(b) + zattrs = await (sp / ZATTRS_JSON).get(prototype=proto) + if zattrs is not None: + doc["attributes"] = buffer_to_json_object(zattrs) + return doc + raise NodeNotFoundError(f"no node found at path {path!r}") + + async def read_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] 
+ ) -> bytes: + meta_obj = parse_array_metadata(metadata) + shape = _chunk_shape(meta_obj) + np_dtype = _native_dtype(meta_obj) + sp = StorePath(store, path.strip("/")) + chunk_key = meta_obj.encode_chunk_key(coords) + buf = await (sp / chunk_key).get(prototype=default_buffer_prototype()) + if buf is None: + arr = np.full(shape, meta_obj.fill_value, dtype=np_dtype) + else: + pipeline = create_codec_pipeline(meta_obj) + spec = _array_spec(meta_obj, shape) + decoded = list(await pipeline.decode([(buf, spec)])) + nd_buf = decoded[0] + if nd_buf is None: + arr = np.full(shape, meta_obj.fill_value, dtype=np_dtype) + else: + arr = np.asarray(nd_buf.as_numpy_array(), dtype=np_dtype) + return np.ascontiguousarray(arr).tobytes() + + async def read_subset( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + start: Sequence[int], + shape: Sequence[int], + ) -> bytes: + meta_obj = parse_array_metadata(metadata) + np_dtype = _native_dtype(meta_obj) + async_arr = AsyncArray(metadata=meta_obj, store_path=StorePath(store, path.strip("/"))) + selection = tuple(slice(s, s + length) for s, length in zip(start, shape, strict=True)) + result = await async_arr.getitem(selection) + return np.ascontiguousarray(np.asarray(result, dtype=np_dtype)).tobytes() + + async def write_chunk( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + coords: tuple[int, ...], + data: bytes, + ) -> None: + meta_obj = parse_array_metadata(metadata) + shape = _chunk_shape(meta_obj) + np_dtype = _native_dtype(meta_obj) + sp = StorePath(store, path.strip("/")) + chunk_key = meta_obj.encode_chunk_key(coords) + arr = np.frombuffer(data, dtype=np_dtype).reshape(shape) + if _is_all_fill_value(arr, meta_obj.fill_value, np_dtype): + await (sp / chunk_key).delete() + return + pipeline = create_codec_pipeline(meta_obj) + spec = _array_spec(meta_obj, shape) + encoded = list(await pipeline.encode([(NDBuffer.from_ndarray_like(arr), spec)])) + buf = encoded[0] + if buf is None: + await 
(sp / chunk_key).delete() + else: + await (sp / chunk_key).set(buf) + + async def delete_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> None: + meta_obj = parse_array_metadata(metadata) + sp = StorePath(store, path.strip("/")) + await (sp / meta_obj.encode_chunk_key(coords)).delete() + + async def delete_node(self, store: Store, path: str) -> None: + if not await self._node_exists(store, path): + raise NodeNotFoundError(f"no node found at path {path!r}") + await store.delete_dir(path.strip("/")) + + async def list_children(self, store: Store, path: str) -> list[tuple[str, dict[str, JSON]]]: + p = path.strip("/") + if not await self._node_exists(store, path): + raise NodeNotFoundError(f"no node found at path {path!r}") + prefix = f"{p}/" if p else "" + children: list[tuple[str, dict[str, JSON]]] = [] + async for name in store.list_dir(prefix): + child_path = f"{p}/{name}" if p else name + if await self._node_exists(store, child_path): + children.append((name, await self.read_metadata(store, child_path))) + return children diff --git a/src/zarr/crud/_registry.py b/src/zarr/crud/_registry.py new file mode 100644 index 0000000000..84fde1bc20 --- /dev/null +++ b/src/zarr/crud/_registry.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from zarr.core.config import config + +if TYPE_CHECKING: + from zarr.crud._backend import CrudBackend + +# Backends are registered at import time (reference by zarr.crud, zarrs by +# zarr.zarrs). CPython's import lock plus the GIL make this dict safe without +# additional locking. +_BACKENDS: dict[str, CrudBackend] = {} + + +def register_backend(name: str, backend: CrudBackend) -> None: + """Register a CRUD backend instance under `name`.""" + _BACKENDS[name] = backend + + +def get_backend(name: str | None = None) -> CrudBackend: + """Resolve a backend by name, or the configured default when `name` is None. 
+ + Selecting `"zarrs"` imports `zarr.zarrs` if needed so it can self-register. + """ + if name is None: + name = config.get("crud.backend") + if name not in _BACKENDS and name == "zarrs": + # "reference" is pre-registered by zarr.crud at import; "zarrs" lives in a + # separate package that may not be imported yet, so load it on demand. + try: + import zarr.zarrs # noqa: F401 (import registers the zarrs backend) + except ImportError as e: + raise ImportError( + "the 'zarrs' CRUD backend requires the zarrs-bindings extension; " + "install it with: uv sync --group zarrs" + ) from e + if name not in _BACKENDS: + raise KeyError(f"no CRUD backend registered as {name!r}; registered: {sorted(_BACKENDS)}") + return _BACKENDS[name] diff --git a/src/zarr/zarrs/__init__.py b/src/zarr/zarrs/__init__.py new file mode 100644 index 0000000000..bff68ade62 --- /dev/null +++ b/src/zarr/zarrs/__init__.py @@ -0,0 +1,26 @@ +""" +The zarrs CRUD backend for `zarr.crud`, backed by the Rust +[`zarrs`](https://zarrs.dev) crate. + +Importing this module registers the `"zarrs"` backend. Requires the +`zarrs-bindings` extension (in-repo Rust crate; `uv sync --group zarrs`). Select +it with `zarr.config.set({"crud.backend": "zarrs"})` or per call via +`backend="zarrs"`. +""" + +try: + import _zarrs_bindings +except ImportError as e: + raise ImportError( + "zarr.zarrs requires the `zarrs-bindings` package, which is not installed. " + "It is built from the zarr-python repository: run `uv sync --group zarrs`." 
+ ) from e + +from zarr.crud import register_backend +from zarr.zarrs._backend import ZarrsBackend + +__version__: str = _zarrs_bindings.version() + +register_backend("zarrs", ZarrsBackend()) + +__all__ = ["ZarrsBackend", "__version__"] diff --git a/src/zarr/zarrs/_backend.py b/src/zarr/zarrs/_backend.py new file mode 100644 index 0000000000..e95759660b --- /dev/null +++ b/src/zarr/zarrs/_backend.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +import asyncio +import json +from contextlib import contextmanager +from typing import TYPE_CHECKING, cast + +import _zarrs_bindings as _zb + +from zarr.crud import NodeExistsError +from zarr.errors import NodeNotFoundError +from zarr.zarrs._bridge import resolve_store + +if TYPE_CHECKING: + from collections.abc import Iterator, Mapping, Sequence + + from zarr.abc.store import Store + from zarr.core.common import JSON + + +def _node_path(path: str) -> str: + """Convert a zarr path (`""`, `"foo/bar"`) to a zarrs node path (`"/"`, + `"/foo/bar"`).""" + return f"/{path.strip('/')}" + + +@contextmanager +def _translate_errors() -> Iterator[None]: + try: + yield + except _zb.NodeNotFoundError as err: + raise NodeNotFoundError(str(err)) from err + except _zb.NodeExistsError as err: + raise NodeExistsError(str(err)) from err + + +class ZarrsBackend: + """CRUD backend backed by the Rust `zarrs` crate via `_zarrs_bindings`. + + Owns the zarrs-specific plumbing: JSON-serializing the metadata document, + the `/`-prefixed node-path form, store resolution, offloading the blocking + Rust calls to a worker thread, and translating binding exceptions to the + canonical `zarr.crud` / `zarr.errors` types. + + Known limitation: creating a Zarr v2 *group* with attributes writes a + non-standard `.zattrs` (the attributes nested under an ``"attributes"`` key) + that zarr-python and other readers interpret incorrectly. This is a + zarrs-crate behavior; the pure-Python reference backend writes the standard + layout. 
Prefer the reference backend for writing v2 groups until the zarrs + crate is fixed. + """ + + async def create_array( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + with _translate_errors(): + await asyncio.to_thread( + _zb.create_array, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + overwrite, + ) + + async def create_group( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + with _translate_errors(): + await asyncio.to_thread( + _zb.create_group, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + overwrite, + ) + + async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: + with _translate_errors(): + raw = await asyncio.to_thread(_zb.read_metadata, resolve_store(store), _node_path(path)) + return cast("dict[str, JSON]", json.loads(raw)) + + async def read_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> bytes: + return await asyncio.to_thread( + _zb.retrieve_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(coords), + ) + + async def read_subset( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + start: Sequence[int], + shape: Sequence[int], + ) -> bytes: + return await asyncio.to_thread( + _zb.retrieve_array_subset, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(start), + list(shape), + ) + + async def write_chunk( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + coords: tuple[int, ...], + data: bytes, + ) -> None: + await asyncio.to_thread( + _zb.store_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(coords), + data, + ) + + async def delete_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] 
+ ) -> None: + await asyncio.to_thread( + _zb.erase_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(coords), + ) + + async def delete_node(self, store: Store, path: str) -> None: + with _translate_errors(): + await asyncio.to_thread(_zb.delete_node, resolve_store(store), _node_path(path)) + + async def list_children(self, store: Store, path: str) -> list[tuple[str, dict[str, JSON]]]: + with _translate_errors(): + raw: list[tuple[str, str]] = await asyncio.to_thread( + _zb.list_children, resolve_store(store), _node_path(path) + ) + return [ + (child_path.lstrip("/"), cast("dict[str, JSON]", json.loads(doc))) + for child_path, doc in raw + ] diff --git a/src/zarr/zarrs/_bridge.py b/src/zarr/zarrs/_bridge.py new file mode 100644 index 0000000000..e7632647ad --- /dev/null +++ b/src/zarr/zarrs/_bridge.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import builtins +from typing import TYPE_CHECKING + +from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest +from zarr.core.buffer.core import default_buffer_prototype +from zarr.core.sync import _collect_aiterator, sync +from zarr.storage import LocalStore + +if TYPE_CHECKING: + from zarr.abc.store import Store + +# Alias to avoid shadowing the `list` builtin with the `StoreShim.list` method +# in mypy's class-scope name resolution. +_list = builtins.list + + +class StoreShim: + """ + Synchronous adapter over an async `Store`, called from Rust worker threads. + + Each method blocks the calling thread by submitting a coroutine to the zarr + event-loop thread (`zarr.core.sync`). Methods must never be called from the + zarr event-loop thread itself; the Rust bindings only call them from + `asyncio.to_thread` worker threads. 
+ """ + + def __init__(self, store: Store) -> None: + self._store = store + self._prototype = default_buffer_prototype() + + def get(self, key: str) -> bytes | None: + buf = sync(self._store.get(key, prototype=self._prototype)) + return None if buf is None else buf.to_bytes() + + def get_range(self, key: str, offset: int, length: int | None) -> bytes | None: + byte_range = ( + RangeByteRequest(offset, offset + length) + if length is not None + else OffsetByteRequest(offset) + ) + buf = sync(self._store.get(key, prototype=self._prototype, byte_range=byte_range)) + return None if buf is None else buf.to_bytes() + + def get_suffix(self, key: str, suffix: int) -> bytes | None: + buf = sync( + self._store.get(key, prototype=self._prototype, byte_range=SuffixByteRequest(suffix)) + ) + return None if buf is None else buf.to_bytes() + + def set(self, key: str, value: bytes) -> None: + sync(self._store.set(key, self._prototype.buffer.from_bytes(value))) + + def delete(self, key: str) -> None: + sync(self._store.delete(key)) + + def delete_prefix(self, prefix: str) -> None: + sync(self._store.delete_dir(prefix.rstrip("/"))) + + def getsize(self, key: str) -> int | None: + try: + return sync(self._store.getsize(key)) + except FileNotFoundError: + return None + + def getsize_prefix(self, prefix: str) -> int: + return sync(self._store.getsize_prefix(prefix.rstrip("/"))) + + def list(self) -> _list[str]: + return sorted(sync(_collect_aiterator(self._store.list()))) + + def list_prefix(self, prefix: str) -> _list[str]: + return sorted(sync(_collect_aiterator(self._store.list_prefix(prefix)))) + + def list_dir(self, prefix: str) -> tuple[_list[str], _list[str]]: + """Return `(keys, prefixes)` directly under `prefix`, as zarrs expects: + full keys, and child prefixes ending in `/`.""" + stripped = prefix.rstrip("/") + children = sorted(sync(_collect_aiterator(self._store.list_dir(stripped)))) + keys: _list[str] = [] + prefixes: _list[str] = [] + # A child is classified as a key iff 
it exists as one. Zarr hierarchies + # never store a bare key alongside same-named subkeys (e.g. "a" and + # "a/b"), so a name is never both a key and a prefix. + # TODO: replace the per-child exists() round-trip with a single listing + # pass when this becomes a bottleneck (remote stores). + for child in children: + full = f"{stripped}/{child}" if stripped else child + if sync(self._store.exists(full)): + keys.append(full) + else: + prefixes.append(full + "/") + return keys, prefixes + + +def resolve_store(store: Store) -> StoreShim | dict[str, str]: + """ + Convert a zarr `Store` into the representation `_zarrs_bindings` expects: + a config dict for stores with a native Rust implementation, otherwise a + `StoreShim` that Rust calls back into. + """ + if isinstance(store, LocalStore) and not store.read_only: + return {"filesystem": str(store.root)} + return StoreShim(store) diff --git a/tests/crud/__init__.py b/tests/crud/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/crud/conftest.py b/tests/crud/conftest.py new file mode 100644 index 0000000000..fbf2cf9e02 --- /dev/null +++ b/tests/crud/conftest.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +import zarr +from zarr.storage import LocalStore, MemoryStore + +if TYPE_CHECKING: + from collections.abc import AsyncIterator + from pathlib import Path + + from zarr.abc.store import Store + + +def _zarrs_available() -> bool: + """Return True only if the zarrs CrudBackend is fully usable (registered).""" + try: + import _zarrs_bindings # noqa: F401 + except ImportError: + return False + try: + import zarr.zarrs + except ImportError: + return False + # The module might exist but not yet register the zarrs CrudBackend (e.g. + # Task 4 not yet merged). Verify registration before enabling the param. 
+ try: + import zarr.crud + + zarr.crud.get_backend("zarrs") + except (ImportError, KeyError): + return False + return True + + +@pytest.fixture( + params=[ + "reference", + pytest.param( + "zarrs", + marks=pytest.mark.skipif( + not _zarrs_available(), reason="zarrs-bindings is not installed" + ), + ), + ] +) +def backend(request: pytest.FixtureRequest) -> str: + """A CRUD backend name. The zarrs param is skipped when the extension is absent.""" + import zarr.crud + + if request.param == "zarrs": + import zarr.zarrs # noqa: F401 (registers the zarrs backend) + return str(request.param) + + +@pytest.fixture(params=["memory", "local"]) +async def store(request: pytest.FixtureRequest, tmp_path: Path) -> AsyncIterator[Store]: + if request.param == "memory": + s: Store = await MemoryStore.open() + else: + s = await LocalStore.open(root=tmp_path / "store") + try: + yield s + finally: + s.close() + + +def array_metadata(**kwargs: Any) -> dict[str, Any]: + """An array metadata document built via zarr-python itself.""" + params: dict[str, Any] = { + "shape": (8, 8), + "chunks": (4, 4), + "dtype": "uint16", + "zarr_format": 3, + } | kwargs + arr = zarr.create_array(store=MemoryStore(), **params) + doc = dict(arr.metadata.to_dict()) + if params["zarr_format"] == 2: + doc.pop("attributes", None) + return doc + + +def filled(store: Store, **kwargs: Any) -> tuple[np.ndarray[Any, np.dtype[Any]], dict[str, Any]]: + """Create an 8x8 array 'a', fill it with a ramp, return (data, metadata).""" + params: dict[str, Any] = {"shape": (8, 8), "chunks": (4, 4), "dtype": "uint16"} | kwargs + arr = zarr.create_array(store=store, name="a", **params) + data = np.arange(64, dtype=params["dtype"]).reshape(8, 8) + arr[:, :] = data + doc = dict(arr.metadata.to_dict()) + if params.get("zarr_format") == 2: + doc.pop("attributes", None) + return data, doc diff --git a/tests/crud/test_crud.py b/tests/crud/test_crud.py new file mode 100644 index 0000000000..d4aa79e334 --- /dev/null +++ 
b/tests/crud/test_crud.py @@ -0,0 +1,319 @@ +from __future__ import annotations + +import copy +import json +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +import zarr +from tests.crud.conftest import array_metadata, filled +from zarr.codecs import BloscCodec, GzipCodec, ZstdCodec +from zarr.core.buffer.core import default_buffer_prototype +from zarr.crud import ( + NodeExistsError, + create_new_array, + create_new_group, + create_overwrite_array, + create_overwrite_group, + delete_chunk, + delete_node, + list_children, + read_chunk, + read_encoded_chunk, + read_metadata, + read_region, + write_chunk, +) +from zarr.errors import NodeNotFoundError + +if TYPE_CHECKING: + from zarr.abc.store import Store + +GROUP_META: dict[str, Any] = {"zarr_format": 3, "node_type": "group", "attributes": {"answer": 42}} +GROUP_META_V2: dict[str, Any] = {"zarr_format": 2, "attributes": {"answer": 42}} + + +# --- node lifecycle --- + + +async def test_create_new_group(backend: str, store: Store) -> None: + await create_new_group(GROUP_META, store, "foo", backend=backend) + assert dict(zarr.open_group(store=store, path="foo", mode="r").attrs) == {"answer": 42} + + +async def test_v2_group_attrs_zarr_python_compatible_reference(store: Store) -> None: + # The reference backend writes standard v2 `.zattrs` (the bare attributes + # dict), so zarr-python and other readers see the right attributes. 
+ await create_new_group(GROUP_META_V2, store, "g2", backend="reference") + assert dict(zarr.open_group(store=store, path="g2", mode="r").attrs) == {"answer": 42} + + +@pytest.mark.xfail( + reason="the zarrs backend writes v2 group attributes in a non-standard `.zattrs` " + "layout (nested under an 'attributes' key) that zarr-python reads back wrong; " + "tracked zarrs-crate limitation", + strict=True, +) +async def test_v2_group_attrs_zarr_python_compatible_zarrs(store: Store) -> None: + pytest.importorskip("_zarrs_bindings", reason="zarrs-bindings is not installed") + import zarr.zarrs + + await create_new_group(GROUP_META_V2, store, "g2", backend="zarrs") + assert dict(zarr.open_group(store=store, path="g2", mode="r").attrs) == {"answer": 42} + + +async def test_create_new_group_existing_raises(backend: str, store: Store) -> None: + await create_new_group(GROUP_META, store, "foo", backend=backend) + with pytest.raises(NodeExistsError): + await create_new_group(GROUP_META, store, "foo", backend=backend) + + +async def test_create_overwrite_group_replaces_array(backend: str, store: Store) -> None: + arr = zarr.create_array(store=store, name="foo", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + await create_overwrite_group(GROUP_META, store, "foo", backend=backend) + assert dict(zarr.open_group(store=store, path="foo", mode="r").attrs) == {"answer": 42} + assert not await store.exists("foo/c/0") + + +async def test_create_new_array(backend: str, store: Store) -> None: + await create_new_array(array_metadata(), store, "arr", backend=backend) + a = zarr.open_array(store=store, path="arr", mode="r") + assert a.shape == (8, 8) + assert a.dtype == np.dtype("uint16") + + +async def test_create_new_array_v2(backend: str, store: Store) -> None: + await create_new_array(array_metadata(zarr_format=2), store, "arr", backend=backend) + assert zarr.open_array(store=store, path="arr", mode="r").metadata.zarr_format == 2 + + +async def test_create_overwrite_array(backend: 
str, store: Store) -> None: + zarr.create_group(store=store, path="arr") + await create_overwrite_array(array_metadata(), store, "arr", backend=backend) + assert zarr.open_array(store=store, path="arr", mode="r").shape == (8, 8) + + +async def test_read_metadata(backend: str, store: Store) -> None: + await create_new_array(array_metadata(), store, "arr", backend=backend) + observed = await read_metadata(store, "arr", backend=backend) + raw = await store.get("arr/zarr.json", prototype=default_buffer_prototype()) + assert raw is not None + assert observed == json.loads(raw.to_bytes()) + + +async def test_read_metadata_missing(backend: str, store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await read_metadata(store, "nope", backend=backend) + + +async def test_delete_node(backend: str, store: Store) -> None: + arr = zarr.create_array(store=store, name="doomed", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + await delete_node(store, "doomed", backend=backend) + assert not await store.exists("doomed/zarr.json") + assert not await store.exists("doomed/c/0") + + +async def test_delete_node_missing(backend: str, store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await delete_node(store, "nope", backend=backend) + + +async def test_list_children(backend: str, store: Store) -> None: + root = zarr.create_group(store=store) + root.create_group("sub_group", attributes={"kind": "group"}) + root.create_array("sub_array", shape=(4,), chunks=(2,), dtype="uint8") + by_path = dict(await list_children(store, "", backend=backend)) + assert set(by_path) == {"sub_group", "sub_array"} + assert by_path["sub_group"]["node_type"] == "group" + assert by_path["sub_array"]["node_type"] == "array" + assert not any(p.startswith("/") for p in by_path) + + +async def test_create_read_delete_v2_group(backend: str, store: Store) -> None: + await create_new_group(GROUP_META_V2, store, "g2", backend=backend) + meta = await read_metadata(store, "g2", 
backend=backend) + assert meta["zarr_format"] == 2 + with pytest.raises(NodeExistsError): + await create_new_group(GROUP_META_V2, store, "g2", backend=backend) + await delete_node(store, "g2", backend=backend) + with pytest.raises(NodeNotFoundError): + await read_metadata(store, "g2", backend=backend) + + +async def test_read_metadata_v2_array(backend: str, store: Store) -> None: + await create_new_array(array_metadata(zarr_format=2), store, "arr", backend=backend) + meta = await read_metadata(store, "arr", backend=backend) + assert meta["zarr_format"] == 2 + + +# --- chunk I/O --- + + +@pytest.mark.parametrize("dtype", ["uint8", "int32", "float64", "u2"]) +async def test_read_chunk_differential(backend: str, store: Store, dtype: str) -> None: + data, meta = filled(store, dtype=dtype) + observed = await read_chunk(meta, store, "a", (1, 0), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 0:4]) + + +@pytest.mark.parametrize( + "compressors", [None, (GzipCodec(),), (ZstdCodec(),), (BloscCodec(cname="lz4"),)] +) +async def test_read_chunk_codecs(backend: str, store: Store, compressors: Any) -> None: + data, meta = filled(store, compressors=compressors) + observed = await read_chunk(meta, store, "a", (0, 1), backend=backend) + np.testing.assert_array_equal(observed, data[0:4, 4:8]) + + +async def test_read_chunk_v2(backend: str, store: Store) -> None: + data, meta = filled(store, dtype=" None: + data, meta = filled(store, dtype="uint16", zarr_format=2, order="F") + observed = await read_chunk(meta, store, "a", (1, 1), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_read_chunk_sharding(backend: str, store: Store) -> None: + data, meta = filled(store, chunks=(2, 2), shards=(4, 4)) + observed = await read_chunk(meta, store, "a", (1, 1), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_read_chunk_missing_is_fill(backend: str, store: Store) -> None: + arr = 
zarr.create_array( + store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16", fill_value=7 + ) + meta = dict(arr.metadata.to_dict()) + observed = await read_chunk(meta, store, "a", (0, 0), backend=backend) + np.testing.assert_array_equal(observed, np.full((4, 4), 7, dtype="uint16")) + + +async def test_read_chunk_metadata_view(backend: str, store: Store) -> None: + data, meta = filled(store, dtype="uint16", compressors=None) + view = copy.deepcopy(meta) + view["data_type"] = "uint8" + view["shape"] = [8, 16] + view["chunk_grid"]["configuration"]["chunk_shape"] = [4, 8] + observed = await read_chunk(view, store, "a", (1, 0), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 0:4].view("uint8")) + + +async def test_read_chunk_readonly(backend: str, store: Store) -> None: + _, meta = filled(store) + observed = await read_chunk(meta, store, "a", (0, 0), backend=backend) + assert not observed.flags.writeable + + +async def test_write_chunk_differential(backend: str, store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a", backend=backend) + value = np.arange(16, dtype="uint16").reshape(4, 4) + await write_chunk(meta, store, "a", (0, 1), value, backend=backend) + np.testing.assert_array_equal(zarr.open_array(store=store, path="a", mode="r")[0:4, 4:8], value) + + +async def test_write_chunk_shape_mismatch(backend: str, store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a", backend=backend) + with pytest.raises(ValueError, match="chunk shape"): + await write_chunk( + meta, store, "a", (0, 0), np.zeros((2, 2), dtype="uint16"), backend=backend + ) + + +async def test_delete_chunk(backend: str, store: Store) -> None: + _data, meta = filled(store) + assert await store.exists("a/c/0/0") + await delete_chunk(meta, store, "a", (0, 0), backend=backend) + assert not await store.exists("a/c/0/0") + + +async def test_write_all_fill_chunk_is_dropped(backend: str, store: Store) -> None: 
+ arr = zarr.create_array( + store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16", fill_value=0 + ) + meta = dict(arr.metadata.to_dict()) + await write_chunk(meta, store, "a", (0, 0), np.zeros((4, 4), dtype="uint16"), backend=backend) + assert not await store.exists("a/c/0/0") + np.testing.assert_array_equal( + await read_chunk(meta, store, "a", (0, 0), backend=backend), + np.zeros((4, 4), dtype="uint16"), + ) + + +async def test_overwrite_chunk_with_fill_removes_it(backend: str, store: Store) -> None: + _data, meta = filled(store) # chunk (0,0) exists with nonzero data, fill_value default 0 + assert await store.exists("a/c/0/0") + await write_chunk(meta, store, "a", (0, 0), np.zeros((4, 4), dtype="uint16"), backend=backend) + assert not await store.exists("a/c/0/0") + + +async def test_read_encoded_chunk_matches_store(backend: str, store: Store) -> None: + _, meta = filled(store) + raw = await read_encoded_chunk(meta, store, "a", (0, 0), backend=backend) + expected = await store.get("a/c/0/0", prototype=default_buffer_prototype()) + assert expected is not None + assert raw == expected.to_bytes() + + +async def test_read_encoded_chunk_missing_is_none(backend: str, store: Store) -> None: + arr = zarr.create_array(store=store, name="e", shape=(8, 8), chunks=(4, 4), dtype="uint16") + meta = dict(arr.metadata.to_dict()) + assert await read_encoded_chunk(meta, store, "e", (0, 0), backend=backend) is None + + +# --- region I/O --- + +SELECTIONS: list[Any] = [ + (slice(None), slice(None)), + (slice(2, 7), slice(1, 5)), + (slice(None), 3), + (5, slice(None)), + (3, 4), + (slice(1, 8, 2), slice(None)), + (slice(None), slice(6, 1, -2)), + (slice(-3, None), slice(None, -1)), + ..., + (..., slice(2, 4)), + (slice(0, 0), slice(None)), + (slice(2, 6),), +] + + +@pytest.mark.parametrize("sel", SELECTIONS) +async def test_read_region_differential(backend: str, store: Store, sel: Any) -> None: + data, meta = filled(store) + observed = await read_region(meta, store, 
"a", sel, backend=backend) + np.testing.assert_array_equal(observed, data[sel]) + + +async def test_read_region_sharding(backend: str, store: Store) -> None: + data, meta = filled(store, chunks=(2, 2), shards=(4, 4)) + observed = await read_region(meta, store, "a", (slice(1, 7), slice(3, 8)), backend=backend) + np.testing.assert_array_equal(observed, data[1:7, 3:8]) + + +async def test_read_region_too_many_indices(backend: str, store: Store) -> None: + _, meta = filled(store) + with pytest.raises(IndexError, match="too many indices"): + await read_region(meta, store, "a", (0, 0, 0), backend=backend) + + +async def test_read_region_fancy_rejected(backend: str, store: Store) -> None: + _, meta = filled(store) + with pytest.raises(TypeError, match="only integers, slices"): + await read_region(meta, store, "a", ([0, 1], slice(None)), backend=backend) # type: ignore[arg-type] + + +async def test_read_region_out_of_bounds(backend: str, store: Store) -> None: + _, meta = filled(store) + with pytest.raises(IndexError, match="out of bounds"): + await read_region(meta, store, "a", (8, slice(None)), backend=backend) diff --git a/tests/crud/test_reference_backend.py b/tests/crud/test_reference_backend.py new file mode 100644 index 0000000000..4fef43427b --- /dev/null +++ b/tests/crud/test_reference_backend.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np +import pytest + +import zarr +from zarr.crud import NodeExistsError, get_backend +from zarr.errors import NodeNotFoundError +from zarr.storage import MemoryStore + + +def _array_meta() -> dict[str, Any]: + arr = zarr.create_array(store=MemoryStore(), shape=(8, 8), chunks=(4, 4), dtype="uint16") + return dict(arr.metadata.to_dict()) + + +async def test_reference_round_trip_chunk() -> None: + be = get_backend("reference") + store = MemoryStore() + meta = _array_meta() + await be.create_array(store, "a", meta, overwrite=False) + value = np.arange(16, 
dtype="uint16").reshape(4, 4) + await be.write_chunk(store, "a", meta, (0, 1), value.tobytes()) + raw = await be.read_chunk(store, "a", meta, (0, 1)) + np.testing.assert_array_equal(np.frombuffer(raw, dtype="uint16").reshape(4, 4), value) + + +async def test_reference_read_subset_spans_chunks() -> None: + be = get_backend("reference") + store = MemoryStore() + arr = zarr.create_array(store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16") + data = np.arange(64, dtype="uint16").reshape(8, 8) + arr[:, :] = data + meta = dict(arr.metadata.to_dict()) + raw = await be.read_subset(store, "a", meta, (2, 1), (5, 4)) + np.testing.assert_array_equal(np.frombuffer(raw, dtype="uint16").reshape(5, 4), data[2:7, 1:5]) + + +async def test_reference_create_exists_raises() -> None: + be = get_backend("reference") + store = MemoryStore() + meta = _array_meta() + await be.create_array(store, "a", meta, overwrite=False) + with pytest.raises(NodeExistsError): + await be.create_array(store, "a", meta, overwrite=False) + + +async def test_reference_read_metadata_missing_raises() -> None: + be = get_backend("reference") + with pytest.raises(NodeNotFoundError): + await be.read_metadata(MemoryStore(), "nope") + + +async def test_reference_v2_fortran_order_round_trip() -> None: + be = get_backend("reference") + store = MemoryStore() + arr = zarr.create_array( + store=store, name="f", shape=(4, 6), chunks=(4, 6), dtype="uint16", order="F", zarr_format=2 + ) + data = np.arange(24, dtype="uint16").reshape(4, 6) + arr[:, :] = data + meta = dict(arr.metadata.to_dict()) + meta.pop("attributes", None) + # read_chunk must return native C-contiguous bytes matching the logical data + raw = await be.read_chunk(store, "f", meta, (0, 0)) + np.testing.assert_array_equal(np.frombuffer(raw, dtype="uint16").reshape(4, 6), data) + # write_chunk must store data zarr-python reads back correctly + new = (data + 100).astype("uint16") + await be.write_chunk(store, "f", meta, (0, 0), 
np.ascontiguousarray(new).tobytes()) + back = zarr.open_array(store=store, path="f", mode="r") + np.testing.assert_array_equal(back[:, :], new) diff --git a/tests/crud/test_registry.py b/tests/crud/test_registry.py new file mode 100644 index 0000000000..f5a8f8b829 --- /dev/null +++ b/tests/crud/test_registry.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +import pytest + +from zarr.crud import CrudBackend, NodeExistsError, get_backend, register_backend + + +def test_node_exists_error_is_value_error() -> None: + assert issubclass(NodeExistsError, ValueError) + + +def test_default_backend_is_reference() -> None: + # the reference backend is registered at import and is the configured default + be = get_backend() + assert be is get_backend("reference") + + +def test_get_unknown_backend_raises() -> None: + with pytest.raises(KeyError, match="no CRUD backend"): + get_backend("does-not-exist") + + +def test_register_and_resolve_instance() -> None: + class Dummy: + pass + + dummy = Dummy() + register_backend("dummy-test", dummy) # type: ignore[arg-type] + try: + assert get_backend("dummy-test") is dummy # type: ignore[comparison-overlap] + finally: + from zarr.crud import _registry + + _registry._BACKENDS.pop("dummy-test", None) + + +def test_protocol_is_runtime_checkable() -> None: + # ReferenceBackend (registered as "reference") structurally satisfies the protocol + assert isinstance(get_backend("reference"), CrudBackend) diff --git a/tests/zarrs/__init__.py b/tests/zarrs/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/zarrs/conftest.py b/tests/zarrs/conftest.py new file mode 100644 index 0000000000..092bce5473 --- /dev/null +++ b/tests/zarrs/conftest.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pytest + +import zarr +from zarr.storage import LocalStore, MemoryStore + +if TYPE_CHECKING: + from collections.abc import AsyncIterator + from pathlib import Path + + from 
zarr.abc.store import Store + + +@pytest.fixture(params=["memory", "local"]) +async def store(request: pytest.FixtureRequest, tmp_path: Path) -> AsyncIterator[Store]: + """A writable store: MemoryStore exercises the generic Python-callback bridge, + LocalStore exercises the native zarrs filesystem store.""" + s: Store + if request.param == "memory": + s = await MemoryStore.open() + else: + s = await LocalStore.open(root=tmp_path / "store") + try: + yield s + finally: + s.close() + + +def array_metadata(**kwargs: Any) -> dict[str, Any]: + """Build an array metadata document using zarr-python itself, so the + documents fed to zarrs always match what zarr-python would write.""" + params: dict[str, Any] = { + "shape": (8, 8), + "chunks": (4, 4), + "dtype": "uint16", + "zarr_format": 3, + } | kwargs + arr = zarr.create_array(store=MemoryStore(), **params) + doc = dict(arr.metadata.to_dict()) + if params["zarr_format"] == 2: + # v2 attributes live in .zattrs, not in the .zarray document + doc.pop("attributes", None) + return doc diff --git a/tests/zarrs/test_bridge.py b/tests/zarrs/test_bridge.py new file mode 100644 index 0000000000..f997b052f2 --- /dev/null +++ b/tests/zarrs/test_bridge.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +pytest.importorskip( + "_zarrs_bindings", reason="zarrs-bindings is not installed", exc_type=ImportError +) + +from zarr.storage import LocalStore, MemoryStore +from zarr.zarrs._bridge import StoreShim, resolve_store + +if TYPE_CHECKING: + from pathlib import Path + + +def test_shim_get_set_delete() -> None: + shim = StoreShim(MemoryStore()) + assert shim.get("a/b") is None + shim.set("a/b", b"xyz") + assert shim.get("a/b") == b"xyz" + assert shim.get_range("a/b", 1, 1) == b"y" + assert shim.get_range("a/b", 1, None) == b"yz" + assert shim.get_suffix("a/b", 2) == b"yz" + assert shim.getsize("a/b") == 3 + assert shim.getsize("missing") is None + assert shim.get_range("missing", 
0, 1) is None + assert shim.get_suffix("missing", 1) is None + shim.delete("a/b") + assert shim.get("a/b") is None + + +def test_shim_listing() -> None: + shim = StoreShim(MemoryStore()) + shim.set("zarr.json", b"{}") + shim.set("a/zarr.json", b"{}") + shim.set("a/c/0/0", b"\x00") + assert shim.list() == ["a/c/0/0", "a/zarr.json", "zarr.json"] + assert shim.list_prefix("a/") == ["a/c/0/0", "a/zarr.json"] + assert shim.list_dir("a/") == (["a/zarr.json"], ["a/c/"]) + assert shim.list_dir("") == (["zarr.json"], ["a/"]) + assert shim.getsize_prefix("a/") == 3 + shim.delete_prefix("a/") + assert shim.list() == ["zarr.json"] + + +def test_resolve_store(tmp_path: Path) -> None: + local = LocalStore(tmp_path) + assert resolve_store(local) == {"filesystem": str(tmp_path)} + # read-only LocalStore must go through the shim so writes are rejected in Python + assert isinstance(resolve_store(LocalStore(tmp_path, read_only=True)), StoreShim) + assert isinstance(resolve_store(MemoryStore()), StoreShim) diff --git a/tests/zarrs/test_cache.py b/tests/zarrs/test_cache.py new file mode 100644 index 0000000000..9af72555d8 --- /dev/null +++ b/tests/zarrs/test_cache.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +pytest.importorskip( + "_zarrs_bindings", reason="zarrs-bindings is not installed", exc_type=ImportError +) + +import _zarrs_bindings as zb + +import zarr +import zarr.zarrs # registers the "zarrs" CrudBackend +from zarr.crud import read_chunk, write_chunk +from zarr.storage import LocalStore, MemoryStore + +if TYPE_CHECKING: + from pathlib import Path + + +def _meta(store: Any, name: str = "a") -> dict[str, Any]: + arr = zarr.create_array(store=store, name=name, shape=(8, 8), chunks=(4, 4), dtype="uint16") + arr[:, :] = np.arange(64, dtype="uint16").reshape(8, 8) + return dict(arr.metadata.to_dict()) + + +@pytest.fixture(autouse=True) +def _clear_cache() -> None: + zb.clear_array_cache() + 
+ +async def test_localstore_populates_cache(tmp_path: Path) -> None: + store = await LocalStore.open(root=tmp_path / "s") + meta = _meta(store) + assert zb.array_cache_len() == 0 + await read_chunk(meta, store, "a", (0, 0), backend="zarrs") + assert zb.array_cache_len() == 1 + # second op on the SAME array reuses the entry, does not grow the cache + await read_chunk(meta, store, "a", (1, 1), backend="zarrs") + assert zb.array_cache_len() == 1 + + +async def test_memorystore_is_not_cached() -> None: + store = MemoryStore() + meta = _meta(store) + await read_chunk(meta, store, "a", (0, 0), backend="zarrs") + assert zb.array_cache_len() == 0 + + +async def test_distinct_metadata_distinct_entries(tmp_path: Path) -> None: + store = await LocalStore.open(root=tmp_path / "s") + meta_a = _meta(store, "a") + meta_b = _meta(store, "b") + await read_chunk(meta_a, store, "a", (0, 0), backend="zarrs") + await read_chunk(meta_b, store, "b", (0, 0), backend="zarrs") + assert zb.array_cache_len() == 2 + + +async def test_cache_keyed_on_root_not_just_metadata(tmp_path: Path) -> None: + # two stores at different roots, identical metadata + path, different data. + # A correct cache (keyed on root) must return each store's own data. 
+ s1 = await LocalStore.open(root=tmp_path / "s1") + s2 = await LocalStore.open(root=tmp_path / "s2") + a1 = zarr.create_array(store=s1, name="a", shape=(4, 4), chunks=(4, 4), dtype="uint16") + a1[:, :] = 1 + a2 = zarr.create_array(store=s2, name="a", shape=(4, 4), chunks=(4, 4), dtype="uint16") + a2[:, :] = 2 + meta = dict(a1.metadata.to_dict()) # identical metadata document + out1 = await read_chunk(meta, s1, "a", (0, 0), backend="zarrs") + out2 = await read_chunk(meta, s2, "a", (0, 0), backend="zarrs") + np.testing.assert_array_equal(out1, np.full((4, 4), 1, dtype="uint16")) + np.testing.assert_array_equal(out2, np.full((4, 4), 2, dtype="uint16")) + assert zb.array_cache_len() == 2 + + +async def test_cache_reflects_writes_through_store(tmp_path: Path) -> None: + # after the Array is cached, a write via the cached Array must be visible to + # a subsequent read (proves the cache does not stale-cache chunk data) + store = await LocalStore.open(root=tmp_path / "s") + meta = _meta(store) + await read_chunk(meta, store, "a", (0, 0), backend="zarrs") # caches the Array + new = np.full((4, 4), 99, dtype="uint16") + await write_chunk(meta, store, "a", (0, 0), new, backend="zarrs") # write via (cached) Array + out = await read_chunk(meta, store, "a", (0, 0), backend="zarrs") + np.testing.assert_array_equal(out, new) diff --git a/uv.lock b/uv.lock index dedaf964fa..f63cda4bca 100644 --- a/uv.lock +++ b/uv.lock @@ -4062,6 +4062,21 @@ test = [ { name = "tomlkit" }, { name = "uv" }, ] +zarrs = [ + { name = "coverage" }, + { name = "hypothesis" }, + { name = "numpydoc" }, + { name = "pytest" }, + { name = "pytest-accept" }, + { name = "pytest-asyncio" }, + { name = "pytest-benchmark" }, + { name = "pytest-codspeed" }, + { name = "pytest-cov" }, + { name = "pytest-xdist" }, + { name = "tomlkit" }, + { name = "uv" }, + { name = "zarrs-bindings" }, +] [package.metadata] requires-dist = [ @@ -4168,3 +4183,22 @@ test = [ { name = "tomlkit", specifier = "==0.15.0" }, { name = "uv", 
specifier = "==0.11.19" }, ] +zarrs = [ + { name = "coverage", specifier = ">=7.10" }, + { name = "hypothesis" }, + { name = "numpydoc" }, + { name = "pytest" }, + { name = "pytest-accept" }, + { name = "pytest-asyncio" }, + { name = "pytest-benchmark" }, + { name = "pytest-codspeed" }, + { name = "pytest-cov" }, + { name = "pytest-xdist" }, + { name = "tomlkit" }, + { name = "uv" }, + { name = "zarrs-bindings", directory = "packages/zarrs-bindings" }, +] + +[[package]] +name = "zarrs-bindings" +source = { directory = "packages/zarrs-bindings" }