From 52decc2a7e0baedfcc0d06176c10deeeb0947feb Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 17 Jun 2026 14:24:05 -0400 Subject: [PATCH 1/4] chore: CI workflows for missing docs and RST docstrings --- .github/workflows/docs.yml | 5 + ci/check_documented_exports.py | 131 +++++++++++++++++++++++ ci/lint_docs.py | 187 +++++++++++++++++++++++++++++++++ src/zarr/core/metadata/v2.py | 2 +- src/zarr/testing/store.py | 6 +- 5 files changed, 327 insertions(+), 4 deletions(-) create mode 100644 ci/check_documented_exports.py create mode 100644 ci/lint_docs.py diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index ab745beec1..02d87081ad 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -24,6 +24,11 @@ jobs: persist-credentials: false - uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 - run: uv sync --group docs + # Fast source-level guards that need no built site, so they run before the (slower) + # build for a quick failure: every public export is in the API reference, and no + # docstring/Markdown carries reStructuredText markup that MkDocs won't render. + - run: uv run python ci/check_documented_exports.py + - run: uv run python ci/lint_docs.py # --strict turns warnings into errors, so a docs code block that fails to execute # at build time (e.g. a non-exec python fence disrupting a later exec="true" block) # fails CI instead of merging as a silent warning. diff --git a/ci/check_documented_exports.py b/ci/check_documented_exports.py new file mode 100644 index 0000000000..4db5072552 --- /dev/null +++ b/ci/check_documented_exports.py @@ -0,0 +1,131 @@ +"""Check that every public top-level export is in the API reference.
+ +The API reference is authored as explicit mkdocstrings directives (``::: target``) +under ``docs/api/`` -- one per documented symbol -- rather than autodoc, so a newly +added ``zarr.__all__`` entry will not appear in the docs until someone writes a page +for it (or it becomes a rendered member of an already-documented module). This script +catches that gap: it resolves every ``:::`` target, expands module directives into the +members they render (honoring ``members: false``), and asserts each name in +``zarr.__all__`` resolves to a documented object. + +Usage: + python ci/check_documented_exports.py + +Raises ValueError if any public export is undocumented. +""" + +from __future__ import annotations + +import importlib +import re +from pathlib import Path +from types import ModuleType +from typing import Any + +import zarr + +REPO_ROOT = Path(__file__).parent.parent.resolve() +API_DOCS_ROOT = REPO_ROOT / "docs" / "api" + +# Names in zarr.__all__ that are intentionally absent from the API reference. +# Keep this list short and justified -- it is the only escape hatch from the guard. +EXEMPT_EXPORTS = { + "__version__", # version string, not an API symbol + "print_debug_info", # debugging helper, deliberately not in the reference +} + +# A mkdocstrings autodoc directive: `::: some.dotted.target` at the start of a line. +DIRECTIVE_RE = re.compile(r"^:::[ \t]+(?P<target>\S+)", re.MULTILINE) +# `members: false` (or `members: []`) within a directive's option block disables +# rendering of a module's members.
+MEMBERS_DISABLED_RE = re.compile(r"^\s+members:\s*(false|\[\s*\])\s*$") + + +def resolve(target: str) -> Any: + """Resolve a `:::` target (a dotted path) to the Python object it documents.""" + try: + return importlib.import_module(target) + except ImportError: + pass + module_path, _, attr = target.rpartition(".") + try: + return getattr(importlib.import_module(module_path), attr) + except (ImportError, AttributeError): + return None + + +def members_disabled(text: str, directive_start: int) -> bool: + """Return True if the directive starting at `directive_start` sets members: false. + + Scans the indented option block immediately following the `:::` line, stopping at + the first non-indented line (the end of this directive's block).""" + for line in text[directive_start:].splitlines()[1:]: + if line.strip() == "": + continue + if not line.startswith((" ", "\t")): + break + if MEMBERS_DISABLED_RE.match(line): + return True + return False + + +def documented_object_ids() -> set[int]: + """Collect the id()s of every object rendered by a `:::` directive under docs/api. + + A directive pointing at an object documents that object. 
A directive pointing at a + module documents the module's public members (its ``__all__`` if defined, else its + public attributes) unless the directive sets ``members: false``.""" + documented: set[int] = set() + for md_file in sorted(API_DOCS_ROOT.rglob("*.md")): + text = md_file.read_text(encoding="utf-8") + for match in DIRECTIVE_RE.finditer(text): + obj = resolve(match.group("target")) + if obj is None: + continue + documented.add(id(obj)) + if isinstance(obj, ModuleType) and not members_disabled(text, match.start()): + member_names = getattr(obj, "__all__", None) or [ + name for name in dir(obj) if not name.startswith("_") + ] + for name in member_names: + member = getattr(obj, name, None) + if member is not None: + documented.add(id(member)) + return documented + + +def find_undocumented_exports() -> list[str]: + documented = documented_object_ids() + missing = [] + for name in zarr.__all__: + if name in EXEMPT_EXPORTS: + continue + if id(getattr(zarr, name)) not in documented: + missing.append(name) + return sorted(missing) + + +def main() -> None: + if not API_DOCS_ROOT.exists(): + raise FileNotFoundError(f"{API_DOCS_ROOT} does not exist.") + + missing = find_undocumented_exports() + if not missing: + print(f"All {len(zarr.__all__)} public exports are documented.") + return + + lines = [ + f"Found {len(missing)} public export(s) in zarr.__all__ missing from the API " + "reference (docs/api/):\n", + ] + lines.extend(f" - zarr.{name}" for name in missing) + lines.append( + "\nAdd a `::: zarr.<name>` page under docs/api/zarr/ (and register it in " + "mkdocs.yml and docs/api/zarr/index.md), or -- if the export is intentionally " + "undocumented -- add it to EXEMPT_EXPORTS in this script with a reason."
+ ) + raise ValueError("\n".join(lines)) + + +if __name__ == "__main__": + main() diff --git a/ci/lint_docs.py b/ci/lint_docs.py new file mode 100644 index 0000000000..3ceda21c92 --- /dev/null +++ b/ci/lint_docs.py @@ -0,0 +1,187 @@ +"""Lint docstrings and Markdown for reStructuredText markup that won't render. + +This project renders API docs with mkdocstrings (``docstring_style: numpy``) and prose +with MkDocs + Markdown -- not Sphinx/reStructuredText. RST constructs that survive from +older docstrings (or muscle memory) are not interpreted: a Sphinx role passes through as +literal text instead of becoming a link, an ``.. note::`` directive renders as a stray +line, and a ``:param:`` field list never becomes a documented parameter. + +Crucially, none of this is caught by the rest of the docs CI. ``mkdocs build --strict`` +sees the residue as ordinary prose (no warning), and ``ci/check_unlinked_types.py`` only +finds cross-references mkdocstrings *attempted* to resolve -- a raw ``:class:`` role is +never attempted, so it leaves no unlinked-type span. This linter fills that gap with a +fast, source-level check that needs no docs build. + +Checks (all are RST syntax that silently fails under MkDocs/mkdocstrings): + + sphinx-role :class:`X`, :func:`X`, :py:meth:`X` -> [`X`][zarr.X] + rst-directive .. note:: / .. code-block:: python -> MkDocs admonition / fenced code + rst-field :param x:, :returns:, :rtype: -> numpydoc Parameters/Returns/Raises + rst-link `text <https://example>`_ -> [text](https://example) + +Usage: + python ci/lint_docs.py [PATH ...] + +PATH defaults to the repo-root ``src/zarr`` and ``docs``. Each PATH may be a file or a +directory (directories are searched for ``*.py`` and ``*.md``). Exits non-zero if any +issues are found.
+""" + +from __future__ import annotations + +import ast +import re +import sys +from dataclasses import dataclass +from pathlib import Path + +REPO_ROOT = Path(__file__).parent.parent.resolve() +DEFAULT_PATHS = (REPO_ROOT / "src" / "zarr", REPO_ROOT / "docs") + +# A Sphinx interpreted-text role: an optional domain, a role name, then a backtick +# target -- e.g. :class:`Foo` or :py:meth:`Foo.bar`. Requires the trailing backtick so +# plain "::" (RST literal markers, time strings, mkdocs-material :icon: shortcodes) and +# URLs ("https://") never match. +SPHINX_ROLE = re.compile(r":[a-zA-Z_]\w*(?::[a-zA-Z_]\w*)?:`[^`\n]+`") + +# An RST directive line: ".. name::" (with or without an argument after it). RST hyperlink +# targets (".. _label:") and comments (".. text") lack the "::" and are not flagged. +RST_DIRECTIVE = re.compile(r"^\s*\.\.[ \t]+[\w-]+::") + +# An RST field-list entry used for docstring fields. The role names above (class, func, +# ...) are deliberately excluded so a role is reported as a role, not a field. +RST_FIELD = re.compile( + r"^\s*:(param|parameter|arg|argument|key|keyword|kwarg|type|returns?|rtype" + r"|raises?|except|exception|yields?|ytype|var|cvar|ivar)\b[^:]*:" +) + +# An RST external hyperlink: `text <url>`_ +RST_LINK = re.compile(r"`[^`\n]+<[^>\n]+>`_") + +CHECKS = ( + ("sphinx-role", SPHINX_ROLE), + ("rst-directive", RST_DIRECTIVE), + ("rst-field", RST_FIELD), + ("rst-link", RST_LINK), +) + + +@dataclass(frozen=True) +class Finding: + path: Path + line: int + category: str + snippet: str + + def format(self) -> str: + try: + location: Path | str = self.path.relative_to(REPO_ROOT) + except ValueError: + location = self.path + return f" {location}:{self.line}: [{self.category}] {self.snippet.strip()}" + + +def _scan_line(text: str) -> list[str]: + """Return every RST-residue category found in a single line (a line can carry more + than one, e.g.
a role and an external link).""" + return [category for category, pattern in CHECKS if pattern.search(text)] + + +def lint_python(path: Path) -> list[Finding]: + """Scan the docstrings (module, classes, functions) of a Python file. + + Only docstrings are checked -- they are what mkdocstrings renders -- so RST-looking + text inside ordinary code or string literals is never misreported.""" + source = path.read_text(encoding="utf-8") + try: + tree = ast.parse(source) + except SyntaxError as exc: # pragma: no cover - surfaced, not silently skipped + return [Finding(path, exc.lineno or 0, "syntax-error", str(exc.msg))] + + findings: list[Finding] = [] + doc_nodes = (ast.Module, ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef) + for node in ast.walk(tree): + if not isinstance(node, doc_nodes): + continue + docstring = ast.get_docstring(node, clean=False) + if not docstring: + continue + # node.body[0].value is the docstring literal; its lineno is the line the string + # opens on, so content line i maps to source line (start + i). 
+ start = node.body[0].value.lineno # type: ignore[attr-defined] + for offset, line in enumerate(docstring.splitlines()): + findings.extend( + Finding(path, start + offset, category, line) for category in _scan_line(line) + ) + return findings + + +def lint_markdown(path: Path) -> list[Finding]: + """Scan a Markdown file, skipping fenced code blocks (``` or ~~~).""" + findings: list[Finding] = [] + fence: str | None = None + for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1): + stripped = line.lstrip() + if fence is None and stripped.startswith(("```", "~~~")): + fence = stripped[:3] + continue + if fence is not None: + if stripped.startswith(fence): + fence = None + continue + findings.extend(Finding(path, lineno, category, line) for category in _scan_line(line)) + return findings + + +def iter_files(paths: tuple[Path, ...]) -> list[Path]: + files: list[Path] = [] + for path in paths: + if path.is_file(): + files.append(path) + elif path.is_dir(): + files.extend(sorted(path.rglob("*.py"))) + files.extend(sorted(path.rglob("*.md"))) + else: + raise FileNotFoundError(f"{path} does not exist") + return files + + +def lint(paths: tuple[Path, ...]) -> list[Finding]: + findings: list[Finding] = [] + for file in iter_files(paths): + if file.suffix == ".py": + findings.append(lint_python(file)) + elif file.suffix == ".md": + findings.append(lint_markdown(file)) + return [f for group in findings for f in group] + + +def main() -> int: + args = sys.argv[1:] + paths = tuple(Path(a).resolve() for a in args) if args else DEFAULT_PATHS + findings = lint(paths) + + if not findings: + print("No reStructuredText residue found in docstrings or Markdown.") + return 0 + + print( + f"Found {len(findings)} reStructuredText construct(s) that will not render under " + "MkDocs/mkdocstrings:\n", + file=sys.stderr, + ) + for finding in findings: + print(finding.format(), file=sys.stderr) + print( + "\nReplace RST markup with its MkDocs equivalent (see 
ci/lint_docs.py header):\n" + " sphinx-role :class:`X` -> [`X`][zarr.X]\n" + " rst-directive .. note:: -> MkDocs admonition (!!! note)\n" + " rst-field :param x: -> numpydoc Parameters/Returns/Raises section\n" + " rst-link `text <url>`_ -> [text](url)", + file=sys.stderr, + ) + return 1 + + +if __name__ == "__main__": + main() diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index ac32521239..91515d87b9 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -120,7 +120,7 @@ def ndim(self) -> int: def chunk_grid(self) -> ChunkGrid: """Backwards-compatible chunk grid property. - .. deprecated:: + !!! warning "Deprecated" Access the chunk grid via the array layer instead. This property will be removed in a future release. """ diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 81024c85c8..88852ca0c6 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -38,21 +38,21 @@ class StoreTests[S: Store, B: Buffer]: @staticmethod def _require_get_sync(store: S) -> SupportsGetSync: - """Skip unless *store* implements :class:`SupportsGetSync`.""" + """Skip unless *store* implements [`SupportsGetSync`][zarr.abc.store.SupportsGetSync].""" if not isinstance(store, SupportsGetSync): pytest.skip("store does not implement SupportsGetSync") return store # type: ignore[unreachable] @staticmethod def _require_set_sync(store: S) -> SupportsSetSync: - """Skip unless *store* implements :class:`SupportsSetSync`.""" + """Skip unless *store* implements [`SupportsSetSync`][zarr.abc.store.SupportsSetSync].""" if not isinstance(store, SupportsSetSync): pytest.skip("store does not implement SupportsSetSync") return store # type: ignore[unreachable] @staticmethod def _require_delete_sync(store: S) -> SupportsDeleteSync: - """Skip unless *store* implements :class:`SupportsDeleteSync`.""" + """Skip unless *store* implements [`SupportsDeleteSync`][zarr.abc.store.SupportsDeleteSync].""" if not isinstance(store,
SupportsDeleteSync): pytest.skip("store does not implement SupportsDeleteSync") return store # type: ignore[unreachable] From b4837a42f2a4ca8f5bf71f41fc124117c86bbe82 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 17 Jun 2026 19:40:34 -0400 Subject: [PATCH 2/4] Add linter for docs --- .markdownlint-cli2.jsonc | 54 ++++++++++++++ .pre-commit-config.yaml | 10 +++ ci/lint_docs.py | 89 +++++++++++++++++++++--- docs/contributing.md | 23 +++--- docs/index.md | 14 ++-- docs/quick-start.md | 2 - docs/release-notes.md | 2 + docs/user-guide/attributes.md | 1 + docs/user-guide/cli.md | 2 +- docs/user-guide/consolidated_metadata.md | 1 - docs/user-guide/data_types.md | 7 ++ docs/user-guide/experimental.md | 1 - docs/user-guide/extending.md | 1 + docs/user-guide/groups.md | 1 - docs/user-guide/installation.md | 3 +- docs/user-guide/performance.md | 6 +- docs/user-guide/storage.md | 8 ++- docs/user-guide/v3_migration.md | 2 - 18 files changed, 185 insertions(+), 42 deletions(-) create mode 100644 .markdownlint-cli2.jsonc diff --git a/.markdownlint-cli2.jsonc b/.markdownlint-cli2.jsonc new file mode 100644 index 0000000000..3dfdf96856 --- /dev/null +++ b/.markdownlint-cli2.jsonc @@ -0,0 +1,54 @@ +// markdownlint-cli2 configuration for zarr-python docs. +// +// We keep the rules that catch real rendering/structure problems and disable those that +// are pure style, conflict with house conventions, or fire false positives against our +// MkDocs/mkdocstrings + pymdownx toolchain. Complementary, not overlapping, with +// ci/lint_docs.py (RST residue + list-breaking fences) and `mkdocs build --strict`. +{ + "config": { + "default": true, + + // House style: Markdown paragraphs are single unwrapped lines, so line length is not + // a meaningful constraint. + "MD013": false, + + // Purely stylistic marker/emphasis choices -- not worth the churn across existing docs. 
+ "MD004": false, // ul bullet style (-, *, +) + "MD007": false, // ul indentation width + "MD050": false, // strong (bold) style + "MD035": false, // hr style + + // False positives from our toolchain: + // mkdocstrings cross-refs `[`X`][zarr.X]` read as undefined reference links (MD052); + // pymdownx.magiclink auto-links bare URLs (MD034); + // md_in_html lets us embed intentional raw HTML (MD033); + // generated/included files (api stubs, snippets) need not open with an H1 (MD041). + "MD052": false, + "MD034": false, + "MD033": false, + "MD041": false, + + // Duplicate headings are legitimate under different sections (e.g. repeated + // "Documentation"); only flag true sibling duplicates. + "MD024": { "siblings_only": true }, + + // Opinionated table/link/command rules with low value for these docs. + "MD055": false, // table pipe style + "MD060": false, // table column style + "MD059": false, // "descriptive" link text (no "click here") + "MD014": false, // $ before commands without shown output + + // markdownlint does not understand MkDocs `!!!` admonitions, so it reads their + // 4-space-indented bodies as indented code blocks and flags them (and, via inferred + // file style, flags real fenced blocks too). Cannot coexist with our admonitions. + "MD046": false // code block style (fenced vs indented) + // Kept on (structural / real rendering bugs): MD012 (multiple blanks), MD022/MD031/MD032 + // (blanks around headings/fences/lists), MD025 (single H1), MD029 (ordered-list prefix), + // MD040 (fenced code language), MD042 (empty links), + // MD047 (trailing newline), MD056 (table column count), among others. 
+ }, + "globs": ["docs/**/*.md"], + "ignores": [ + "docs/api/**" // mkdocstrings stubs (`::: zarr.X`) + ] +} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fb2e8c3c6f..e8f5d7d9cf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,6 +31,16 @@ repos: - id: check-yaml exclude: mkdocs.yml - id: trailing-whitespace + - repo: https://github.com/DavidAnson/markdownlint-cli2 + rev: v0.22.1 + hooks: + # Markdown structure/hygiene. Rule selection and ignores are in + # .markdownlint-cli2.jsonc; complements ci/lint_docs.py (RST residue, + # list-breaking fences) and `mkdocs build --strict`. Scoped to docs/ to + # match the config's globs (pre-commit passes filenames, which would + # otherwise override that scoping and lint all repo Markdown). + - id: markdownlint-cli2 + files: ^docs/ - repo: local hooks: - id: mypy diff --git a/ci/lint_docs.py b/ci/lint_docs.py index 3ceda21c92..ebc6b4b6a7 100644 --- a/ci/lint_docs.py +++ b/ci/lint_docs.py @@ -12,12 +12,22 @@ never attempted, so it leaves no unlinked-type span. This linter fills that gap with a fast, source-level check that needs no docs build. -Checks (all are RST syntax that silently fails under MkDocs/mkdocstrings): +Checks fall into two groups -- RST markup that silently fails under MkDocs/mkdocstrings, +and a Markdown structural problem that renders as valid-but-wrong HTML (so `mkdocs build` +emits no warning): sphinx-role :class:`X`, :func:`X`, :py:meth:`X` -> [`X`][zarr.X] rst-directive .. note:: / .. 
code-block:: python -> MkDocs admonition / fenced code rst-field :param x:, :returns:, :rtype: -> numpydoc Parameters/Returns/Raises rst-link `text <https://example>`_ -> [text](https://example) + list-break unindented code fence between list items -> indent the fence under its item + +The ``list-break`` check catches a fenced code block at column 0 placed *between* two list +items: because the fence is not indented into the preceding item, Markdown ends the list at +the fence and the following item starts a fresh list -- renumbering an ordered list (1, 1, 2 +instead of 1, 2, 3) or breaking the grouping/spacing of any list. markdownlint's MD029 only +notices this for sequentially-numbered ordered lists; lazily-numbered (1., 1.) and unordered +lists slip past it, so this structural check covers the gap. Usage: python ci/lint_docs.py [PATH ...] @@ -58,6 +68,11 @@ # An RST external hyperlink: `text <url>`_ RST_LINK = re.compile(r"`[^`\n]+<[^>\n]+>`_") +# A list item at column 0: an ordered marker (1. / 1)) or a bullet (-, *, +) followed by +# whitespace and content. Leading-whitespace (nested/continuation) lines are intentionally +# not matched -- the list-break check only fires on top-level items. +LIST_ITEM = re.compile(r"^(?:\d+[.)]|[-*+])\s+\S") + CHECKS = ( ("sphinx-role", SPHINX_ROLE), ("rst-directive", RST_DIRECTIVE), @@ -116,11 +131,62 @@ def lint_python(path: Path) -> list[Finding]: return findings +def find_list_breaking_fences(lines: list[str]) -> list[tuple[int, str]]: + """Return ``(lineno, snippet)`` for each fenced code block at column 0 that splits a + list -- i.e. one whose nearest non-blank neighbours on both sides are top-level list + items. Such a fence is not indented into the preceding item, so Markdown closes the + list at the fence and the following item starts a new one. The fix is to indent the + fence (4 spaces) so it nests inside its list item. See the module docstring.
+ + Conservative on purpose: it requires a list item *directly* before and after (a + continuation line or paragraph in between is not matched), keeping false positives low + for a check that fails CI.""" + # Index fenced blocks as (open_index, close_index), 0-based. An unterminated fence is + # malformed Markdown that `mkdocs build` will surface, so it is ignored here. + blocks: list[tuple[int, int]] = [] + fence: str | None = None + open_idx = -1 + for i, line in enumerate(lines): + stripped = line.lstrip() + if fence is None: + if stripped.startswith(("```", "~~~")): + fence = stripped[:3] + open_idx = i + elif stripped.startswith(fence): + blocks.append((open_idx, i)) + fence = None + + def neighbour(start: int, step: int) -> str | None: + j = start + step + while 0 <= j < len(lines): + if lines[j].strip(): + return lines[j] + j += step + return None + + findings: list[tuple[int, str]] = [] + for open_i, close_i in blocks: + if lines[open_i][:1].isspace(): + continue # indented fence: already nested in the list item, not a break + before = neighbour(open_i, -1) + after = neighbour(close_i, +1) + if ( + before is not None + and after is not None + and LIST_ITEM.match(before) + and LIST_ITEM.match(after) + ): + findings.append((open_i + 1, lines[open_i])) + return findings + + def lint_markdown(path: Path) -> list[Finding]: - """Scan a Markdown file, skipping fenced code blocks (``` or ~~~).""" + """Scan a Markdown file: RST residue in prose (skipping fenced code blocks), plus + fenced code blocks that break a list (see find_list_breaking_fences).""" + lines = path.read_text(encoding="utf-8").splitlines() findings: list[Finding] = [] fence: str | None = None - for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1): + for lineno, line in enumerate(lines, start=1): stripped = line.lstrip() if fence is None and stripped.startswith(("```", "~~~")): fence = stripped[:3] @@ -130,6 +196,10 @@ def lint_markdown(path: Path) -> list[Finding]: 
fence = None continue findings.extend(Finding(path, lineno, category, line) for category in _scan_line(line)) + findings.extend( + Finding(path, lineno, "list-break", snippet) + for lineno, snippet in find_list_breaking_fences(lines) + ) return findings @@ -162,26 +232,27 @@ def main() -> int: findings = lint(paths) if not findings: - print("No reStructuredText residue found in docstrings or Markdown.") + print("No reStructuredText residue or list-breaking fences found in docs.") return 0 print( - f"Found {len(findings)} reStructuredText construct(s) that will not render under " - "MkDocs/mkdocstrings:\n", + f"Found {len(findings)} docs issue(s) -- RST markup that will not render under " + "MkDocs/mkdocstrings, or Markdown that renders as valid-but-wrong HTML:\n", file=sys.stderr, ) for finding in findings: print(finding.format(), file=sys.stderr) print( - "\nReplace RST markup with its MkDocs equivalent (see ci/lint_docs.py header):\n" + "\nFix each issue (see ci/lint_docs.py header):\n" " sphinx-role :class:`X` -> [`X`][zarr.X]\n" " rst-directive .. note:: -> MkDocs admonition (!!! note)\n" " rst-field :param x: -> numpydoc Parameters/Returns/Raises section\n" - " rst-link `text <url>`_ -> [text](url)", + " rst-link `text <url>`_ -> [text](url)\n" + " list-break fence between items -> indent the fence 4 spaces to nest it", file=sys.stderr, ) return 1 if __name__ == "__main__": - main() + sys.exit(main()) diff --git a/docs/contributing.md b/docs/contributing.md index 750f7c7a65..bcfd1dbfbd 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -12,23 +12,23 @@ If you find a bug, please raise a [GitHub issue](https://github.com/zarr-develop 1. A minimal, self-contained snippet of Python code reproducing the problem. You can format the code nicely using markdown, e.g.: -```python exec="false" reason="illustrative pseudocode with a '# etc.' placeholder, not runnable" -import zarr -g = zarr.group() -# etc.
-``` + ```python exec="false" reason="illustrative pseudocode with a '# etc.' placeholder, not runnable" + import zarr + g = zarr.group() + # etc. + ``` 2. An explanation of why the current behaviour is wrong/not desired, and what you expect instead. 3. Information about the version of Zarr, along with versions of dependencies and the Python interpreter, and installation information. The version of Zarr can be obtained from the `zarr.__version__` property. Please also state how Zarr was installed, e.g., "installed via pip into a virtual environment", or "installed using conda". Information about other packages installed can be obtained by executing `pip freeze` (if using pip to install packages) or `conda env export` (if using conda to install packages) from the operating system command prompt. The version of the Python interpreter can be obtained by running a Python interactive session, e.g.: -```console -python -``` + ```console + python + ``` -```ansi -Python 3.12.7 | packaged by conda-forge | (main, Oct 4 2024, 15:57:01) [Clang 17.0.6 ] on darwin -``` + ```ansi + Python 3.12.7 | packaged by conda-forge | (main, Oct 4 2024, 15:57:01) [Clang 17.0.6 ] on darwin + ``` ## Enhancement proposals @@ -398,7 +398,6 @@ The Zarr library is an implementation of a file format standard defined external If an existing Zarr format version changes, or a new version of the Zarr format is released, then the Zarr library will generally require changes. It is very likely that a new Zarr format will require extensive breaking changes to the Zarr library, and so support for a new Zarr format in the Zarr library will almost certainly come in new `major` release. When the Zarr library adds support for a new Zarr format, there may be a period of accelerated changes as developers refine newly added APIs and deprecate old APIs. In such a transitional phase breaking changes may be more frequent than usual. 
- ## Experimental API policy The `zarr.experimental` namespace contains features that are under active development and may change without notice. When contributing to or depending on experimental features, please keep the following in mind: diff --git a/docs/index.md b/docs/index.md index b8c2b07ee7..fee5d6d2b8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,7 +6,6 @@ [Developer Chat](https://ossci.zulipchat.com/) | [Zarr specifications](https://zarr-specs.readthedocs.io) - Zarr is a powerful library for storage of n-dimensional arrays, supporting chunking, compression, and various backends, making it a versatile choice for scientific and large-scale data. @@ -37,22 +36,20 @@ conda install --channel conda-forge zarr
-- [:material-clock-fast:{ .lg .middle } __Quick start__](quick-start.md) +- [:material-clock-fast:{ .lg .middle } __Quick start__](quick-start.md) --- New to Zarr? Check out the quick start guide. It contains a brief introduction to Zarr's main concepts and links to additional tutorials. - -- [:material-book-open:{ .lg .middle } __User guide__](user-guide/installation.md) +- [:material-book-open:{ .lg .middle } __User guide__](user-guide/installation.md) --- A detailed guide for how to use Zarr-Python. - -- [:material-api:{ .lg .middle } __API Reference__](api/zarr/open.md) +- [:material-api:{ .lg .middle } __API Reference__](api/zarr/open.md) --- @@ -61,8 +58,7 @@ conda install --channel conda-forge zarr which parameters can be used. It assumes that you have an understanding of the key concepts. - -- [:material-account-group:{ .lg .middle } __Contributor's Guide__](contributing.md) +- [:material-account-group:{ .lg .middle } __Contributor's Guide__](contributing.md) --- @@ -72,7 +68,6 @@ conda install --channel conda-forge zarr
- ## Project Status More information about the Zarr format can be found on the [main website](https://zarr.dev). @@ -80,6 +75,7 @@ More information about the Zarr format can be found on the [main website](https: If you are using Zarr-Python, we would [love to hear about it](https://github.com/zarr-developers/community/issues/19). ### Funding and Support + The project is fiscally sponsored by [NumFOCUS](https://numfocus.org/), a US 501(c)(3) public charity, and development has been supported by the [MRC Centre for Genomics and Global Health](https://github.com/cggh/) diff --git a/docs/quick-start.md b/docs/quick-start.md index 0bad4f2e34..250acb5674 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -46,7 +46,6 @@ written to a `LocalStore` in the `data/example-1.zarr` directory. Zarr supports data compression and filters. For example, to use Blosc compression: - ```python exec="true" session="quickstart" source="above" result="code" # Create a 2D Zarr array with Blosc compression @@ -69,7 +68,6 @@ print(z.info) This compresses the data using the Blosc codec with shuffle enabled for better compression. 
- ### Hierarchical Groups Zarr allows you to create hierarchical groups, similar to directories: diff --git a/docs/release-notes.md b/docs/release-notes.md index 4511c9815e..337ca06c44 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -1,5 +1,7 @@ # Release notes + + ## 3.2.1 (2026-05-05) diff --git a/docs/user-guide/attributes.md b/docs/user-guide/attributes.md index d5961ed38a..9d156f14bf 100644 --- a/docs/user-guide/attributes.md +++ b/docs/user-guide/attributes.md @@ -20,6 +20,7 @@ print('foo' in root.attrs) ```python exec="true" session="attributes" source="above" result="ansi" print(root.attrs['foo']) ``` + ```python exec="true" session="attributes" source="above" result="ansi" print(sorted(z.attrs)) ``` diff --git a/docs/user-guide/cli.md b/docs/user-guide/cli.md index 13fcb6f1b6..b727dd7b48 100644 --- a/docs/user-guide/cli.md +++ b/docs/user-guide/cli.md @@ -114,4 +114,4 @@ zarr --verbose remove-metadata v2 path/to/input.zarr ## Equivalent functions All features of the command-line interface are also available via functions under -`zarr.metadata`. \ No newline at end of file +`zarr.metadata`. diff --git a/docs/user-guide/consolidated_metadata.md b/docs/user-guide/consolidated_metadata.md index d8066b6846..c055295cfa 100644 --- a/docs/user-guide/consolidated_metadata.md +++ b/docs/user-guide/consolidated_metadata.md @@ -101,7 +101,6 @@ removed, or modified, consolidated metadata may not be desirable. of the metadata, at the time they read the root node with its consolidated metadata. - ## Stores Without Support for Consolidated Metadata Some stores may want to opt out of the consolidated metadata mechanism. This diff --git a/docs/user-guide/data_types.md b/docs/user-guide/data_types.md index 6f6bb05033..957ddd88a7 100644 --- a/docs/user-guide/data_types.md +++ b/docs/user-guide/data_types.md @@ -194,9 +194,11 @@ Python supports nearly all of the data types in NumPy. 
If you need a data type t here, it's possible to create it yourself: see [Adding New Data Types](#adding-new-data-types). #### Boolean + - [Boolean][zarr.dtype.Bool] #### Integral + - [Signed 8-bit integer][zarr.dtype.Int8] - [Signed 16-bit integer][zarr.dtype.Int16] - [Signed 32-bit integer][zarr.dtype.Int32] @@ -207,6 +209,7 @@ here, it's possible to create it yourself: see [Adding New Data Types](#adding-n - [Unsigned 64-bit integer][zarr.dtype.UInt64] #### Floating-point + - [16-bit floating-point][zarr.dtype.Float16] - [32-bit floating-point][zarr.dtype.Float32] - [64-bit floating-point][zarr.dtype.Float64] @@ -214,19 +217,23 @@ here, it's possible to create it yourself: see [Adding New Data Types](#adding-n - [128-bit complex floating-point][zarr.dtype.Complex128] #### String + - [Fixed-length UTF-32 string][zarr.dtype.FixedLengthUTF32] - [Variable-length UTF-8 string][zarr.dtype.VariableLengthUTF8] #### Bytes + - [Fixed-length null-terminated bytes][zarr.dtype.NullTerminatedBytes] - [Fixed-length raw bytes][zarr.dtype.RawBytes] - [Variable-length bytes][zarr.dtype.VariableLengthBytes] #### Temporal + - [DateTime64][zarr.dtype.DateTime64] - [TimeDelta64][zarr.dtype.TimeDelta64] #### Struct-like + - [Structured][zarr.dtype.Structured] !!! note "Zarr V3 Structured Data Types" diff --git a/docs/user-guide/experimental.md b/docs/user-guide/experimental.md index 1c6d952c7c..85c7e99f41 100644 --- a/docs/user-guide/experimental.md +++ b/docs/user-guide/experimental.md @@ -78,7 +78,6 @@ print(f"Speedup is {speedup}") Cache effectiveness is particularly pronounced with repeated access to the same data chunks. 
- ## Cache Configuration The CacheStore can be configured with several parameters: diff --git a/docs/user-guide/extending.md b/docs/user-guide/extending.md index cefeb114c5..b5584f8323 100644 --- a/docs/user-guide/extending.md +++ b/docs/user-guide/extending.md @@ -14,6 +14,7 @@ in the following ways: [numcodecs.registry.register_codec](https://numcodecs.readthedocs.io/en/stable/registry.html#numcodecs.registry.register_codec). There are three types of codecs in Zarr: + - array-to-array - array-to-bytes - bytes-to-bytes diff --git a/docs/user-guide/groups.md b/docs/user-guide/groups.md index 5faa26a281..f1013d126e 100644 --- a/docs/user-guide/groups.md +++ b/docs/user-guide/groups.md @@ -130,4 +130,3 @@ Groups also have the [`zarr.Group.tree`][] method, e.g.: ```python exec="true" session="groups" source="above" result="ansi" print(root.tree()) ``` - diff --git a/docs/user-guide/installation.md b/docs/user-guide/installation.md index c902acf171..5621834592 100644 --- a/docs/user-guide/installation.md +++ b/docs/user-guide/installation.md @@ -39,7 +39,7 @@ conda install -c conda-forge zarr Conda does not support optional dependencies, so you will have to manually install any packages needed to enable extra functionality. -# Nightly wheels +## Nightly wheels Development wheels are built nightly and published to the [scientific-python-nightly-wheels](https://anaconda.org/scientific-python-nightly-wheels) index. To install the latest nightly build: @@ -48,6 +48,7 @@ pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python- ``` Note that nightly wheels may be unstable and are intended for testing purposes. 
+ ## Dependency support Zarr has endorsed [Scientific-Python SPEC 0](https://scientific-python.org/specs/spec-0000/) and now follows the version support window as outlined below: diff --git a/docs/user-guide/performance.md b/docs/user-guide/performance.md index 685ce7fe7e..5da5269342 100644 --- a/docs/user-guide/performance.md +++ b/docs/user-guide/performance.md @@ -119,7 +119,6 @@ The order of chunks **within each shard** can be changed via the `subchunk_write By default [`morton`](https://en.wikipedia.org/wiki/Z-order_curve) order provides good spatial locality. [`lexicographic` (i.e., row-major)](https://en.wikipedia.org/wiki/Row-_and_column-major_order), for example, may be better suited to "batched" workflows where some form of sequential reading through a fixed number of outer dimensions is desired, and `colexicographic` is its reverse. `unordered` makes no guarantee about the order in which subchunks are laid out within a shard. - ### Empty chunks It is possible to configure how Zarr handles the storage of chunks that are "empty" @@ -215,11 +214,13 @@ zarr.config.set({'async.concurrency': 128}) ``` Higher concurrency values can improve throughput when: + - Working with remote storage (e.g., S3, GCS) where network latency is high - Reading/writing many small chunks in parallel - The storage backend can handle many concurrent requests Lower concurrency values may be beneficial when: + - Working with local storage with limited I/O bandwidth - Memory is constrained (each concurrent operation requires buffer space) - Using Zarr within a parallel computing framework (see below) @@ -252,7 +253,7 @@ concurrently. **Important**: When using many Dask threads, you may need to reduce both Zarr's `async.concurrency` and `threading.max_workers` settings to avoid creating too many concurrent operations. 
The total number of concurrent I/O operations can be roughly estimated as: -``` +```text total_concurrency ≈ dask_threads × zarr_async_concurrency ``` @@ -292,6 +293,7 @@ You may need to experiment with different values to find the optimal balance for Zarr arrays are designed to be thread-safe for concurrent reads and writes from multiple threads within the same process. However, proper synchronization is required when writing to overlapping regions from multiple threads. For multi-process parallelism, Zarr provides safe concurrent writes as long as: + - Different processes write to different chunks - The storage backend supports atomic writes (most do) diff --git a/docs/user-guide/storage.md b/docs/user-guide/storage.md index d5f840ab4b..7fdfb8942b 100644 --- a/docs/user-guide/storage.md +++ b/docs/user-guide/storage.md @@ -41,17 +41,21 @@ group = zarr.create_group(store=data) print(group) ``` + [](){#user-guide-store-like} + ### StoreLike `StoreLike` values can be: - a `Path` or string indicating a location on the local file system. 
This will create a [local store](#local-store): + ```python exec="true" session="storage" source="above" result="ansi" group = zarr.open_group(store='data/foo/bar') print(group) ``` + ```python exec="true" session="storage" source="above" result="ansi" from pathlib import Path group = zarr.open_group(store=Path('data/foo/bar')) @@ -59,6 +63,7 @@ print(group) ``` - an FSSpec URI string, indicating a [remote store](#remote-store) location: + ```python exec="true" session="storage" source="above" result="ansi" # Note: requires s3fs to be installed group = zarr.open_group( @@ -70,10 +75,12 @@ print(group) ``` - an empty dictionary or None, which will create a new [memory store](#memory-store): + ```python exec="true" session="storage" source="above" result="ansi" group = zarr.create_group(store={}) print(group) ``` + ```python exec="true" session="storage" source="above" result="ansi" group = zarr.create_group(store=None) print(group) @@ -151,7 +158,6 @@ store = zarr.storage.FsspecStore(fs) print(store) ``` - ### Memory Store The [`zarr.storage.MemoryStore`][] is an in-memory store that allows for serialization of diff --git a/docs/user-guide/v3_migration.md b/docs/user-guide/v3_migration.md index 1680547d93..1f7bf26822 100644 --- a/docs/user-guide/v3_migration.md +++ b/docs/user-guide/v3_migration.md @@ -151,8 +151,6 @@ The following stores have been renamed or changed: | `DirectoryStore` | [`zarr.storage.LocalStore`][] | | `FSStore` | [`zarr.storage.FsspecStore`][] | | `TempStore` | Use [`tempfile.TemporaryDirectory`][] with [`LocalStore`][zarr.storage.LocalStore] | -| `zarr. - A number of deprecated stores were also removed. See issue #1274 for more details on the removal of these stores. 
From e837bb280be8b2162f130add5f52573770f7253f Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 17 Jun 2026 19:46:30 -0400 Subject: [PATCH 3/4] Add link checking workflow --- .github/workflows/links.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .github/workflows/links.yml diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml new file mode 100644 index 0000000000..47c2786111 --- /dev/null +++ b/.github/workflows/links.yml @@ -0,0 +1,32 @@ +name: Check links + +on: + repository_dispatch: + workflow_dispatch: + pull_request: + schedule: + - cron: "00 18 * * *" + +jobs: + linkChecker: + runs-on: ubuntu-latest + permissions: + issues: write # required for peter-evans/create-issue-from-file + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + + - name: Link Checker + id: lychee + uses: lycheeverse/lychee-action@8646ba30535128ac92d33dfc9133794bfdd9b411 # v2.8.0 + # with: + # fail: false + + # - name: Create Issue From File + # if: steps.lychee.outputs.exit_code != 0 + # uses: peter-evans/create-issue-from-file@fca9117c27cdc29c6c4db3b86c48e4115a786710 # v6.0.0 + # with: + # title: Link Checker Report + # content-filepath: ./lychee/out.md + # labels: report, automated issue From 4e2a02e7eef2f0d32f633dbaf002f404761ded46 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 17 Jun 2026 19:59:58 -0400 Subject: [PATCH 4/4] fix links --- README.md | 4 ++-- lychee.toml | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 lychee.toml diff --git a/README.md b/README.md index 45410eeb7b..947aba7e91 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,8 @@ Build Status - - build status + + build status diff --git a/lychee.toml b/lychee.toml new file mode 100644 index 0000000000..38b2b8ab7a --- /dev/null +++ 
b/lychee.toml @@ -0,0 +1,20 @@ +# Configuration for the lychee link checker (https://lychee.cli.rs/). +# Auto-discovered as ./lychee.toml by the lychee GitHub Action. + +# Files lychee should not scan for links. +exclude_path = [ + # mkdocs-material theme overrides: hrefs are Jinja expressions like + # `{{ '../' ~ base_url }}`, not real URLs, so lychee cannot resolve them. + "docs/overrides", + # Design notes: working records that point at transient artifacts (commits, + # fork branches, compare URLs) which are expected to disappear over time. + "design", +] + +# URL patterns to ignore (regex, matched against the full URL). +exclude = [ + # Local docs preview server shown in the contributing guide ("hatch run serve"), + # documentation of a command rather than a reachable link. + '^https?://0\.0\.0\.0', + '^https?://(localhost|127\.0\.0\.1)(:\d+)?', +]