7 changes: 6 additions & 1 deletion Cargo.toml
@@ -10,7 +10,7 @@ crate-type = ["cdylib", "rlib"]

[dependencies]
pyo3 = { version = "0.27.1", features = ["abi3-py311"] }
zarrs = { version = "0.23.0", features = ["async", "zlib", "pcodec", "bz2"] }
zarrs = { version = "0.23.6", features = ["async", "zlib", "pcodec", "bz2"] }
rayon_iter_concurrent_limit = "0.2.0"
rayon = "1.10.0"
# fix for https://stackoverflow.com/questions/76593417/package-openssl-was-not-found-in-the-pkg-config-search-path
@@ -29,3 +29,8 @@ zarrs_object_store = "0.5.0" # object_store 0.12

[profile.release]
lto = true

[patch.crates-io]
zarrs = { git = "https://github.com/zarrs/zarrs.git", branch = "feat/DecodeMode" }
zarrs_storage = { git = "https://github.com/zarrs/zarrs.git", branch = "feat/DecodeMode" }
zarrs_codec = { git = "https://github.com/zarrs/zarrs.git", branch = "feat/DecodeMode" }
5 changes: 5 additions & 0 deletions README.md
@@ -49,6 +49,10 @@ The `ZarrsCodecPipeline` specific options are:
  - Defaults to `True`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
- `codec_pipeline.direct_io`: enable `O_DIRECT` read/write, needs support from the operating system (currently only Linux) and file system.
  - Defaults to `False`.
- `codec_pipeline.decode_mode`: controls the decode path used when reading a chunk subset.
  - `"auto"` (default): use the full-chunk decode path when the requested subset covers the entire chunk, and the partial-decoder path otherwise.
  - `"partial"`: always use the partial-decoder path, even for whole-chunk reads. Useful for codecs (e.g. sharding) where partial decoding is more efficient even for full-chunk reads.
  - `"full"`: always decode the full chunk and extract the subset.
- `codec_pipeline.strict`: raise exceptions for unsupported operations instead of falling back to the default codec pipeline of `zarr-python`.
  - Defaults to `False`.
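As a minimal sketch of how the options above fit together (the dotted keys mirror the option names listed here; with `zarr` and this package installed, the dict would be passed to `zarr.config.set`):

```python
# Minimal sketch: force the partial-decoder path for all reads.
# The dotted keys follow the option names documented above.
config = {
    "codec_pipeline.path": "zarrs.ZarrsCodecPipeline",
    "codec_pipeline.decode_mode": "partial",  # "auto" (default), "partial", or "full"
}
# With zarr installed, apply it with: zarr.config.set(config)
```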

@@ -63,6 +67,7 @@ zarr.config.set({
        "chunk_concurrent_maximum": None,
        "chunk_concurrent_minimum": 4,
        "direct_io": False,
        "decode_mode": None,
        "strict": False
    }
})
1 change: 1 addition & 0 deletions examples/.gitignore
@@ -0,0 +1 @@
issue_152.zarr
54 changes: 54 additions & 0 deletions examples/issue_152.py
@@ -0,0 +1,54 @@
import platform
import subprocess
import time

import numpy as np
import zarr


def clear_cache():
    if platform.system() == "Darwin":
        # "&&" has no meaning inside an argv list; run through a shell instead
        subprocess.call("sync && sudo purge", shell=True)
    elif platform.system() == "Linux":
        subprocess.call(["sudo", "sh", "-c", "sync; echo 3 > /proc/sys/vm/drop_caches"])
    else:
        raise Exception("Unsupported platform")


zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})

# zarr.config.set({"codec_pipeline.decode_mode": "auto"})
# full read took: 3.3279900550842285
# partial shard read (4095) took: 1.211921215057373
# partial shard read (4096) took: 2.3402509689331055

zarr.config.set({"codec_pipeline.decode_mode": "partial"})
# full read took: 2.2892508506774902
# partial shard read (4095) took: 1.1934266090393066
# partial shard read (4096) took: 1.1788337230682373
Comment on lines +20 to +28
Member Author:
perf exactly matches my expectations with this mode @ilan-gold

Collaborator:
I think there is something strange going on here: these numbers are reversed for me, i.e., forcing partial makes (4096) slower (which makes sense, since it is a full outer-chunk read) but leaves the 4095 one unchanged. What is also constant on my own machine is that 4095 takes the same time in both cases, but I would expect this feature to help with that request specifically, i.e., somehow make it faster.

Collaborator:
As a follow-up, if I use auto here (i.e., from what I can tell, what is currently on main in zarrs-python) or go back to the original example in #152 with the main branch, but make the partial read 2048, it's still slow - it takes just as long to read 2048 as it does the full 4096 for me.

Apologies for the crosstalk, but I mentioned in my comment on the zarrs PR that I believe the hot path is in the partial decoder, and this behavior would confirm that.

Reading half the data takes just as long as reading the full data - even with sequential i/o, I can't believe those numbers.

Member Author:

I wonder if this is a consequence of using a compressed filesystem. Mine isn't compressed, and I assume yours on Mac is? Otherwise I can't really explain this without actually profiling on a machine like yours.

Collaborator (@ilan-gold, Mar 3, 2026):

You are definitely right. With this reproducer on linux, I see that now. But I think my bad reproducer masked my original problem when I moved from linux to mac to create the repro (since the numbers looked like the ones I was seeing on the linux machine).

The data is fundamentally not random, and contains a lot of zeros (so compression kicks in, presumably). Aghh, this is getting to be a mess.

I guess my question would be why this specific fix works, i.e., why is partial mode making the full-shard reads faster? I suspect this is somehow related to my problem.


z = zarr.create_array(
    "examples/issue_152.zarr",
    shape=(8192, 4, 128, 128),
    shards=(4096, 4, 128, 128),
    chunks=(1, 1, 128, 128),
    dtype=np.float64,
    overwrite=True,
)
z[...] = np.random.randn(8192, 4, 128, 128)
Collaborator (@ilan-gold, Mar 3, 2026):
Suggested change:
- z[...] = np.random.randn(8192, 4, 128, 128)
+ z[...] = np.ones((8192, 4, 128, 128))

Ok so if you do this (i.e., let compression do its thing on non-random data), the issue becomes apparent. I also changed the sub-shard read size to highlight this isn't some chunk-boundary condition of sorts. On my linux machine, with auto:

full read took:  0.2127237319946289
partial shard read (2048) took:  0.5163655281066895
partial shard read (4096) took:  0.09389019012451172

with partial:

full read took:  1.429715633392334
partial shard read (2048) took:  0.4784278869628906
partial shard read (4096) took:  0.8891105651855469

and full:

full read took:  0.2066342830657959
partial shard read (2048) took:  0.6641993522644043
partial shard read (4096) took:  0.0905311107635498

These numbers are much more like what I'm seeing. I believe (as I just learned from you) that this also explains the mac-linux performance difference.

Again, apologies, but this appears to be the core of the issue. This matches the behavior I see on my "real" data.

Collaborator:
Note this problem also appears to be independent of the chunking: chunks of (128, 4, 128, 128) also reproduce this behavior of full-shard reads being orders of magnitude faster, even though they are much more data (although small chunk sizes exacerbate the problem).


clear_cache()
t = time.time()
z[...]
print("full read took: ", time.time() - t)

clear_cache()
t = time.time()
z[:4095, ...]
print("partial shard read (4095) took: ", time.time() - t)


clear_cache()
t = time.time()
z[:4096, ...]
print("partial shard read (4096) took: ", time.time() - t)
1 change: 1 addition & 0 deletions python/zarrs/pipeline.py
@@ -62,6 +62,7 @@ def get_codec_pipeline_impl(
            ),
            num_threads=config.get("threading.max_workers", None),
            direct_io=config.get("codec_pipeline.direct_io", False),
            decode_mode=config.get("codec_pipeline.decode_mode", None),
        )
    except TypeError as e:
        if strict:
24 changes: 20 additions & 4 deletions src/lib.rs
@@ -21,7 +21,7 @@ use utils::is_whole_chunk;
use zarrs::array::{
    ArrayBytes, ArrayBytesDecodeIntoTarget, ArrayBytesFixedDisjointView, ArrayMetadata,
    ArrayPartialDecoderTraits, ArrayToBytesCodecTraits, CodecChain, CodecOptions, DataType,
    FillValue, StoragePartialDecoder, copy_fill_value_into, update_array_bytes,
    DecodeMode, FillValue, StoragePartialDecoder, copy_fill_value_into, update_array_bytes,
};
use zarrs::config::global_config;
use zarrs::convert::array_metadata_v2_to_v3;
@@ -218,6 +218,7 @@ impl CodecPipelineImpl {
        chunk_concurrent_maximum=None,
        num_threads=None,
        direct_io=false,
        decode_mode=None,
    ))]
    #[new]
    fn new(
@@ -228,6 +229,7 @@ impl CodecPipelineImpl {
        chunk_concurrent_maximum: Option<usize>,
        num_threads: Option<usize>,
        direct_io: bool,
        decode_mode: Option<&str>,
    ) -> PyResult<Self> {
        store_config.direct_io(direct_io);
        let metadata = serde_json::from_str(array_metadata).map_py_err::<PyTypeError>()?;
@@ -239,7 +241,19 @@ }
        };
        let codec_chain =
            Arc::new(CodecChain::from_metadata(&metadata_v3.codecs).map_py_err::<PyTypeError>()?);
        let codec_options = CodecOptions::default().with_validate_checksums(validate_checksums);
        let decode_mode = match decode_mode {
            None | Some("auto") => DecodeMode::Auto,
            Some("partial") => DecodeMode::Partial,
            Some("full") => DecodeMode::Full,
            Some(s) => {
                return Err(PyErr::new::<PyValueError, _>(format!(
                    "invalid decode_mode {s:?}, expected \"auto\", \"partial\", or \"full\""
                )));
            }
        };
        let codec_options = CodecOptions::default()
            .with_validate_checksums(validate_checksums)
            .with_decode_mode(decode_mode);

        let chunk_concurrent_minimum =
            chunk_concurrent_minimum.unwrap_or(global_config().chunk_concurrent_minimum());
@@ -300,7 +314,9 @@ impl CodecPipelineImpl {
        // Assemble partial decoders ahead of time and in parallel
        let partial_chunk_items = chunk_descriptions
            .iter()
            .filter(|item| !(is_whole_chunk(item)))
            .filter(|item| {
                !is_whole_chunk(item) || codec_options.decode_mode() == DecodeMode::Partial
            })
            .unique_by(|item| item.key.clone())
            .collect::<Vec<_>>();
        let mut partial_decoder_cache: HashMap<StoreKey, Arc<dyn ArrayPartialDecoderTraits>> =
@@ -350,7 +366,7 @@ impl CodecPipelineImpl {
            };
            let target = ArrayBytesDecodeIntoTarget::Fixed(&mut output_view);
            // See zarrs::array::Array::retrieve_chunk_subset_into
            if is_whole_chunk(&item) {
            if is_whole_chunk(&item) && codec_options.decode_mode() != DecodeMode::Partial {
                // See zarrs::array::Array::retrieve_chunk_into
                if let Some(chunk_encoded) =
                    self.store.get(&item.key).map_py_err::<PyRuntimeError>()?
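For reference, the string-to-`DecodeMode` mapping added in `CodecPipelineImpl::new` can be mirrored in Python to document the accepted values (a hypothetical helper for illustration only; the real validation happens in the Rust match above):

```python
def parse_decode_mode(mode):
    """Mirror the Rust match on `decode_mode` in CodecPipelineImpl::new.

    None and "auto" select the automatic path; "partial" and "full"
    force the corresponding decode path; any other string is rejected.
    """
    if mode is None or mode == "auto":
        return "auto"
    if mode in ("partial", "full"):
        return mode
    raise ValueError(
        f'invalid decode_mode {mode!r}, expected "auto", "partial", or "full"'
    )
```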