Skip to content

Commit 6d1d922

Browse files
committed
Ensure chunk markings are preserved by other combinators
1 parent f9706e4 commit 6d1d922

6 files changed

Lines changed: 47 additions & 29 deletions

File tree

mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ plugins:
2424
source_dirs:
2525
- nav_heading: [Documentation]
2626
base: src
27+
ignore: ["_chunked.py"]
2728
- exclude:
2829
glob:
2930
- requirements.txt

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "numcodecs-combinators"
7-
version = "0.2.9"
7+
version = "0.2.10"
88
description = "Combinator codecs for the `numcodecs` buffer compression API"
99
readme = "README.md"
1010
license = { file = "LICENSE" }
src/numcodecs_combinators/_chunked.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import numpy as np
2+
3+
4+
class ChunkedNdArray(np.ndarray):
    """Marker subclass of ``numpy.ndarray`` that flags its data as chunked.

    The class adds no state and no behaviour beyond the read-only
    :attr:`chunked` property. Code that receives a buffer can test for the
    marker without importing this class, e.g.
    ``getattr(buf, "chunked", False)``; plain arrays and other buffers have
    no such attribute and therefore report ``False``.

    Because this is an ``ndarray`` subclass, arrays derived from an instance
    through NumPy's usual subclass propagation (for example basic slicing)
    are also ``ChunkedNdArray`` instances. Call ``.view(np.ndarray)`` to drop
    the marker again.
    """

    # No per-instance attributes are needed: the marker is the type itself.
    __slots__ = ()

    def __new__(cls, array) -> "ChunkedNdArray":
        """Wrap ``array`` as a chunk-marked array.

        Parameters
        ----------
        array
            Anything accepted by ``numpy.asarray``.

        Returns
        -------
        ChunkedNdArray
            A view of ``numpy.asarray(array)`` typed as ``cls``. When
            ``array`` already is an ``ndarray`` the result shares its memory.
        """
        return np.asarray(array).view(cls)

    @property
    def chunked(self) -> bool:
        """Always ``True``: the instance's type is the chunk marker."""
        return True

src/numcodecs_combinators/best.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ def encode(self, buf: Buffer) -> bytes:
7070
if len(self) == 0:
7171
return buf
7272

73-
data = numcodecs.compat.ensure_ndarray(buf)
73+
data = (
74+
buf if isinstance(buf, np.ndarray) else numcodecs.compat.ensure_ndarray(buf)
75+
)
7476

7577
best_size = np.inf
7678
best_index = None

src/numcodecs_combinators/framed.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from numcodecs.abc import Codec
1717
from typing_extensions import Buffer, Self # MSPV 3.12
1818

19+
from ._chunked import ChunkedNdArray
1920
from .abc import CodecCombinatorMixin
2021

2122

@@ -74,6 +75,8 @@ def encode(self, buf: Buffer) -> bytes:
7475
Encoded and framed data as a bytestring.
7576
"""
7677

78+
chunked = getattr(buf, "chunked", False)
79+
7780
encoded = buf
7881
encoded_ndarray = np.asarray(
7982
numcodecs.compat.ensure_contiguous_ndarray_like(encoded, flatten=False)
@@ -82,7 +85,9 @@ def encode(self, buf: Buffer) -> bytes:
8285
frames = [(encoded_ndarray.dtype, encoded_ndarray.shape)]
8386

8487
for codec in self:
85-
encoded = codec.encode(encoded_ndarray)
88+
encoded = codec.encode(
89+
ChunkedNdArray(encoded_ndarray) if chunked else encoded_ndarray
90+
)
8691
encoded_ndarray = np.asarray(
8792
numcodecs.compat.ensure_contiguous_ndarray_like(encoded, flatten=False)
8893
)
@@ -132,6 +137,8 @@ def decode(self, buf: Buffer, out: Optional[Buffer] = None) -> Buffer:
132137
buffer protocol.
133138
"""
134139

140+
chunked = getattr(out, "chunked", False)
141+
135142
b = numcodecs.compat.ensure_bytes(buf)
136143

137144
b_io = BytesIO(b)
@@ -165,10 +172,11 @@ def decode(self, buf: Buffer, out: Optional[Buffer] = None) -> Buffer:
165172
decoded = decoded.byteswap()
166173

167174
for codec, (dtype, shape) in zip(reversed(self), frames[:-1][::-1]):
175+
empty = np.empty(shape, dtype)
168176
decoded = (
169177
codec.decode(
170178
decoded,
171-
out=np.empty(shape, dtype),
179+
out=ChunkedNdArray(empty) if chunked else empty,
172180
)
173181
.view(dtype)
174182
.reshape(shape)

src/numcodecs_combinators/stack.py

Lines changed: 20 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
from numcodecs.abc import Codec
2121

22+
from ._chunked import ChunkedNdArray
2223
from .abc import CodecCombinatorMixin
2324

2425

@@ -89,11 +90,22 @@ def encode(self, buf: Buffer) -> Buffer:
8990
protocol.
9091
"""
9192

93+
if len(self) == 0:
94+
return buf
95+
96+
chunked = getattr(buf, "chunked", False)
97+
9298
encoded = buf
9399
for codec in self:
94-
encoded = codec.encode(
100+
encoded_ndarray = np.asarray(
95101
numcodecs.compat.ensure_contiguous_ndarray_like(encoded, flatten=False)
96102
)
103+
encoded = codec.encode(
104+
ChunkedNdArray(encoded_ndarray) if chunked else encoded_ndarray
105+
)
106+
107+
if getattr(encoded, "chunked", False):
108+
return np.array(encoded).view(np.ndarray) # type: ignore
97109
return encoded
98110

99111
def decode(self, buf: Buffer, out: Optional[Buffer] = None) -> Buffer:
@@ -151,7 +163,7 @@ def encode_decode(self, buf: Buffer) -> Buffer:
151163
silhouettes.append((encoded.shape, encoded.dtype))
152164
encoded = np.asarray(
153165
numcodecs.compat.ensure_contiguous_ndarray_like(
154-
codec.encode(_MaybeChunkedNdArray(encoded) if chunked else encoded),
166+
codec.encode(ChunkedNdArray(encoded) if chunked else encoded),
155167
flatten=False,
156168
)
157169
)
@@ -162,12 +174,13 @@ def encode_decode(self, buf: Buffer) -> Buffer:
162174
shape, dtype = silhouettes.pop()
163175
out = np.empty(shape=shape, dtype=dtype)
164176
decoded = (
165-
codec.decode(decoded, _MaybeChunkedNdArray(out) if chunked else out)
177+
codec.decode(decoded, ChunkedNdArray(out) if chunked else out)
166178
.view(dtype)
167179
.reshape(shape)
168180
)
169181

170-
decoded = decoded.view(np.ndarray)
182+
if getattr(decoded, "chunked", False):
183+
decoded = decoded.view(np.ndarray)
171184

172185
if isinstance(decoded, type(buf)):
173186
return decoded
@@ -205,7 +218,8 @@ def encode_decode_data_array(self, da: "xr.DataArray") -> "xr.DataArray":
205218

206219
import xarray as xr
207220

208-
chunked = da.chunks is not None
221+
if da.chunks is None:
222+
return da.copy(data=self.encode_decode(da.values)) # type: ignore
209223

210224
def encode_decode_data_array_single_chunk(
211225
da: xr.DataArray,
@@ -217,7 +231,7 @@ def encode_decode_data_array_single_chunk(
217231
return da.copy(deep=False).chunk(single_chunk)
218232

219233
# eagerly compute the input chunk and encode and decode it
220-
decoded = self.encode_decode(_MaybeChunkedNdArray(da.values, chunked)) # type: ignore
234+
decoded = self.encode_decode(ChunkedNdArray(da.values)) # type: ignore
221235

222236
return da.copy(deep=False, data=np.array(decoded).view(np.ndarray)).chunk(
223237
single_chunk
@@ -307,22 +321,3 @@ def __rmul__(self, other) -> "CodecStack":
307321

308322

309323
numcodecs.registry.register_codec(CodecStack)
310-
311-
312-
class _MaybeChunkedNdArray(np.ndarray):
313-
__slots__ = ("_chunked",)
314-
_chunked: bool
315-
316-
def __new__(cls, array, chunked: bool = True):
317-
obj = np.asarray(array).view(cls)
318-
obj._chunked = chunked
319-
return obj
320-
321-
def __array_finalize__(self, obj):
322-
if obj is None:
323-
return
324-
self._chunked = getattr(obj, "chunked", True)
325-
326-
@property
327-
def chunked(self) -> bool:
328-
return self._chunked

0 commit comments

Comments
 (0)