-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathstrategies.py
More file actions
264 lines (231 loc) · 8.07 KB
/
strategies.py
File metadata and controls
264 lines (231 loc) · 8.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
from collections.abc import Hashable
from itertools import compress
import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
import numpy as np
import xarray as xr
from xarray.testing.strategies import unique_subset_of
def _basic_indexers(size):
    """Strategy for one basic index into a dimension of length ``size``:
    either an in-bounds integer (negative values allowed) or a slice."""
    integer_index = st.integers(min_value=-size, max_value=size - 1)
    slice_index = st.slices(size)
    return integer_index | slice_index
def _outer_array_indexers(size, max_size):
    """Strategy for a 1-D ``int64`` array indexer into a dimension of length
    ``size``, with between 1 and ``min(size, max_size)`` elements, each an
    in-bounds integer index (negative values allowed)."""
    length = st.integers(min_value=1, max_value=min(size, max_size))
    element = st.integers(min_value=-size, max_value=size - 1)
    return npst.arrays(dtype=np.int64, shape=length, elements=element)
# vendored from `xarray`, should be included in `xarray>=2026.01.0`
@st.composite
def basic_indexers(
    draw,
    /,
    *,
    sizes: dict[Hashable, int],
    min_dims: int = 1,
    max_dims: int | None = None,
) -> dict[Hashable, int | slice]:
    """Generate basic indexers (integers and slices) for a subset of dimensions.

    Parameters
    ----------
    draw : callable
        The Hypothesis draw function (automatically provided by @st.composite).
    sizes : dict[Hashable, int]
        Dictionary mapping dimension names to their sizes.
    min_dims : int, optional
        Minimum number of dimensions to index.
    max_dims : int or None, optional
        Maximum number of dimensions to index.

    Returns
    -------
    indexers : mapping of hashable to int or slice
        Indexers as a dict with keys randomly selected from ``sizes.keys()``.

    See Also
    --------
    hypothesis.strategies.slices
    """
    selected_dims = draw(unique_subset_of(sizes, min_size=min_dims, max_size=max_dims))
    # Generate one basic index (int or slice) per selected dimension,
    # reusing the shared helper so this stays consistent with
    # ``orthogonal_indexers``.
    return {dim: draw(_basic_indexers(size)) for dim, size in selected_dims.items()}
@st.composite
def outer_array_indexers(
    draw,
    /,
    *,
    sizes: dict[Hashable, int],
    min_dims: int = 0,
    max_dims: int | None = None,
    max_size: int = 10,
) -> dict[Hashable, np.ndarray]:
    """Generate outer array indexers (vectorized/orthogonal indexing).

    Parameters
    ----------
    draw : callable
        The Hypothesis draw function (automatically provided by @st.composite).
    sizes : dict[Hashable, int]
        Dictionary mapping dimension names to their sizes.
    min_dims : int, optional
        Minimum number of dimensions to index.
    max_dims : int or None, optional
        Maximum number of dimensions to index.
    max_size : int, optional
        Maximum number of elements in each generated array indexer.

    Returns
    -------
    indexers : mapping of hashable to np.ndarray
        Indexers as a dict with keys randomly selected from ``sizes.keys()``.
        Values are 1D numpy arrays of integer indices for each dimension.

    See Also
    --------
    hypothesis.extra.numpy.arrays
    """
    selected_dims = draw(unique_subset_of(sizes, min_size=min_dims, max_size=max_dims))
    # Reuse the shared helper instead of duplicating the strategy inline,
    # keeping this consistent with ``orthogonal_indexers``.
    return {
        dim: draw(_outer_array_indexers(size, max_size))
        for dim, size in selected_dims.items()
    }
@st.composite
def orthogonal_indexers(
    draw,
    /,
    *,
    sizes: dict[Hashable, int],
    min_dims: int = 2,
    max_dims: int | None = None,
    max_size: int = 10,
) -> dict[Hashable, int | slice | np.ndarray]:
    """Generate orthogonal indexers (vectorized/orthogonal indexing).

    Parameters
    ----------
    draw : callable
        The Hypothesis draw function (automatically provided by @st.composite).
    sizes : dict[Hashable, int]
        Dictionary mapping dimension names to their sizes.
    min_dims : int, optional
        Minimum number of dimensions to index.
    max_dims : int or None, optional
        Maximum number of dimensions to index.
    max_size : int, optional
        Maximum size of array indexers.

    Returns
    -------
    indexers : mapping of hashable to indexer
        Indexers as a dict with keys randomly selected from ``sizes.keys()``.
        Values are integers, slices, or 1D numpy arrays of integer indices for
        each dimension.

    See Also
    --------
    hypothesis.extra.numpy.arrays
    """
    selected_dims = draw(unique_subset_of(sizes, min_size=min_dims, max_size=max_dims))
    # Each selected dimension independently gets either a basic index
    # (int/slice) or a 1-D integer array index.
    indexers: dict[Hashable, int | slice | np.ndarray] = {}
    for dim, size in selected_dims.items():
        per_dim_strategy = st.one_of(
            _basic_indexers(size),
            _outer_array_indexers(size, max_size),
        )
        indexers[dim] = draw(per_dim_strategy)
    return indexers
@st.composite
def vectorized_indexers(
    draw,
    /,
    *,
    sizes: dict[Hashable, int],
    min_dims: int = 2,
    max_dims: int | None = None,
    min_ndim: int = 1,
    max_ndim: int = 3,
    min_size: int = 1,
    max_size: int = 5,
) -> dict[Hashable, xr.Variable]:
    """Generate vectorized (fancy) indexers where all arrays are broadcastable.

    In vectorized indexing, all array indexers must have compatible shapes
    that can be broadcast together, and the result shape is determined by
    broadcasting the indexer arrays.

    Parameters
    ----------
    draw : callable
        The Hypothesis draw function (automatically provided by @st.composite).
    sizes : dict[Hashable, int]
        Dictionary mapping dimension names to their sizes.
    min_dims : int, optional
        Minimum number of dimensions to index. Default is 2, so that we always have a "trajectory".
        Use ``outer_array_indexers`` for the ``min_dims==1`` case.
    max_dims : int or None, optional
        Maximum number of dimensions to index.
    min_ndim : int, optional
        Minimum number of dimensions for the result arrays.
    max_ndim : int, optional
        Maximum number of dimensions for the result arrays.
    min_size : int, optional
        Minimum size for each dimension in the result arrays.
    max_size : int, optional
        Maximum size for each dimension in the result arrays.

    Returns
    -------
    indexers : mapping of hashable to Variable
        Indexers as a dict with keys randomly selected from ``sizes.keys()``.
        Values are ``xr.Variable`` objects of integer indices that are all
        broadcastable to a common shape.

    See Also
    --------
    hypothesis.extra.numpy.arrays
    """
    selected_dims = draw(unique_subset_of(sizes, min_size=min_dims, max_size=max_dims))
    # Generate a common broadcast shape for all arrays
    # Use min_ndim to max_ndim dimensions for the result shape
    result_shape = draw(
        st.lists(
            st.integers(min_value=min_size, max_value=max_size),
            min_size=min_ndim,
            max_size=max_ndim,
        )
    )
    result_ndim = len(result_shape)
    # Create dimension names for the vectorized result
    vec_dims = tuple(f"vec_{i}" for i in range(result_ndim))
    # Generate array indexers for each selected dimension
    # All arrays must be broadcastable to the same result_shape
    idxr = {}
    for dim, size in selected_dims.items():
        # A numpy-broadcastable shape: at most result_ndim dims, where each
        # trailing dim either matches result_shape or is 1.
        array_shape = draw(
            npst.broadcastable_shapes(
                shape=tuple(result_shape),
                min_dims=min_ndim,
                max_dims=result_ndim,
            )
        )
        # For xarray broadcasting, drop dimensions where size differs from result_shape
        # (numpy broadcasts size-1, but xarray requires matching sizes or missing dims)
        # Right-align array_shape with result_shape for comparison
        # (the ``if array_shape`` guard handles a 0-d draw, possible when
        # min_ndim == 0, where an empty slice would otherwise misalign).
        aligned_dims = vec_dims[-len(array_shape) :] if array_shape else ()
        aligned_result = result_shape[-len(array_shape) :] if array_shape else []
        keep_mask = [s == r for s, r in zip(array_shape, aligned_result, strict=True)]
        filtered_shape = tuple(compress(array_shape, keep_mask))
        filtered_dims = tuple(compress(aligned_dims, keep_mask))
        # Generate array of valid indices for this dimension
        # (negative indices are valid and exercised deliberately)
        indices = draw(
            npst.arrays(
                dtype=np.int64,
                shape=filtered_shape,
                elements=st.integers(min_value=-size, max_value=size - 1),
            )
        )
        # Wrap as an xr.Variable so xarray aligns indexers by the shared
        # ``vec_*`` dimension names during vectorized indexing.
        idxr[dim] = xr.Variable(data=indices, dims=filtered_dims)
    return idxr