diff --git a/xrspatial/templates.py b/xrspatial/templates.py index 177dc4efa..7509f1011 100644 --- a/xrspatial/templates.py +++ b/xrspatial/templates.py @@ -65,18 +65,47 @@ def _balanced_axis(size, block): return tuple(base + 1 if i < rem else base for i in range(n)) -def _neighborhood_chunks(shape): +def _block_edge(cells): + """Block edge (cells) for the default tiling of a ``cells``-cell grid. + + A fixed ``_DASK_BLOCK`` edge, grown for very large grids so the block count + stays near ``_DASK_MAX_BLOCKS`` instead of exploding the task graph. + """ + import math + return max(_DASK_BLOCK, math.ceil(math.sqrt(cells / _DASK_MAX_BLOCKS))) + + +def _neighborhood_chunks(shape, block=None): """Even, square-ish dask chunks for the default tiling. Uses a ~``_DASK_BLOCK`` block, growing it for very large grids so the block count stays near ``_DASK_MAX_BLOCKS`` instead of exploding the task graph. + Pass ``block`` to reuse an edge already computed (so a padded shape tiles + into the same blocks it was padded to). """ - import math - cells = shape[0] * shape[1] - block = max(_DASK_BLOCK, math.ceil(math.sqrt(cells / _DASK_MAX_BLOCKS))) + if block is None: + block = _block_edge(shape[0] * shape[1]) return tuple(_balanced_axis(size, block) for size in shape) +def _pad_to_tiles(shape, block): + """Grow each axis up to a whole multiple of ``block``. + + An axis that already fits in a single block (``_balanced_axis`` would keep + it one chunk) is left alone, so a small template is not bloated out to a + full block. A multi-block axis is padded up to the next ``block`` multiple + so its tiling has no ragged remainder chunk. + """ + import math + out = [] + for size in shape: + if round(size / block) >= 2: + out.append(int(math.ceil(size / block) * block)) + else: + out.append(size) + return tuple(out) + + def _resolve(name): """Resolve a name to a spec dict. @@ -319,7 +348,8 @@ def list_templates(kind: Optional[str] = None) -> Union[Dict[str, List[str]], Li def from_template(name: str, resolution: Optional[Union[float, Tuple[float, float]]] = None, - *, preserve: Optional[str] = None, backend: str = "numpy", + *, height: Optional[int] = None, width: Optional[int] = None, + preserve: Optional[str] = None, backend: str = "numpy", fill: float = np.nan, chunks: Optional[Union[int, str, Tuple]] = None) -> xr.DataArray: """Create an empty DataArray for a common study area. @@ -357,7 +387,16 @@ def from_template(name: str, Cell size in the template's CRS units (metres for projected regions, degrees for country codes). A scalar gives square cells; a ``(res_x, res_y)`` tuple sets each axis. Defaults to a per-template - value so a bare ``from_template('conus')`` works. + value so a bare ``from_template('conus')`` works. Ignored on the + ``height`` / ``width`` path unless given (see below). + height, width : int, optional + Grid shape in cells. Supply both (or neither). When given, the result + is exactly ``height`` x ``width`` cells anchored at the region's + lower-left corner, and the extent floats off that anchor rather than + snapping to the study-area box: with ``resolution`` the extent is + ``shape x resolution``; without it the resolution is derived so the + exact shape spans the region bbox. Use this to ask for a + tiling-friendly shape directly. preserve : {'area', 'shape'}, optional Reproject the template into an EPSG-coded projection chosen for the property it preserves, instead of the template's default CRS. ``'area'`` @@ -381,9 +420,14 @@ def from_template(name: str, longer applies. When omitted (or ``'auto'``), the dask backends tile the grid into even, square-ish blocks (~2048 cells per side) tuned for the neighborhood ops -- ``slope``, ``hillshade``, ``focal`` -- that run - on the result through ``map_overlap``. That gives a parallel, - well-formed task graph instead of one giant block or thin ragged edges. - A grid small enough to not be worth splitting stays a single chunk. + on the result through ``map_overlap``. To make that tiling exact, a + multi-block grid has its extent padded out from the lower-left anchor + so each axis is a whole number of blocks: every chunk is full-size with + no ragged remainder, the requested ``resolution`` is unchanged, and the + padded grid still covers the study area (it only grows). The padding is + dask-only -- eager grids and explicit ``height`` / ``width`` keep the + exact bbox-derived shape -- and a grid small enough to not be worth + splitting stays a single chunk, unpadded. Pass an explicit value to override the tiling. The data stays lazy, but a very fine resolution still builds one task per chunk, so an extreme shape with small explicit chunks can make a task graph large enough to @@ -439,6 +483,9 @@ def from_template(name: str, >>> agg = from_template("new_england", resolution=10, chunks=512) >>> type(agg.data).__name__ 'Array' + >>> # ask for an exact tiling-friendly shape; the extent floats + >>> from_template("conus", resolution=1000, height=4096, width=6144).shape + (4096, 6144) Recipes ------- @@ -485,10 +532,33 @@ def from_template(name: str, default_res = spec["default_resolution"] left, bottom, right, top = bounds - res_x, res_y = _normalize_resolution(resolution, default_res) - width = max(1, int(round((right - left) / res_x))) - height = max(1, int(round((top - bottom) / res_y))) + # height/width are an all-or-nothing pair that fixes the grid shape exactly. + explicit_shape = height is not None or width is not None + if explicit_shape and (height is None or width is None): + raise ValueError( + "supply both height and width together, or neither" + ) + + if explicit_shape: + # Exact shape: the grid is height x width cells anchored at the region's + # lower-left corner, and the extent floats off that anchor. With a + # resolution the extent is shape x resolution; without one the + # resolution is derived so the exact shape spans the region bbox. + height, width = int(height), int(width) + if height <= 0 or width <= 0: + raise ValueError( + f"height and width must be positive, got {(height, width)}" + ) + if resolution is None: + res_x = (right - left) / width + res_y = (top - bottom) / height + else: + res_x, res_y = _normalize_resolution(resolution, default_res) + else: + res_x, res_y = _normalize_resolution(resolution, default_res) + width = max(1, int(round((right - left) / res_x))) + height = max(1, int(round((top - bottom) / res_y))) # Supplying `chunks` signals intent for a lazy, chunked grid: promote an # eager backend to its dask variant so the result never materializes. @@ -497,20 +567,40 @@ def from_template(name: str, backend = "dask+numpy" if backend == "numpy" else "dask+cupy" is_dask = backend in ("dask", "dask+numpy", "dask+cupy") + # Default dask tiling pads the shape out to whole blocks so every chunk is + # full-size with no ragged remainder. Only on a dask backend under the + # default tiling, and never when the caller fixed the shape with + # height/width or drove the tiling with an explicit chunks=. Reuse the same + # block edge for the padding and the chunking so they agree exactly. + default_tiling = chunks is None or chunks == "auto" + block = None + if is_dask and default_tiling and not explicit_shape: + block = _block_edge(height * width) + height, width = _pad_to_tiles((height, width), block) + + # Name the knob the caller actually set when a cap is hit: height/width on + # the explicit-shape path (its resolution is derived), resolution otherwise. + if explicit_shape: + shape_desc = f"height={height}, width={width}" + coarsen = "Use a smaller height/width" + else: + shape_desc = f"resolution {(res_x, res_y)}" + coarsen = "Use a coarser resolution" + n_cells = width * height if not is_dask and n_cells > _MAX_CELLS: raise ValueError( - f"resolution {(res_x, res_y)} produces a {height} x {width} grid " + f"{shape_desc} produces a {height} x {width} grid " f"({n_cells:,} cells), exceeding the {_MAX_CELLS:,}-cell limit. " - f"Use a coarser resolution, or pass chunks=... for a lazy dask grid." + f"{coarsen}, or pass chunks=... for a lazy dask grid." ) # 'auto' (the default) tiles into even square blocks tuned for the # neighborhood ops that run on the result; an explicit chunks= is honored # verbatim. Either way the block count is checked against _MAX_CHUNKS below. - if chunks is None or chunks == "auto": - effective_chunks = _neighborhood_chunks((height, width)) if is_dask \ - else "auto" + if default_tiling: + effective_chunks = _neighborhood_chunks((height, width), block) \ + if is_dask else "auto" else: effective_chunks = chunks if is_dask: @@ -521,12 +611,12 @@ def from_template(name: str, chunks_display = chunks if chunks not in (None, "auto") \ else f"the default ~{_DASK_BLOCK}-cell tiling" raise ValueError( - f"resolution {(res_x, res_y)} produces a {height} x {width} " + f"{shape_desc} produces a {height} x {width} " f"grid that splits into {n_chunks:,} chunks with " f"{chunks_display!r}, exceeding the " f"{_MAX_CHUNKS:,}-chunk limit. A task graph this large can bog " - f"down the client even though no data is computed. Use a " - f"coarser resolution or larger chunks." + f"down the client even though no data is computed. " + f"{coarsen} or use larger chunks." ) # Honor the requested resolution exactly: anchor the lower-left corner and diff --git a/xrspatial/tests/test_templates.py b/xrspatial/tests/test_templates.py index 7bac23ad0..af6bce1c8 100644 --- a/xrspatial/tests/test_templates.py +++ b/xrspatial/tests/test_templates.py @@ -929,3 +929,174 @@ def test_lite_crs_name_property(): assert LiteCRS(5070).name == "NAD83 / Conus Albers" assert LiteCRS(4326).name == "WGS 84" + + +# --------------------------------------------------------------------------- +# tiling-optimized extents: the default dask grid pads its shape out to whole +# blocks so every chunk is full-size, and explicit height/width set the grid +# shape exactly (extent floats off the region anchor). +# --------------------------------------------------------------------------- + + +@dask_array_available +def test_default_dask_extent_padded_to_whole_tiles(): + import dask.array as da + from xrspatial.templates import _DASK_BLOCK + # conus @ 1 km is a multi-block grid (~3105 x 5865). The default tiling pads + # the shape up to exact multiples of the block edge so every chunk is a full + # _DASK_BLOCK square -- no ragged remainder block on the far edge. + agg = from_template("conus", resolution=1000, backend="dask") + assert isinstance(agg.data, da.Array) + assert agg.shape[0] % _DASK_BLOCK == 0 + assert agg.shape[1] % _DASK_BLOCK == 0 + for axis in agg.data.chunks: + assert set(axis) == {_DASK_BLOCK} + + +@dask_array_available +def test_padding_keeps_resolution_exact_and_covers_region(): + # Padding grows the grid by whole cells out from the lower-left anchor, so + # the requested resolution comes back unchanged and the padded extent still + # covers the original study-area box (it only ever grows). + agg = from_template("conus", resolution=1000, backend="dask") + assert agg.attrs["res"] == (1000.0, 1000.0) + left, bottom, right, top = _REGIONS["conus"]["bounds"] + # half-cell pixel-center inset, then the padded edges reach at/past the box + assert agg.x.values.min() <= left + 1000.0 + assert agg.y.values.max() >= top - 1000.0 + assert agg.x.values.max() >= right - 1000.0 + assert agg.y.values.min() <= bottom + 1000.0 + + +@dask_array_available +def test_padding_skipped_for_single_chunk_grid(): + # A grid that stays a single chunk (nyc @ default, ~1760 x 1850) is not + # padded -- its dask coords match the eager grid cell-for-cell, so backend + # stays a pure execution detail for templates small enough not to tile. + eager = from_template("nyc") + lazy = from_template("nyc", backend="dask+numpy") + assert lazy.data.npartitions == 1 + np.testing.assert_array_equal(lazy.x.values, eager.x.values) + np.testing.assert_array_equal(lazy.y.values, eager.y.values) + + +def test_padding_skipped_for_eager_backend(): + # Padding is a dask tiling concern; the eager numpy grid keeps the exact + # bbox-derived shape (no point bloating a materialized array). + from xrspatial.templates import _resolve, _normalize_resolution + spec = _resolve("conus") + left, bottom, right, top = spec["bounds"] + rx, ry = _normalize_resolution(1000, spec["default_resolution"]) + w = max(1, round((right - left) / rx)) + h = max(1, round((top - bottom) / ry)) + agg = from_template("conus", resolution=1000) # numpy + assert agg.shape == (h, w) + + +@dask_array_available +def test_padding_skipped_for_explicit_chunks(): + # An explicit chunks= means the caller is driving the tiling, so the shape + # is left at the exact bbox-derived size (today's honored-verbatim contract). + from xrspatial.templates import _resolve, _normalize_resolution + spec = _resolve("conus") + left, bottom, right, top = spec["bounds"] + rx, ry = _normalize_resolution(1000, spec["default_resolution"]) + w = max(1, round((right - left) / rx)) + h = max(1, round((top - bottom) / ry)) + agg = from_template("conus", resolution=1000, chunks=512) + assert agg.shape == (h, w) + + +def test_explicit_height_width_exact_shape(): + # Supplying height and width sets the grid shape exactly; the extent floats + # off the region's lower-left anchor instead of the bbox. + agg = from_template("conus", height=4096, width=6144) + assert agg.shape == (4096, 6144) + assert agg.dims == ("y", "x") + assert agg.attrs["crs"] == 5070 + + +def test_explicit_height_width_with_resolution_floats_extent(): + # height/width + resolution => extent = shape x resolution, anchored at the + # region origin. Resolution is honored exactly. + agg = from_template("conus", resolution=1000, height=4096, width=6144) + assert agg.shape == (4096, 6144) + assert agg.attrs["res"] == (1000.0, 1000.0) + left, bottom, right, top = _REGIONS["conus"]["bounds"] + # width * res out from the left anchor (pixel centers inset by half a cell) + assert np.isclose(agg.x.values[0], left + 1000.0 / 2) + assert np.isclose(agg.x.values[-1], left + (6144 - 0.5) * 1000.0) + + +def test_explicit_height_width_without_resolution_covers_bbox(): + # height/width alone => resolution is derived so the exact shape spans the + # region bbox (the grid still covers the named study area). + agg = from_template("conus", height=600, width=1200) + assert agg.shape == (600, 1200) + left, bottom, right, top = _REGIONS["conus"]["bounds"] + res_x, res_y = agg.attrs["res"] + assert np.isclose(res_x, (right - left) / 1200) + assert np.isclose(res_y, (top - bottom) / 600) + + +@dask_array_available +def test_explicit_height_width_not_padded_on_dask(): + # The user picked the exact (tile-friendly) shape; the dask path tiles it + # but does not pad it back out to a different size. + import dask.array as da + agg = from_template("conus", resolution=1000, height=4096, width=6144, + backend="dask") + assert isinstance(agg.data, da.Array) + assert agg.shape == (4096, 6144) + + +def test_partial_height_width_raises(): + # height and width are an all-or-nothing pair. + with pytest.raises(ValueError, match="both height and width"): + from_template("conus", height=4096) + with pytest.raises(ValueError, match="both height and width"): + from_template("conus", width=6144) + + +def test_non_positive_height_width_raises(): + with pytest.raises(ValueError, match="height and width must be positive"): + from_template("conus", height=0, width=10) + with pytest.raises(ValueError, match="height and width must be positive"): + from_template("conus", height=10, width=-5) + + +def test_oversized_explicit_shape_cap_message_names_height_width(): + # The cell-cap message must point at the knob the caller actually set: on + # the height/width path that is height/width, not the derived resolution. + with pytest.raises(ValueError, match="exceeding") as exc: + from_template("conus", height=30000, width=30000) # 9e8 cells, eager + assert "height=30000" in str(exc.value) + assert "width=30000" in str(exc.value) + assert "resolution" not in str(exc.value) + + +@dask_array_available +def test_explicit_height_width_with_preserve(): + # height/width compose with preserve=: the exact shape is anchored at the + # reprojected bbox and the CRS is the chosen EPSG, resolution derived. + agg = from_template("FRA", preserve="shape", height=300, width=400) + assert agg.shape == (300, 400) + assert agg.attrs["crs"] == 32630 # FRA centroid UTM 30N + assert agg.x.attrs["units"] == "m" + res_x, res_y = agg.attrs["res"] + assert res_x > 0 and res_y > 0 + + +@dask_array_available +def test_explicit_height_width_dask_ragged_shape_stays_exact(): + import dask.array as da + # An explicit shape that is not a block multiple is left exactly as asked + # (not padded); the dask path still tiles it into balanced blocks. + agg = from_template("conus", resolution=1000, height=5000, width=5000, + backend="dask") + assert isinstance(agg.data, da.Array) + assert agg.shape == (5000, 5000) + assert agg.data.npartitions > 1 + for axis in agg.data.chunks: + assert sum(axis) == 5000 + assert max(axis) - min(axis) <= 1 # balanced, no ragged sliver