Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 110 additions & 20 deletions xrspatial/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,47 @@ def _balanced_axis(size, block):
return tuple(base + 1 if i < rem else base for i in range(n))


def _neighborhood_chunks(shape):
def _block_edge(cells):
"""Block edge (cells) for the default tiling of a ``cells``-cell grid.

A fixed ``_DASK_BLOCK`` edge, grown for very large grids so the block count
stays near ``_DASK_MAX_BLOCKS`` instead of exploding the task graph.
"""
import math
return max(_DASK_BLOCK, math.ceil(math.sqrt(cells / _DASK_MAX_BLOCKS)))


def _neighborhood_chunks(shape, block=None):
"""Even, square-ish dask chunks for the default tiling.

Uses a ~``_DASK_BLOCK`` block, growing it for very large grids so the block
count stays near ``_DASK_MAX_BLOCKS`` instead of exploding the task graph.
Pass ``block`` to reuse an edge already computed (so a padded shape tiles
into the same blocks it was padded to).
"""
import math
cells = shape[0] * shape[1]
block = max(_DASK_BLOCK, math.ceil(math.sqrt(cells / _DASK_MAX_BLOCKS)))
if block is None:
block = _block_edge(shape[0] * shape[1])
return tuple(_balanced_axis(size, block) for size in shape)


def _pad_to_tiles(shape, block):
"""Grow each axis up to a whole multiple of ``block``.

An axis that already fits in a single block (``_balanced_axis`` would keep
it one chunk) is left alone, so a small template is not bloated out to a
full block. A multi-block axis is padded up to the next ``block`` multiple
so its tiling has no ragged remainder chunk.
"""
import math
out = []
for size in shape:
if round(size / block) >= 2:
out.append(int(math.ceil(size / block) * block))
else:
out.append(size)
return tuple(out)


def _resolve(name):
"""Resolve a name to a spec dict.

Expand Down Expand Up @@ -319,7 +348,8 @@ def list_templates(kind: Optional[str] = None) -> Union[Dict[str, List[str]], Li

def from_template(name: str,
resolution: Optional[Union[float, Tuple[float, float]]] = None,
*, preserve: Optional[str] = None, backend: str = "numpy",
*, height: Optional[int] = None, width: Optional[int] = None,
preserve: Optional[str] = None, backend: str = "numpy",
fill: float = np.nan,
chunks: Optional[Union[int, str, Tuple]] = None) -> xr.DataArray:
"""Create an empty DataArray for a common study area.
Expand Down Expand Up @@ -357,7 +387,16 @@ def from_template(name: str,
Cell size in the template's CRS units (metres for projected regions,
degrees for country codes). A scalar gives square cells; a
``(res_x, res_y)`` tuple sets each axis. Defaults to a per-template
value so a bare ``from_template('conus')`` works.
value so a bare ``from_template('conus')`` works. Ignored on the
``height`` / ``width`` path unless given (see below).
height, width : int, optional
Grid shape in cells. Supply both (or neither). When given, the result
is exactly ``height`` x ``width`` cells anchored at the region's
lower-left corner, and the extent floats off that anchor rather than
snapping to the study-area box: with ``resolution`` the extent is
``shape x resolution``; without it the resolution is derived so the
exact shape spans the region bbox. Use this to ask for a
tiling-friendly shape directly.
preserve : {'area', 'shape'}, optional
Reproject the template into an EPSG-coded projection chosen for the
property it preserves, instead of the template's default CRS. ``'area'``
Expand All @@ -381,9 +420,14 @@ def from_template(name: str,
longer applies. When omitted (or ``'auto'``), the dask backends tile
the grid into even, square-ish blocks (~2048 cells per side) tuned for
the neighborhood ops -- ``slope``, ``hillshade``, ``focal`` -- that run
on the result through ``map_overlap``. That gives a parallel,
well-formed task graph instead of one giant block or thin ragged edges.
A grid small enough to not be worth splitting stays a single chunk.
on the result through ``map_overlap``. To make that tiling exact, a
multi-block grid has its extent padded out from the lower-left anchor
so each axis is a whole number of blocks: every chunk is full-size with
no ragged remainder, the requested ``resolution`` is unchanged, and the
padded grid still covers the study area (it only grows). The padding is
dask-only -- eager grids and explicit ``height`` / ``width`` keep the
exact bbox-derived shape -- and a grid small enough to not be worth
splitting stays a single chunk, unpadded.
Pass an explicit value to override the tiling. The data stays lazy, but
a very fine resolution still builds one task per chunk, so an extreme
shape with small explicit chunks can make a task graph large enough to
Expand Down Expand Up @@ -439,6 +483,9 @@ def from_template(name: str,
>>> agg = from_template("new_england", resolution=10, chunks=512)
>>> type(agg.data).__name__
'Array'
>>> # ask for an exact tiling-friendly shape; the extent floats
>>> from_template("conus", resolution=1000, height=4096, width=6144).shape
(4096, 6144)

Recipes
-------
Expand Down Expand Up @@ -485,10 +532,33 @@ def from_template(name: str,
default_res = spec["default_resolution"]

left, bottom, right, top = bounds
res_x, res_y = _normalize_resolution(resolution, default_res)

width = max(1, int(round((right - left) / res_x)))
height = max(1, int(round((top - bottom) / res_y)))
# height/width are an all-or-nothing pair that fixes the grid shape exactly.
explicit_shape = height is not None or width is not None
if explicit_shape and (height is None or width is None):
raise ValueError(
"supply both height and width together, or neither"
)

if explicit_shape:
# Exact shape: the grid is height x width cells anchored at the region's
# lower-left corner, and the extent floats off that anchor. With a
# resolution the extent is shape x resolution; without one the
# resolution is derived so the exact shape spans the region bbox.
height, width = int(height), int(width)
if height <= 0 or width <= 0:
raise ValueError(
f"height and width must be positive, got {(height, width)}"
)
if resolution is None:
res_x = (right - left) / width
res_y = (top - bottom) / height
else:
res_x, res_y = _normalize_resolution(resolution, default_res)
else:
res_x, res_y = _normalize_resolution(resolution, default_res)
width = max(1, int(round((right - left) / res_x)))
height = max(1, int(round((top - bottom) / res_y)))

# Supplying `chunks` signals intent for a lazy, chunked grid: promote an
# eager backend to its dask variant so the result never materializes.
Expand All @@ -497,20 +567,40 @@ def from_template(name: str,
backend = "dask+numpy" if backend == "numpy" else "dask+cupy"
is_dask = backend in ("dask", "dask+numpy", "dask+cupy")

# Default dask tiling pads the shape out to whole blocks so every chunk is
# full-size with no ragged remainder. Only on a dask backend under the
# default tiling, and never when the caller fixed the shape with
# height/width or drove the tiling with an explicit chunks=. Reuse the same
# block edge for the padding and the chunking so they agree exactly.
default_tiling = chunks is None or chunks == "auto"
block = None
if is_dask and default_tiling and not explicit_shape:
block = _block_edge(height * width)
height, width = _pad_to_tiles((height, width), block)

# Name the knob the caller actually set when a cap is hit: height/width on
# the explicit-shape path (its resolution is derived), resolution otherwise.
if explicit_shape:
shape_desc = f"height={height}, width={width}"
coarsen = "Use a smaller height/width"
else:
shape_desc = f"resolution {(res_x, res_y)}"
coarsen = "Use a coarser resolution"

n_cells = width * height
if not is_dask and n_cells > _MAX_CELLS:
raise ValueError(
f"resolution {(res_x, res_y)} produces a {height} x {width} grid "
f"{shape_desc} produces a {height} x {width} grid "
f"({n_cells:,} cells), exceeding the {_MAX_CELLS:,}-cell limit. "
f"Use a coarser resolution, or pass chunks=... for a lazy dask grid."
f"{coarsen}, or pass chunks=... for a lazy dask grid."
)

# 'auto' (the default) tiles into even square blocks tuned for the
# neighborhood ops that run on the result; an explicit chunks= is honored
# verbatim. Either way the block count is checked against _MAX_CHUNKS below.
if chunks is None or chunks == "auto":
effective_chunks = _neighborhood_chunks((height, width)) if is_dask \
else "auto"
if default_tiling:
effective_chunks = _neighborhood_chunks((height, width), block) \
if is_dask else "auto"
else:
effective_chunks = chunks
if is_dask:
Expand All @@ -521,12 +611,12 @@ def from_template(name: str,
chunks_display = chunks if chunks not in (None, "auto") \
else f"the default ~{_DASK_BLOCK}-cell tiling"
raise ValueError(
f"resolution {(res_x, res_y)} produces a {height} x {width} "
f"{shape_desc} produces a {height} x {width} "
f"grid that splits into {n_chunks:,} chunks with "
f"{chunks_display!r}, exceeding the "
f"{_MAX_CHUNKS:,}-chunk limit. A task graph this large can bog "
f"down the client even though no data is computed. Use a "
f"coarser resolution or larger chunks."
f"down the client even though no data is computed. "
f"{coarsen} or use larger chunks."
)

# Honor the requested resolution exactly: anchor the lower-left corner and
Expand Down
171 changes: 171 additions & 0 deletions xrspatial/tests/test_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,3 +929,174 @@ def test_lite_crs_name_property():

assert LiteCRS(5070).name == "NAD83 / Conus Albers"
assert LiteCRS(4326).name == "WGS 84"


# ---------------------------------------------------------------------------
# tiling-optimized extents: the default dask grid pads its shape out to whole
# blocks so every chunk is full-size, and explicit height/width set the grid
# shape exactly (extent floats off the region anchor).
# ---------------------------------------------------------------------------


@dask_array_available
def test_default_dask_extent_padded_to_whole_tiles():
import dask.array as da
from xrspatial.templates import _DASK_BLOCK
# conus @ 1 km is a multi-block grid (~3105 x 5865). The default tiling pads
# the shape up to exact multiples of the block edge so every chunk is a full
# _DASK_BLOCK square -- no ragged remainder block on the far edge.
agg = from_template("conus", resolution=1000, backend="dask")
assert isinstance(agg.data, da.Array)
assert agg.shape[0] % _DASK_BLOCK == 0
assert agg.shape[1] % _DASK_BLOCK == 0
for axis in agg.data.chunks:
assert set(axis) == {_DASK_BLOCK}


@dask_array_available
def test_padding_keeps_resolution_exact_and_covers_region():
# Padding grows the grid by whole cells out from the lower-left anchor, so
# the requested resolution comes back unchanged and the padded extent still
# covers the original study-area box (it only ever grows).
agg = from_template("conus", resolution=1000, backend="dask")
assert agg.attrs["res"] == (1000.0, 1000.0)
left, bottom, right, top = _REGIONS["conus"]["bounds"]
# half-cell pixel-center inset, then the padded edges reach at/past the box
assert agg.x.values.min() <= left + 1000.0
assert agg.y.values.max() >= top - 1000.0
assert agg.x.values.max() >= right - 1000.0
assert agg.y.values.min() <= bottom + 1000.0


@dask_array_available
def test_padding_skipped_for_single_chunk_grid():
# A grid that stays a single chunk (nyc @ default, ~1760 x 1850) is not
# padded -- its dask coords match the eager grid cell-for-cell, so backend
# stays a pure execution detail for templates small enough not to tile.
eager = from_template("nyc")
lazy = from_template("nyc", backend="dask+numpy")
assert lazy.data.npartitions == 1
np.testing.assert_array_equal(lazy.x.values, eager.x.values)
np.testing.assert_array_equal(lazy.y.values, eager.y.values)


def test_padding_skipped_for_eager_backend():
# Padding is a dask tiling concern; the eager numpy grid keeps the exact
# bbox-derived shape (no point bloating a materialized array).
from xrspatial.templates import _resolve, _normalize_resolution
spec = _resolve("conus")
left, bottom, right, top = spec["bounds"]
rx, ry = _normalize_resolution(1000, spec["default_resolution"])
w = max(1, round((right - left) / rx))
h = max(1, round((top - bottom) / ry))
agg = from_template("conus", resolution=1000) # numpy
assert agg.shape == (h, w)


@dask_array_available
def test_padding_skipped_for_explicit_chunks():
# An explicit chunks= means the caller is driving the tiling, so the shape
# is left at the exact bbox-derived size (today's honored-verbatim contract).
from xrspatial.templates import _resolve, _normalize_resolution
spec = _resolve("conus")
left, bottom, right, top = spec["bounds"]
rx, ry = _normalize_resolution(1000, spec["default_resolution"])
w = max(1, round((right - left) / rx))
h = max(1, round((top - bottom) / ry))
agg = from_template("conus", resolution=1000, chunks=512)
assert agg.shape == (h, w)


def test_explicit_height_width_exact_shape():
# Supplying height and width sets the grid shape exactly; the extent floats
# off the region's lower-left anchor instead of the bbox.
agg = from_template("conus", height=4096, width=6144)
assert agg.shape == (4096, 6144)
assert agg.dims == ("y", "x")
assert agg.attrs["crs"] == 5070


def test_explicit_height_width_with_resolution_floats_extent():
# height/width + resolution => extent = shape x resolution, anchored at the
# region origin. Resolution is honored exactly.
agg = from_template("conus", resolution=1000, height=4096, width=6144)
assert agg.shape == (4096, 6144)
assert agg.attrs["res"] == (1000.0, 1000.0)
left, bottom, right, top = _REGIONS["conus"]["bounds"]
# width * res out from the left anchor (pixel centers inset by half a cell)
assert np.isclose(agg.x.values[0], left + 1000.0 / 2)
assert np.isclose(agg.x.values[-1], left + (6144 - 0.5) * 1000.0)


def test_explicit_height_width_without_resolution_covers_bbox():
# height/width alone => resolution is derived so the exact shape spans the
# region bbox (the grid still covers the named study area).
agg = from_template("conus", height=600, width=1200)
assert agg.shape == (600, 1200)
left, bottom, right, top = _REGIONS["conus"]["bounds"]
res_x, res_y = agg.attrs["res"]
assert np.isclose(res_x, (right - left) / 1200)
assert np.isclose(res_y, (top - bottom) / 600)


@dask_array_available
def test_explicit_height_width_not_padded_on_dask():
# The user picked the exact (tile-friendly) shape; the dask path tiles it
# but does not pad it back out to a different size.
import dask.array as da
agg = from_template("conus", resolution=1000, height=4096, width=6144,
backend="dask")
assert isinstance(agg.data, da.Array)
assert agg.shape == (4096, 6144)


def test_partial_height_width_raises():
# height and width are an all-or-nothing pair.
with pytest.raises(ValueError, match="both height and width"):
from_template("conus", height=4096)
with pytest.raises(ValueError, match="both height and width"):
from_template("conus", width=6144)


def test_non_positive_height_width_raises():
with pytest.raises(ValueError, match="height and width must be positive"):
from_template("conus", height=0, width=10)
with pytest.raises(ValueError, match="height and width must be positive"):
from_template("conus", height=10, width=-5)


def test_oversized_explicit_shape_cap_message_names_height_width():
# The cell-cap message must point at the knob the caller actually set: on
# the height/width path that is height/width, not the derived resolution.
with pytest.raises(ValueError, match="exceeding") as exc:
from_template("conus", height=30000, width=30000) # 9e8 cells, eager
assert "height=30000" in str(exc.value)
assert "width=30000" in str(exc.value)
assert "resolution" not in str(exc.value)


@dask_array_available
def test_explicit_height_width_with_preserve():
# height/width compose with preserve=: the exact shape is anchored at the
# reprojected bbox and the CRS is the chosen EPSG, resolution derived.
agg = from_template("FRA", preserve="shape", height=300, width=400)
assert agg.shape == (300, 400)
assert agg.attrs["crs"] == 32630 # FRA centroid UTM 30N
assert agg.x.attrs["units"] == "m"
res_x, res_y = agg.attrs["res"]
assert res_x > 0 and res_y > 0


@dask_array_available
def test_explicit_height_width_dask_ragged_shape_stays_exact():
import dask.array as da
# An explicit shape that is not a block multiple is left exactly as asked
# (not padded); the dask path still tiles it into balanced blocks.
agg = from_template("conus", resolution=1000, height=5000, width=5000,
backend="dask")
assert isinstance(agg.data, da.Array)
assert agg.shape == (5000, 5000)
assert agg.data.npartitions > 1
for axis in agg.data.chunks:
assert sum(axis) == 5000
assert max(axis) - min(axis) <= 1 # balanced, no ragged sliver
Loading