diff --git a/README.md b/README.md index 8c3ed1d0..bd02ccfb 100644 --- a/README.md +++ b/README.md @@ -352,6 +352,8 @@ In the GIS world, rasters are used for representing continuous phenomena (e.g. e | Name | Description | Source | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray | |:----------:|:------------|:------:|:----------------------:|:--------------------:|:-------------------:|:------:| | [Preview](xrspatial/preview.py) | Downsamples a raster to target pixel dimensions for visualization | Custom | ✅️ | ✅️ | ✅️ | 🔄 | +| [Rescale](xrspatial/normalize.py) | Min-max normalization to a target range (default [0, 1]) | Standard | ✅️ | ✅️ | ✅️ | ✅️ | +| [Standardize](xrspatial/normalize.py) | Z-score normalization (subtract mean, divide by std) | Standard | ✅️ | ✅️ | ✅️ | ✅️ | #### Usage diff --git a/docs/source/reference/utilities.rst b/docs/source/reference/utilities.rst index 938062d2..a7b15c61 100644 --- a/docs/source/reference/utilities.rst +++ b/docs/source/reference/utilities.rst @@ -46,6 +46,14 @@ Preview xrspatial.preview.preview +Normalization +============= +.. autosummary:: + :toctree: _autosummary + + xrspatial.normalize.rescale + xrspatial.normalize.standardize + Diagnostics =========== .. 
autosummary:: diff --git a/examples/user_guide/34_Normalization.ipynb b/examples/user_guide/34_Normalization.ipynb new file mode 100644 index 00000000..03518ffe --- /dev/null +++ b/examples/user_guide/34_Normalization.ipynb @@ -0,0 +1,101 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "x2762y1yuan", + "source": "# Normalization: rescale and standardize\n\nTwo common preprocessing steps before combining rasters or feeding them into models:\n\n- **rescale** maps values to a target range (default [0, 1]) using min-max normalization.\n- **standardize** centers values at zero with unit variance (z-score normalization).\n\nBoth functions handle NaN and infinite values (they pass through unchanged) and work on all four xarray-spatial backends: NumPy, CuPy, Dask+NumPy, and Dask+CuPy.", + "metadata": {} + }, + { + "cell_type": "code", + "id": "wzy5yzkbde", + "source": "%matplotlib inline\nimport numpy as np\nimport xarray as xr\nimport matplotlib.pyplot as plt\n\nfrom xrspatial.normalize import rescale, standardize\nfrom xrspatial import generate_terrain", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "5zey0oy2cji", + "source": "## Synthetic terrain\n\nGenerate a 500x500 elevation raster with values roughly in the 0-1200 range. 
We'll sprinkle in a few NaN cells to show how they're preserved.", + "metadata": {} + }, + { + "cell_type": "code", + "id": "1n68h492hdu", + "source": "terrain = generate_terrain(canvas=xr.DataArray(np.zeros((500, 500)), dims=['y', 'x']))\n\n# Add some NaN holes\nrng = np.random.default_rng(42)\nmask = rng.random(terrain.shape) < 0.005\nterrain.values[mask] = np.nan\n\nprint(f\"Shape: {terrain.shape}\")\nprint(f\"Range: {float(np.nanmin(terrain)):.1f} to {float(np.nanmax(terrain)):.1f}\")\nprint(f\"NaN cells: {int(np.isnan(terrain.values).sum())}\")\n\nfig, ax = plt.subplots(figsize=(7, 6))\nterrain.plot.imshow(ax=ax, cmap='terrain', add_colorbar=True,\n cbar_kwargs={'label': 'Elevation'})\nax.set_title('Raw elevation')\nax.set_axis_off()\nplt.tight_layout()", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "h7fv68zd0dq", + "source": "## rescale: min-max normalization\n\nBy default, `rescale()` maps finite values to [0, 1]. You can supply a custom range with `new_min` and `new_max`.", + "metadata": {} + }, + { + "cell_type": "code", + "id": "z9j191ufbs", + "source": "scaled_01 = rescale(terrain)\nscaled_byte = rescale(terrain, new_min=0, new_max=255)\n\nfig, axes = plt.subplots(1, 3, figsize=(18, 5))\n\nterrain.plot.imshow(ax=axes[0], cmap='terrain', add_colorbar=True)\naxes[0].set_title('Original')\naxes[0].set_axis_off()\n\nscaled_01.plot.imshow(ax=axes[1], cmap='terrain', add_colorbar=True)\naxes[1].set_title('rescale() -> [0, 1]')\naxes[1].set_axis_off()\n\nscaled_byte.plot.imshow(ax=axes[2], cmap='terrain', add_colorbar=True)\naxes[2].set_title('rescale(0, 255)')\naxes[2].set_axis_off()\n\nplt.tight_layout()\n\nprint(f\"[0,1] range: {float(np.nanmin(scaled_01)):.4f} to {float(np.nanmax(scaled_01)):.4f}\")\nprint(f\"[0,255] range: {float(np.nanmin(scaled_byte)):.1f} to {float(np.nanmax(scaled_byte)):.1f}\")\nprint(f\"NaN preserved: {int(np.isnan(scaled_01.values).sum())} cells\")", + "metadata": {}, + 
"execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "u8clrxl4r2i", + "source": "## standardize: z-score normalization\n\n`standardize()` subtracts the mean and divides by the standard deviation of finite values. The result has mean ~0 and std ~1. Use `ddof=1` for sample standard deviation.", + "metadata": {} + }, + { + "cell_type": "code", + "id": "e64naydwlya", + "source": "zscored = standardize(terrain)\n\nfig, axes = plt.subplots(1, 2, figsize=(14, 5))\n\nterrain.plot.imshow(ax=axes[0], cmap='terrain', add_colorbar=True)\naxes[0].set_title('Original')\naxes[0].set_axis_off()\n\nzscored.plot.imshow(ax=axes[1], cmap='RdBu_r', add_colorbar=True,\n cbar_kwargs={'label': 'Z-score'})\naxes[1].set_title('standardize()')\naxes[1].set_axis_off()\n\nplt.tight_layout()\n\nfinite = zscored.values[np.isfinite(zscored.values)]\nprint(f\"Mean: {finite.mean():.2e}\")\nprint(f\"Std: {finite.std():.6f}\")\nprint(f\"Range: {finite.min():.3f} to {finite.max():.3f}\")", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "0ql3g8hvphj", + "source": "## Practical use case: combining layers with different scales\n\nWhen combining elevation and slope into a composite index, the raw values live on different scales. 
Rescaling both to [0, 1] puts them on equal footing.", + "metadata": {} + }, + { + "cell_type": "code", + "id": "9hhldlalk7f", + "source": "from xrspatial import slope\n\nslp = slope(terrain)\n\n# Raw values are on very different scales\nprint(f\"Elevation range: {float(np.nanmin(terrain)):.0f} to {float(np.nanmax(terrain)):.0f}\")\nprint(f\"Slope range: {float(np.nanmin(slp)):.1f} to {float(np.nanmax(slp)):.1f}\")\n\n# Rescale both to [0, 1] and combine\nelev_norm = rescale(terrain)\nslope_norm = rescale(slp)\n\n# Simple composite: high elevation + steep slope = high risk\ncomposite = 0.6 * elev_norm + 0.4 * slope_norm\n\nfig, axes = plt.subplots(1, 3, figsize=(18, 5))\n\nelev_norm.plot.imshow(ax=axes[0], cmap='terrain', add_colorbar=True)\naxes[0].set_title('Elevation [0, 1]')\naxes[0].set_axis_off()\n\nslope_norm.plot.imshow(ax=axes[1], cmap='YlOrRd', add_colorbar=True)\naxes[1].set_title('Slope [0, 1]')\naxes[1].set_axis_off()\n\ncomposite.plot.imshow(ax=axes[2], cmap='inferno', add_colorbar=True)\naxes[2].set_title('Weighted composite (0.6 elev + 0.4 slope)')\naxes[2].set_axis_off()\n\nplt.tight_layout()", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "c6w0w124c4c", + "source": "## Accessor syntax\n\nBoth functions are available through the `.xrs` accessor on DataArrays.\n\n```python\nimport xrspatial\n\nterrain.xrs.rescale()\nterrain.xrs.standardize(ddof=1)\n```", + "metadata": {} + }, + { + "cell_type": "code", + "id": "gcc8pi1a7u", + "source": "import xrspatial # registers .xrs accessor\n\naccessor_result = terrain.xrs.rescale()\nnp.testing.assert_array_equal(accessor_result.values, scaled_01.values)\nprint(\"Accessor output matches function output.\")", + "metadata": {}, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.0" + } 
+from xrspatial.normalize import rescale  # noqa
+from xrspatial.normalize import standardize  # noqa
 from xrspatial.pathfinding import a_star_search  # noqa
 from xrspatial.pathfinding import multi_stop_search  # noqa
+``standardize`` performs z-score normalization (subtract mean, divide by std). +""" + +from __future__ import annotations + +from functools import partial + +import numpy as np +import xarray as xr + +try: + import cupy +except ImportError: + class cupy: + ndarray = False + +try: + import dask.array as da +except ImportError: + da = None + +from xrspatial.dataset_support import supports_dataset +from xrspatial.utils import ( + ArrayTypeFunctionMapping, + _validate_raster, + _validate_scalar, + ngjit, + not_implemented_func, +) + + +# --------------------------------------------------------------------------- +# rescale -- min-max normalization +# --------------------------------------------------------------------------- + +@ngjit +def _cpu_rescale(data, data_min, data_range, new_min, new_range): + out = np.empty(data.shape, dtype=np.float64) + out[:] = np.nan + rows, cols = data.shape + for y in range(rows): + for x in range(cols): + val = data[y, x] + if np.isfinite(val): + if data_range == 0.0: + out[y, x] = new_min + else: + out[y, x] = (val - data_min) / data_range * new_range + new_min + return out + + +def _run_numpy_rescale(data, new_min, new_max): + finite = data[np.isfinite(data)] + if finite.size == 0: + return np.full(data.shape, np.nan, dtype=np.float64) + data_min = float(finite.min()) + data_max = float(finite.max()) + data_range = data_max - data_min + new_range = new_max - new_min + return _cpu_rescale(data, data_min, data_range, new_min, new_range) + + +def _run_dask_numpy_rescale(data, new_min, new_max): + # Compute global stats first (returns scalars), then map element-wise. 
+    finite_mask = da.isfinite(data)
+    # Masked stats: avoid reducing an empty array when nothing is finite.
+    data_min = da.where(finite_mask, data, np.inf).min()
+    data_max = da.where(finite_mask, data, -np.inf).max()
+
+    new_range = new_max - new_min
+    data_range = data_max - data_min
+
+    out = da.where(
+        finite_mask,
+        da.where(
+            data_range == 0,
+            new_min,
+            (data - data_min) / data_range * new_range + new_min,
+        ),
+        np.nan,
+    )
+    return out
+
+
+def _run_cupy_rescale(data, new_min, new_max):
+    finite_mask = cupy.isfinite(data)
+    finite_vals = data[finite_mask]
+    if finite_vals.size == 0:
+        return cupy.full(data.shape, cupy.nan, dtype=cupy.float64)
+    data_min = float(finite_vals.min())
+    data_max = float(finite_vals.max())
+    data_range = data_max - data_min
+    new_range = new_max - new_min
+
+    out = cupy.full(data.shape, cupy.nan, dtype=cupy.float64)
+    if data_range == 0.0:
+        out[finite_mask] = new_min
+    else:
+        out[finite_mask] = (
+            (data[finite_mask] - data_min) / data_range * new_range + new_min
+        )
+    return out
+
+
+def _run_dask_cupy_rescale(data, new_min, new_max):
+    # Same lazy approach as dask+numpy; dask dispatches to cupy chunks.
+    finite_mask = da.isfinite(data)
+    # Masked stats: avoid reducing an empty array when nothing is finite.
+    data_min = da.where(finite_mask, data, np.inf).min()
+    data_max = da.where(finite_mask, data, -np.inf).max()
+    >>> rescale(agg)
+    <xarray.DataArray (dim_0: 2, dim_1: 3)>
np.full(data.shape, np.nan, dtype=np.float64) + mean = float(finite.mean()) + std = float(finite.std(ddof=ddof)) + return _cpu_standardize(data, mean, std) + + +def _run_dask_numpy_standardize(data, ddof): + finite_mask = da.isfinite(data) + finite_vals = data[finite_mask] + mean = finite_vals.mean() + std = finite_vals.std(ddof=ddof) + + out = da.where( + finite_mask, + da.where(std == 0, 0.0, (data - mean) / std), + np.nan, + ) + return out + + +def _run_cupy_standardize(data, ddof): + finite_mask = cupy.isfinite(data) + finite_vals = data[finite_mask] + if finite_vals.size == 0: + return cupy.full(data.shape, cupy.nan, dtype=cupy.float64) + mean = float(finite_vals.mean()) + std = float(finite_vals.std(ddof=ddof)) + + out = cupy.full(data.shape, cupy.nan, dtype=cupy.float64) + if std == 0.0: + out[finite_mask] = 0.0 + else: + out[finite_mask] = (data[finite_mask] - mean) / std + return out + + +def _run_dask_cupy_standardize(data, ddof): + finite_mask = da.isfinite(data) + finite_vals = data[finite_mask] + mean = finite_vals.mean() + std = finite_vals.std(ddof=ddof) + + out = da.where( + finite_mask, + da.where(std == 0, 0.0, (data - mean) / std), + np.nan, + ) + return out + + +@supports_dataset +def standardize(agg, ddof=0, name='standardize'): + """Z-score normalization of a raster. + + Subtracts the mean and divides by the standard deviation of finite + values. NaN and infinite values pass through unchanged. If all + finite values are identical (std == 0), the output is filled with + 0.0 for those cells. + + Parameters + ---------- + agg : xr.DataArray or xr.Dataset + 2D NumPy, CuPy, NumPy-backed Dask, or CuPy-backed Dask array. + ddof : int, default=0 + Delta degrees of freedom for standard deviation. Use 0 for + population std, 1 for sample std. + name : str, default='standardize' + Name of the output DataArray. + + Returns + ------- + standardized : xr.DataArray or xr.Dataset + Z-score normalized raster with the same shape, dims, coords, + and attrs. 
+    >>> standardize(agg)
+    <xarray.DataArray (dim_0: 2, dim_1: 3)>
+class TestRescaleNumpy: + + def test_default_range(self): + data = _make_data() + agg = create_test_raster(data, backend='numpy') + result = rescale(agg) + general_output_checks(agg, result, verify_attrs=True) + + vals = result.values + finite = vals[np.isfinite(vals)] + assert np.nanmin(finite) == pytest.approx(0.0) + assert np.nanmax(finite) == pytest.approx(1.0) + + def test_custom_range(self): + data = _make_data() + agg = create_test_raster(data, backend='numpy') + result = rescale(agg, new_min=0.0, new_max=255.0) + vals = result.values + finite = vals[np.isfinite(vals)] + assert np.nanmin(finite) == pytest.approx(0.0) + assert np.nanmax(finite) == pytest.approx(255.0) + + def test_nan_passthrough(self): + data = _make_data() + agg = create_test_raster(data, backend='numpy') + result = rescale(agg) + # First row was all NaN. + assert np.all(np.isnan(result.values[0, :])) + + def test_inf_passthrough(self): + data = _make_data() + agg = create_test_raster(data, backend='numpy') + result = rescale(agg) + # inf stays as nan in output (not finite -> nan). + assert np.isnan(result.values[5, 2]) + + def test_constant_raster(self): + data = np.full((4, 4), 42.0) + agg = create_test_raster(data, backend='numpy') + result = rescale(agg, new_min=0.0, new_max=1.0) + # All values equal -> output should be new_min. 
+ np.testing.assert_allclose(result.values, 0.0) + + def test_all_nan(self): + data = np.full((3, 3), np.nan) + agg = create_test_raster(data, backend='numpy') + result = rescale(agg) + assert np.all(np.isnan(result.values)) + + def test_single_cell(self): + data = np.array([[7.0]]) + agg = create_test_raster(data, backend='numpy') + result = rescale(agg) + # Single finite value, range=0 -> output = new_min = 0.0 + assert result.values[0, 0] == pytest.approx(0.0) + + def test_known_values(self): + data = np.array([ + [0., 50., 100.], + [25., 75., np.nan], + ], dtype=np.float64) + agg = create_test_raster(data, backend='numpy') + result = rescale(agg) + expected = np.array([ + [0.0, 0.5, 1.0], + [0.25, 0.75, np.nan], + ]) + np.testing.assert_allclose(result.values, expected, rtol=1e-10) + + def test_new_min_gt_new_max_raises(self): + data = np.array([[1., 2.], [3., 4.]]) + agg = create_test_raster(data, backend='numpy') + with pytest.raises(ValueError, match="new_min.*new_max"): + rescale(agg, new_min=10.0, new_max=0.0) + + def test_preserves_coords_attrs(self): + data = np.arange(12, dtype=np.float64).reshape(3, 4) + attrs = {'res': (1.0, 1.0), 'crs': 'EPSG:4326'} + agg = create_test_raster(data, backend='numpy', attrs=attrs) + result = rescale(agg) + assert result.attrs == agg.attrs + assert result.dims == agg.dims + + +@dask_array_available +class TestRescaleDask: + + def test_default_range(self): + data = _make_data() + agg = create_test_raster(data, backend='dask+numpy', chunks=(3, 4)) + result = rescale(agg) + general_output_checks(agg, result, verify_attrs=True) + + vals = result.values + finite = vals[np.isfinite(vals)] + assert np.nanmin(finite) == pytest.approx(0.0) + assert np.nanmax(finite) == pytest.approx(1.0) + + def test_custom_range(self): + data = _make_data() + agg = create_test_raster(data, backend='dask+numpy', chunks=(3, 4)) + result = rescale(agg, new_min=-1.0, new_max=1.0) + vals = result.values + finite = vals[np.isfinite(vals)] + assert 
np.nanmin(finite) == pytest.approx(-1.0) + assert np.nanmax(finite) == pytest.approx(1.0) + + def test_matches_numpy(self): + data = _make_data() + np_agg = create_test_raster(data, backend='numpy') + dk_agg = create_test_raster(data, backend='dask+numpy', chunks=(3, 4)) + np_result = rescale(np_agg).values + dk_result = rescale(dk_agg).values + np.testing.assert_allclose(dk_result, np_result, equal_nan=True) + + +@cuda_and_cupy_available +class TestRescaleCuPy: + + def test_default_range(self): + data = _make_data() + agg = create_test_raster(data, backend='cupy') + result = rescale(agg) + general_output_checks(agg, result, verify_attrs=True) + + vals = result.data.get() + finite = vals[np.isfinite(vals)] + assert np.nanmin(finite) == pytest.approx(0.0) + assert np.nanmax(finite) == pytest.approx(1.0) + + def test_matches_numpy(self): + data = _make_data() + np_agg = create_test_raster(data, backend='numpy') + cu_agg = create_test_raster(data, backend='cupy') + np_result = rescale(np_agg).values + cu_result = rescale(cu_agg).data.get() + np.testing.assert_allclose(cu_result, np_result, equal_nan=True) + + +# ---- standardize ----------------------------------------------------------- + +class TestStandardizeNumpy: + + def test_default(self): + data = _make_data() + agg = create_test_raster(data, backend='numpy') + result = standardize(agg) + general_output_checks(agg, result, verify_attrs=True) + + vals = result.values + finite = vals[np.isfinite(vals)] + assert np.mean(finite) == pytest.approx(0.0, abs=1e-10) + assert np.std(finite) == pytest.approx(1.0, abs=1e-10) + + def test_ddof_1(self): + data = np.array([ + [10., 20., 30.], + [40., 50., 60.], + ], dtype=np.float64) + agg = create_test_raster(data, backend='numpy') + result = standardize(agg, ddof=1) + vals = result.values.ravel() + # With ddof=1, std of output with population std != 1 + # but the formula (x-mean)/std(ddof=1) is applied consistently. 
+ expected_mean = np.mean(data.ravel()) + expected_std = np.std(data.ravel(), ddof=1) + expected = (data - expected_mean) / expected_std + np.testing.assert_allclose(result.values, expected, rtol=1e-10) + + def test_nan_passthrough(self): + data = _make_data() + agg = create_test_raster(data, backend='numpy') + result = standardize(agg) + assert np.all(np.isnan(result.values[0, :])) + + def test_inf_passthrough(self): + data = _make_data() + agg = create_test_raster(data, backend='numpy') + result = standardize(agg) + assert np.isnan(result.values[5, 2]) + + def test_constant_raster(self): + data = np.full((4, 4), 42.0) + agg = create_test_raster(data, backend='numpy') + result = standardize(agg) + # std == 0 -> output should be 0.0 + np.testing.assert_allclose(result.values, 0.0) + + def test_all_nan(self): + data = np.full((3, 3), np.nan) + agg = create_test_raster(data, backend='numpy') + result = standardize(agg) + assert np.all(np.isnan(result.values)) + + def test_single_cell(self): + data = np.array([[7.0]]) + agg = create_test_raster(data, backend='numpy') + result = standardize(agg) + # std == 0 -> 0.0 + assert result.values[0, 0] == pytest.approx(0.0) + + def test_known_values(self): + data = np.array([ + [1., 2., 3.], + [4., 5., np.nan], + ], dtype=np.float64) + agg = create_test_raster(data, backend='numpy') + result = standardize(agg) + finite_data = np.array([1., 2., 3., 4., 5.]) + mean = finite_data.mean() + std = finite_data.std(ddof=0) + expected = np.array([ + [(1 - mean) / std, (2 - mean) / std, (3 - mean) / std], + [(4 - mean) / std, (5 - mean) / std, np.nan], + ]) + np.testing.assert_allclose(result.values, expected, rtol=1e-10) + + def test_invalid_ddof_raises(self): + data = np.array([[1., 2.], [3., 4.]]) + agg = create_test_raster(data, backend='numpy') + with pytest.raises((TypeError, ValueError)): + standardize(agg, ddof=-1) + with pytest.raises((TypeError, ValueError)): + standardize(agg, ddof=1.5) + + def 
test_preserves_coords_attrs(self): + data = np.arange(12, dtype=np.float64).reshape(3, 4) + attrs = {'res': (1.0, 1.0), 'crs': 'EPSG:4326'} + agg = create_test_raster(data, backend='numpy', attrs=attrs) + result = standardize(agg) + assert result.attrs == agg.attrs + assert result.dims == agg.dims + + +@dask_array_available +class TestStandardizeDask: + + def test_default(self): + data = _make_data() + agg = create_test_raster(data, backend='dask+numpy', chunks=(3, 4)) + result = standardize(agg) + general_output_checks(agg, result, verify_attrs=True) + + vals = result.values + finite = vals[np.isfinite(vals)] + assert np.mean(finite) == pytest.approx(0.0, abs=1e-10) + assert np.std(finite) == pytest.approx(1.0, abs=1e-10) + + def test_matches_numpy(self): + data = _make_data() + np_agg = create_test_raster(data, backend='numpy') + dk_agg = create_test_raster(data, backend='dask+numpy', chunks=(3, 4)) + np_result = standardize(np_agg).values + dk_result = standardize(dk_agg).values + np.testing.assert_allclose(dk_result, np_result, equal_nan=True) + + +@cuda_and_cupy_available +class TestStandardizeCuPy: + + def test_default(self): + data = _make_data() + agg = create_test_raster(data, backend='cupy') + result = standardize(agg) + general_output_checks(agg, result, verify_attrs=True) + + vals = result.data.get() + finite = vals[np.isfinite(vals)] + assert np.mean(finite) == pytest.approx(0.0, abs=1e-10) + assert np.std(finite) == pytest.approx(1.0, abs=1e-10) + + def test_matches_numpy(self): + data = _make_data() + np_agg = create_test_raster(data, backend='numpy') + cu_agg = create_test_raster(data, backend='cupy') + np_result = standardize(np_agg).values + cu_result = standardize(cu_agg).data.get() + np.testing.assert_allclose(cu_result, np_result, equal_nan=True)