From 56d97bbcc6ff9da68fde69e092eb93c4dcef71ab Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sat, 9 May 2026 18:26:58 -0700 Subject: [PATCH] Apply nodata mask in read_geotiff_gpu (#1542) The GPU read backend silently differed from the CPU and dask paths when the file declared a nodata sentinel. open_geotiff(path, gpu=True) returned a DataArray whose attrs had no 'nodata' key and whose pixel data still carried the raw sentinel value. Integer rasters were not promoted to float64, and float rasters kept the sentinel rather than NaN. NaN-aware code that worked on the CPU and dask paths quietly produced wrong results on the GPU path. Add an _apply_nodata_mask_gpu helper that mirrors the CPU masking logic with cupy operations, and call it from both the tiled main path and the stripped fallback inside read_geotiff_gpu. Also set attrs['nodata'] from geo_info.nodata so callers can still see the sentinel value. Two existing tests had codified the old behaviour and needed updates: test_sparse_tile_gpu_round_trip checked dtype=uint16, but the GPU read now promotes to float64 to represent NaN, matching the CPU path. test_*_sentinel_nodata in test_lerc_valid_mask_gpu compared read_geotiff_gpu against read_to_array (low-level), so the test helpers now restore the sentinel on the GPU side before the bit-for-bit comparison so the LERC mask preservation check still holds. 
--- xrspatial/geotiff/__init__.py | 55 +++++ .../geotiff/tests/test_gpu_nodata_1542.py | 197 ++++++++++++++++++ .../geotiff/tests/test_lerc_valid_mask_gpu.py | 48 ++++- xrspatial/geotiff/tests/test_sparse_cog.py | 17 +- 4 files changed, 306 insertions(+), 11 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_gpu_nodata_1542.py diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 4b27f245..b571993a 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -563,6 +563,41 @@ def _is_gpu_data(data) -> bool: return isinstance(data, _cupy_type) +def _apply_nodata_mask_gpu(arr_gpu, nodata): + """Replace nodata sentinel values with NaN on a CuPy array. + + Mirrors the CPU eager path in :func:`open_geotiff` (lines around the + ``# Apply nodata mask`` comment). Float arrays get NaN substituted in + place of the sentinel; integer arrays are promoted to float64 with NaN + where the sentinel was. NaN nodata on a float array is a no-op (the + sentinel already matches NaN-aware code). + + Returns the (possibly promoted, possibly nodata-masked) CuPy array. + The caller is responsible for setting ``attrs['nodata']`` so the + sentinel is still discoverable downstream. 
+ """ + import cupy + + if nodata is None: + return arr_gpu + arr_dtype = np.dtype(str(arr_gpu.dtype)) + if arr_dtype.kind == 'f': + if not np.isnan(nodata): + sentinel = arr_dtype.type(nodata) + arr_gpu = cupy.where(arr_gpu == sentinel, + arr_dtype.type('nan'), arr_gpu) + return arr_gpu + if arr_dtype.kind in ('u', 'i'): + nodata_int = int(nodata) + sentinel = arr_dtype.type(nodata_int) + mask = arr_gpu == sentinel + if bool(mask.any().item()): + arr_gpu = arr_gpu.astype(cupy.float64) + arr_gpu = cupy.where(mask, cupy.float64('nan'), arr_gpu) + return arr_gpu + return arr_gpu + + _LEVEL_RANGES = { 'deflate': (1, 9), 'zstd': (1, 22), @@ -1665,6 +1700,14 @@ def read_geotiff_gpu(source: str, *, t_tuple = _transform_tuple(geo_info) if t_tuple is not None: attrs['transform'] = t_tuple + # Apply nodata mask + record sentinel so the GPU read agrees + # with the CPU eager path. Without this, integer rasters keep + # the literal sentinel value and float rasters keep the + # sentinel rather than NaN -- a silent backend divergence. + nodata = geo_info.nodata + if nodata is not None: + attrs['nodata'] = nodata + arr_gpu = _apply_nodata_mask_gpu(arr_gpu, nodata) if dtype is not None: target = np.dtype(dtype) _validate_dtype_cast(np.dtype(str(arr_gpu.dtype)), target) @@ -1866,6 +1909,16 @@ def _read_once(): f"({height}, {width}, {samples})" ) + # Apply nodata mask + record sentinel so the GPU read agrees with the + # CPU eager path (issue #1542). Without this, integer rasters keep the + # literal sentinel value and float rasters keep the sentinel rather + # than NaN -- a silent backend divergence. Apply before the optional + # dtype cast so the float promotion for masked integer rasters doesn't + # surprise a user-supplied dtype. 
+ nodata = geo_info.nodata + if nodata is not None: + arr_gpu = _apply_nodata_mask_gpu(arr_gpu, nodata) + if dtype is not None: target = np.dtype(dtype) _validate_dtype_cast(np.dtype(str(arr_gpu.dtype)), target) @@ -1886,6 +1939,8 @@ def _read_once(): t_tuple = _transform_tuple(geo_info) if t_tuple is not None: attrs['transform'] = t_tuple + if nodata is not None: + attrs['nodata'] = nodata if arr_gpu.ndim == 3: dims = ['y', 'x', 'band'] diff --git a/xrspatial/geotiff/tests/test_gpu_nodata_1542.py b/xrspatial/geotiff/tests/test_gpu_nodata_1542.py new file mode 100644 index 00000000..47b62d3c --- /dev/null +++ b/xrspatial/geotiff/tests/test_gpu_nodata_1542.py @@ -0,0 +1,197 @@ +"""GPU read backend nodata propagation tests for issue #1542. + +Before the fix, ``read_geotiff_gpu`` silently differed from the CPU +eager path on rasters with a declared nodata sentinel: + +* The returned DataArray's ``attrs`` had no ``nodata`` key, even when + the file declared one via the GDAL_NODATA tag. +* The pixel data still carried the raw sentinel: integer rasters were + not promoted to float64 with NaN, and float rasters kept the + sentinel rather than NaN. + +These tests pin the contract that the GPU read agrees with the CPU +read on dtype, NaN positions, and ``attrs['nodata']`` for the +combinations the audit found broken. 
+""" +from __future__ import annotations + +import importlib.util +import os +import tempfile + +import numpy as np +import pytest + + +def _gpu_available() -> bool: + """True if cupy is importable and CUDA is initialised.""" + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif( + not _HAS_GPU, + reason="cupy + CUDA required", +) + + +@_gpu_only +def test_gpu_uint16_nodata_promoted_and_masked_tiled(tmp_path): + """uint16 + nodata sentinel -> GPU returns float64 with NaN, attrs[nodata] set.""" + from xrspatial.geotiff import open_geotiff + from xrspatial.geotiff._writer import write + + arr = np.array([[1, 2, 3], [65535, 5, 6]], dtype=np.uint16) + path = str(tmp_path / 'gpu_u16_nodata_1542_tiled.tif') + write(arr, path, nodata=65535, compression='deflate', + tiled=True, tile_size=16) + + cpu = open_geotiff(path) + gpu = open_geotiff(path, gpu=True) + + assert cpu.dtype == gpu.dtype == np.float64 + assert cpu.attrs.get('nodata') == 65535.0 + assert gpu.attrs.get('nodata') == 65535.0 + cpu_arr = cpu.values + gpu_arr = gpu.data.get() + np.testing.assert_array_equal(np.isnan(cpu_arr), np.isnan(gpu_arr)) + finite = ~np.isnan(cpu_arr) + np.testing.assert_array_equal(cpu_arr[finite], gpu_arr[finite]) + + +@_gpu_only +def test_gpu_uint16_nodata_promoted_and_masked_stripped(tmp_path): + """Stripped fallback path also promotes + masks integer nodata.""" + from xrspatial.geotiff import open_geotiff + from xrspatial.geotiff._writer import write + + arr = np.array([[1, 2, 3], [65535, 5, 6]], dtype=np.uint16) + path = str(tmp_path / 'gpu_u16_nodata_1542_stripped.tif') + write(arr, path, nodata=65535, compression='deflate', tiled=False) + + cpu = open_geotiff(path) + gpu = open_geotiff(path, gpu=True) + + assert cpu.dtype == gpu.dtype == np.float64 + assert gpu.attrs.get('nodata') == 65535.0 + 
np.testing.assert_array_equal(np.isnan(cpu.values), + np.isnan(gpu.data.get())) + + +@_gpu_only +def test_gpu_float32_sentinel_replaced_with_nan(tmp_path): + """float32 + finite sentinel -> GPU returns float32 with NaN at sentinel positions.""" + from xrspatial.geotiff import open_geotiff + from xrspatial.geotiff._writer import write + + arr = np.arange(24, dtype=np.float32).reshape(4, 6) + arr[0, 0] = -9999.0 + arr[2, 3] = -9999.0 + path = str(tmp_path / 'gpu_f32_sentinel_1542.tif') + write(arr, path, nodata=-9999.0, compression='deflate', + tiled=True, tile_size=16) + + cpu = open_geotiff(path) + gpu = open_geotiff(path, gpu=True) + + assert cpu.dtype == gpu.dtype == np.float32 + assert gpu.attrs.get('nodata') == -9999.0 + cpu_arr = cpu.values + gpu_arr = gpu.data.get() + np.testing.assert_array_equal(np.isnan(cpu_arr), np.isnan(gpu_arr)) + finite = ~np.isnan(cpu_arr) + np.testing.assert_array_equal(cpu_arr[finite], gpu_arr[finite]) + + +@_gpu_only +def test_gpu_no_nodata_keeps_dtype(tmp_path): + """No nodata declared -> GPU keeps source dtype, no nodata attr added.""" + from xrspatial.geotiff import open_geotiff + from xrspatial.geotiff._writer import write + + arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint16) + path = str(tmp_path / 'gpu_u16_no_nodata_1542.tif') + write(arr, path, compression='deflate', tiled=True, tile_size=16) + + gpu = open_geotiff(path, gpu=True) + assert gpu.dtype == np.uint16 + assert gpu.attrs.get('nodata') is None + np.testing.assert_array_equal(gpu.data.get(), arr) + + +@_gpu_only +def test_gpu_nan_nodata_passes_through(tmp_path): + """nodata=NaN on float data -> GPU returns NaN positions intact, attrs[nodata]=nan.""" + from xrspatial.geotiff import open_geotiff + from xrspatial.geotiff._writer import write + + arr = np.array([[1.0, 2.0, 3.0], [np.nan, 5.0, 6.0]], dtype=np.float32) + path = str(tmp_path / 'gpu_f32_nan_1542.tif') + write(arr, path, nodata=float('nan'), compression='deflate', + tiled=True, tile_size=16) + + gpu = 
open_geotiff(path, gpu=True) + assert np.isnan(gpu.attrs.get('nodata')) + gpu_arr = gpu.data.get() + assert np.isnan(gpu_arr[1, 0]) + finite = ~np.isnan(gpu_arr) + np.testing.assert_array_equal(gpu_arr[finite], arr[~np.isnan(arr)]) + + +@_gpu_only +def test_gpu_all_four_backends_agree_on_nodata(tmp_path): + """numpy / dask+numpy / cupy / dask+cupy all agree on dtype + nodata + NaN.""" + from xrspatial.geotiff import open_geotiff + from xrspatial.geotiff._writer import write + + arr = np.array([[1, 2, 3], [65535, 5, 6]], dtype=np.uint16) + path = str(tmp_path / 'gpu_4backends_1542.tif') + write(arr, path, nodata=65535, compression='deflate', + tiled=True, tile_size=16) + + da_np = open_geotiff(path) + da_dask = open_geotiff(path, chunks=512) + da_gpu = open_geotiff(path, gpu=True) + da_gpu_dask = open_geotiff(path, gpu=True, chunks=512) + + # dtype + for label, da in [('np', da_np), ('dask+np', da_dask), + ('gpu', da_gpu), ('gpu+dask', da_gpu_dask)]: + assert da.dtype == np.float64, f"{label}: dtype={da.dtype}" + assert da.attrs.get('nodata') == 65535.0, ( + f"{label}: missing nodata attr (got {da.attrs.get('nodata')!r})" + ) + + # NaN positions + np_arr = da_np.values + dask_arr = da_dask.compute().values + gpu_arr = da_gpu.data.get() + gpu_dask_arr = da_gpu_dask.compute().data.get() + np.testing.assert_array_equal(np.isnan(np_arr), np.isnan(dask_arr)) + np.testing.assert_array_equal(np.isnan(np_arr), np.isnan(gpu_arr)) + np.testing.assert_array_equal(np.isnan(np_arr), np.isnan(gpu_dask_arr)) + + +@_gpu_only +def test_gpu_int16_negative_nodata(tmp_path): + """Signed integer with negative nodata: also promoted to float64 + NaN.""" + from xrspatial.geotiff import open_geotiff + from xrspatial.geotiff._writer import write + + arr = np.array([[-9999, 10], [20, -9999]], dtype=np.int16) + path = str(tmp_path / 'gpu_i16_nodata_1542.tif') + write(arr, path, nodata=-9999, compression='deflate', + tiled=True, tile_size=16) + + cpu = open_geotiff(path) + gpu = 
open_geotiff(path, gpu=True) + assert cpu.dtype == gpu.dtype == np.float64 + assert gpu.attrs.get('nodata') == -9999.0 + np.testing.assert_array_equal(np.isnan(cpu.values), + np.isnan(gpu.data.get())) diff --git a/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py b/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py index 0f39697b..88d5a88b 100644 --- a/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py +++ b/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py @@ -80,7 +80,17 @@ def _patched(data, width, height, samples=1, def _read_cpu_gpu(path): - """Read *path* with both readers and return ``(cpu_array, gpu_host_array)``.""" + """Read *path* with both readers and return ``(cpu_array, gpu_host_array)``. + + Uses the low-level ``read_to_array`` for CPU so that nodata sentinels + stay as the literal value (this module checks LERC mask preservation, + not the higher-level NaN promotion that ``open_geotiff`` performs). + + The GPU reader (``read_geotiff_gpu``) applies the same nodata masking + that ``open_geotiff`` does (PR #1542), so its output uses NaN where + the sentinel was. Callers that want a bit-for-bit comparison should + restore the sentinel first via the ``_restore_sentinel`` helper. 
+ """ from xrspatial.geotiff import read_geotiff_gpu from xrspatial.geotiff._reader import read_to_array @@ -90,6 +100,17 @@ def _read_cpu_gpu(path): return cpu, gpu_host +def _restore_sentinel(arr, nodata): + """Replace NaN positions in *arr* with *nodata* so high-level GPU + reads compare bit-exactly against low-level CPU reads (which keep + the sentinel value verbatim).""" + if nodata is None or arr.dtype.kind != 'f' or np.isnan(nodata): + return arr + out = arr.copy() + out[np.isnan(out)] = arr.dtype.type(nodata) + return out + + @_gpu_only class TestGpuLercValidMask: """End-to-end TIFF round-trips comparing GPU vs CPU output.""" @@ -141,9 +162,15 @@ def invalid_pred(a): nodata=-9999.0) cpu, gpu = _read_cpu_gpu(path) - np.testing.assert_array_equal(cpu, gpu) + # ``read_geotiff_gpu`` applies the high-level nodata mask (#1542), + # so masked pixels come back as NaN. ``read_to_array`` keeps the + # sentinel verbatim. Restore the sentinel on the GPU side so the + # bit-for-bit comparison still pins LERC mask preservation. + gpu_with_sentinel = _restore_sentinel(gpu, -9999.0) + np.testing.assert_array_equal(cpu, gpu_with_sentinel) for (r, c) in invalid_positions: - assert gpu[r, c] == np.float32(-9999.0) + assert np.isnan(gpu[r, c]) + assert gpu_with_sentinel[r, c] == np.float32(-9999.0) def test_uint16_sentinel_nodata(self, tmp_path, lerc_writer_with_mask): """Uint16 LERC + sentinel nodata (65535): GPU matches CPU.""" @@ -164,9 +191,20 @@ def invalid_pred(a): nodata=65535) cpu, gpu = _read_cpu_gpu(path) - np.testing.assert_array_equal(cpu, gpu) + # ``read_geotiff_gpu`` applies the high-level nodata mask on + # integer rasters (#1542): the array is promoted to float64 with + # NaN where the sentinel was. ``read_to_array`` keeps uint16 with + # the sentinel literal. Restore the sentinel + dtype on the GPU + # side so the bit-for-bit comparison still pins LERC mask + # preservation. 
Replace NaN before the uint16 cast to avoid + # numpy's "invalid value encountered in cast" warning. + assert gpu.dtype == np.float64 + gpu_no_nan = np.where(np.isnan(gpu), 65535.0, gpu) + gpu_u16 = gpu_no_nan.astype(np.uint16) + np.testing.assert_array_equal(cpu, gpu_u16) for (r, c) in invalid_positions: - assert gpu[r, c] == np.uint16(65535) + assert np.isnan(gpu[r, c]) + assert gpu_u16[r, c] == np.uint16(65535) def test_no_mask_roundtrip_bitexact(self, tmp_path): """All-valid LERC (no encoded mask): GPU and CPU agree bit-exact.""" diff --git a/xrspatial/geotiff/tests/test_sparse_cog.py b/xrspatial/geotiff/tests/test_sparse_cog.py index 68fd7aba..a136900e 100644 --- a/xrspatial/geotiff/tests/test_sparse_cog.py +++ b/xrspatial/geotiff/tests/test_sparse_cog.py @@ -105,10 +105,15 @@ def test_sparse_tile_gpu_round_trip(self, tmp_path): _write_sparse_tiled(path, nodata=0) arr = open_geotiff(path, gpu=True) - # GPU read returns raw values (no nodata->NaN promotion); sparse - # tiles become the nodata sentinel (0 here). + # GPU read now applies the high-level nodata mask (#1542): the + # source uint16 raster is promoted to float64 and sentinel + # values become NaN, matching the CPU eager path. host = arr.data.get() - assert host.dtype == np.uint16 - assert host[:64, :64].sum() == 64 * 64 * 100 - assert host[:64, 64:].sum() == 0 - assert host[64:, :].sum() == 0 + assert host.dtype == np.float64 + assert arr.attrs.get('nodata') == 0.0 + # Top-left tile holds the real data (100 in every cell). + np.testing.assert_array_equal(host[:64, :64], + np.full((64, 64), 100.0)) + # Sparse tiles + the bottom row of tiles become NaN via the mask. + assert np.all(np.isnan(host[:64, 64:])) + assert np.all(np.isnan(host[64:, :]))