From cc63a20c8605ff27fe6eb05df115644d6a7a4b1c Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 8 May 2026 08:38:58 -0700 Subject: [PATCH 1/3] read_geotiff_gpu: warn on CPU fallback, add gpu='strict' (#1516) The GPU decode path was wrapped in a too-broad ``try/except Exception: pass`` that silently fell back to CPU on any error. Real GPU bugs (#1508 was an AttributeError) lived undetected because the user-visible result stayed correct. Behaviour now: - Default ``gpu='auto'`` keeps the CPU fallback but emits a ``RuntimeWarning`` that includes the original exception type and message. Stacklevel=2 so the warning points at the caller. - New ``gpu='strict'`` re-raises the original exception instead of falling back. Useful for tests and CI that exercise the GPU fast path. - Unknown ``gpu=`` values raise ``ValueError`` early. Catch is still ``Exception`` rather than a narrower set: CUDA failures arrive as many distinct types (``cuda.RuntimeError``, ``CudaAPIError``, ``CUDARuntimeError``, plain ``RuntimeError``, ...). ``BaseException`` is intentionally not caught so KeyboardInterrupt and SystemExit propagate. Test ``xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py`` monkeypatches ``gpu_decode_tiles_from_file`` to raise a synthetic ``RuntimeError`` and asserts the warn-and-fallback and strict-reraise paths. Tests do not need a real GPU; a ``cupy`` shim is installed when the real package is unavailable. Closes #1516 --- xrspatial/geotiff/__init__.py | 31 +++- .../tests/test_gpu_strict_fallback_1516.py | 157 ++++++++++++++++++ 2 files changed, 185 insertions(+), 3 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 23b2cbeb..f91cef96 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -13,6 +13,8 @@ """ from __future__ import annotations +import warnings + import numpy as np import xarray as xr @@ -1399,7 +1401,8 @@ def read_geotiff_gpu(source: str, *, overview_level: int | None = None, name: str | None = None, chunks: int | tuple | None = None, - max_pixels: int | None = None) -> xr.DataArray: + max_pixels: int | None = None, + gpu: str = 'auto') -> xr.DataArray: """Read a GeoTIFF with GPU-accelerated decompression via Numba CUDA. Decompresses all tiles in parallel on the GPU and returns a @@ -1425,12 +1428,26 @@ def read_geotiff_gpu(source: str, *, max_pixels : int or None Maximum allowed pixel count (width * height * samples). None uses the default (~1 billion). + gpu : {'auto', 'strict'}, default 'auto' + Behaviour when the GPU decode path raises an exception. + + - ``'auto'``: emit a ``RuntimeWarning`` reporting the original + exception type and message, then fall back to the CPU path + and transfer the result onto the GPU. This preserves + backward-compatible behaviour while making GPU regressions + visible. + - ``'strict'``: re-raise the original exception so GPU bugs + surface immediately. Useful in tests and CI for the GPU + fast path. Returns ------- xr.DataArray CuPy-backed DataArray on GPU device. """ + if gpu not in ('auto', 'strict'): + raise ValueError( + f"gpu must be 'auto' or 'strict', got {gpu!r}") try: import cupy except ImportError: @@ -1537,8 +1554,16 @@ def read_geotiff_gpu(source: str, *, compression, predictor, file_dtype, samples, byte_order=header.byte_order, ) - except Exception: - pass + except Exception as e: + if gpu == 'strict': + raise + warnings.warn( + f"read_geotiff_gpu: GPU decode failed " + f"({type(e).__name__}: {e}); falling back to CPU.", + RuntimeWarning, + stacklevel=2, + ) + arr_gpu = None if arr_gpu is None: # Fallback: extract tiles via CPU mmap, then GPU decode diff --git a/xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py b/xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py new file mode 100644 index 00000000..9b721a9d --- /dev/null +++ b/xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py @@ -0,0 +1,157 @@ +"""Regression tests for issue #1516. + +``read_geotiff_gpu`` previously wrapped the GPU decode in a too-broad +``try/except Exception: pass`` that silently swallowed any failure and +fell through to the CPU path. Real GPU regressions (#1508 was an +``AttributeError``) lived undetected because the user-visible result +was still numerically correct. + +The fix: + +1. Default ``gpu='auto'`` still falls back to CPU, but emits a + ``RuntimeWarning`` reporting the original exception type and + message so failures are visible. +2. New ``gpu='strict'`` mode re-raises instead of falling back, so + tests and CI for the GPU fast path see real errors. + +These tests monkeypatch ``gpu_decode_tiles_from_file`` to raise a +synthetic exception. They do not require a real GPU because we stub +``cupy`` at the ``sys.modules`` level when it is not already +available; ``cupy.asarray`` is only called in the CPU-fallback branch +and is satisfied by a thin numpy-backed shim. +""" +from __future__ import annotations + +import importlib +import sys +import types + +import numpy as np +import pytest + +from .conftest import make_minimal_tiff + + +def _ensure_cupy_stub(): + """Install a numpy-backed ``cupy`` shim if cupy is unavailable. + + The CPU fallback path inside ``read_geotiff_gpu`` calls + ``cupy.asarray(arr_cpu)`` to upload the CPU result onto the GPU. + On CPU-only CI we replace cupy with a numpy passthrough so the + function still returns a DataArray we can assert on. + """ + if 'cupy' in sys.modules: + return False # real cupy already imported, leave it alone + try: + import cupy # noqa: F401 + return False + except ImportError: + pass + + stub = types.ModuleType('cupy') + stub.ndarray = np.ndarray + stub.asarray = np.asarray + + cuda_mod = types.ModuleType('cupy.cuda') + + def _is_available(): + return False + + cuda_mod.is_available = _is_available + stub.cuda = cuda_mod + + sys.modules['cupy'] = stub + sys.modules['cupy.cuda'] = cuda_mod + return True + + +@pytest.fixture +def tiled_tiff_path(tmp_path): + """A small tiled TIFF on disk that exercises the GPU tile path.""" + data = np.arange(64, dtype=np.float32).reshape(8, 8) + raw = make_minimal_tiff( + 8, 8, np.dtype('float32'), + pixel_data=data, + tiled=True, + tile_size=4, + ) + path = tmp_path / "strict_fallback_1516.tif" + path.write_bytes(raw) + return str(path), data + + +def _patch_gpu_decode_to_raise(monkeypatch, exc): + """Replace ``gpu_decode_tiles_from_file`` with one that raises ``exc``.""" + from xrspatial.geotiff import _gpu_decode + + def _boom(*args, **kwargs): + raise exc + + monkeypatch.setattr( + _gpu_decode, 'gpu_decode_tiles_from_file', _boom, raising=True, + ) + + +def test_default_mode_warns_on_gpu_failure(tiled_tiff_path, monkeypatch): + """Default ``gpu='auto'`` warns and falls back to the CPU result.""" + inserted_stub = _ensure_cupy_stub() + try: + from xrspatial.geotiff import read_geotiff_gpu + + path, expected = tiled_tiff_path + + synthetic = RuntimeError("simulated GPU failure") + _patch_gpu_decode_to_raise(monkeypatch, synthetic) + + with pytest.warns(RuntimeWarning, match="GPU decode failed"): + result = read_geotiff_gpu(path) + + # Fallback returned the CPU-decoded data. Real cupy arrays expose + # ``.get()`` to copy back to host; the numpy stub returns a + # plain ndarray. + out = result.data + if hasattr(out, 'get'): + out = out.get() + np.testing.assert_array_equal(np.asarray(out), expected) + finally: + if inserted_stub: + sys.modules.pop('cupy', None) + sys.modules.pop('cupy.cuda', None) + importlib.invalidate_caches() + + +def test_strict_mode_reraises(tiled_tiff_path, monkeypatch): + """``gpu='strict'`` re-raises the original GPU exception.""" + inserted_stub = _ensure_cupy_stub() + try: + from xrspatial.geotiff import read_geotiff_gpu + + path, _ = tiled_tiff_path + + synthetic = RuntimeError("simulated GPU failure") + _patch_gpu_decode_to_raise(monkeypatch, synthetic) + + with pytest.raises(RuntimeError, match="simulated GPU failure"): + read_geotiff_gpu(path, gpu='strict') + finally: + if inserted_stub: + sys.modules.pop('cupy', None) + sys.modules.pop('cupy.cuda', None) + importlib.invalidate_caches() + + +def test_invalid_gpu_kwarg_rejected(tiled_tiff_path): + """An unknown ``gpu=`` value raises ``ValueError`` with a clear message.""" + inserted_stub = _ensure_cupy_stub() + try: + from xrspatial.geotiff import read_geotiff_gpu + + path, _ = tiled_tiff_path + + with pytest.raises(ValueError, match="gpu must be 'auto' or 'strict'"): + read_geotiff_gpu(path, gpu='loose') + finally: + if inserted_stub: + sys.modules.pop('cupy', None) + sys.modules.pop('cupy.cuda', None) + importlib.invalidate_caches() From 807391a9dc5714af8af4a9498c8b6e6953ae1e9d Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 8 May 2026 10:04:59 -0700 Subject: [PATCH 2/3] Address Copilot review on #1523 - Plumb gpu='strict'/'auto' through the second GPU decode stage too. Previously only gpu_decode_tiles_from_file failures honored the mode; the gpu_decode_tiles call after CPU-mmap tile extraction silently fell back regardless. Strict now raises and auto warns from either stage. - Update the gpu kwarg docstring to describe the actual two-stage fallback (GDS GPU decode -> CPU-mmap + GPU decode -> full CPU + transfer). - Strengthen _ensure_cupy_stub: also stub when cupy is installed but CUDA is not available, and restore the original module after each test. - Add tests covering the second-stage strict re-raise and second-stage warning behaviour. --- xrspatial/geotiff/__init__.py | 38 +++-- .../tests/test_gpu_strict_fallback_1516.py | 141 ++++++++++++++---- 2 files changed, 142 insertions(+), 37 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index f91cef96..c48fe0ff 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -1429,16 +1429,23 @@ def read_geotiff_gpu(source: str, *, Maximum allowed pixel count (width * height * samples). None uses the default (~1 billion). gpu : {'auto', 'strict'}, default 'auto' - Behaviour when the GPU decode path raises an exception. - - - ``'auto'``: emit a ``RuntimeWarning`` reporting the original - exception type and message, then fall back to the CPU path - and transfer the result onto the GPU. This preserves - backward-compatible behaviour while making GPU regressions - visible. - - ``'strict'``: re-raise the original exception so GPU bugs - surface immediately. Useful in tests and CI for the GPU - fast path. + Behaviour when any GPU decode stage raises an exception. + + The GPU pipeline has two stages: first ``gpu_decode_tiles_from_file`` + (GDS-style direct read), then ``gpu_decode_tiles`` over CPU-mmap + extracted tile bytes. Both stages still run on the GPU. The CPU + fallback (``read_to_array`` + ``cupy.asarray``) only fires after + both GPU stages have failed. + + - ``'auto'``: each GPU-stage failure emits a ``RuntimeWarning`` + reporting the original exception type and message, then falls + through to the next stage (CPU mmap re-decode for the first + failure, full CPU decode + GPU transfer for the second). This + preserves backward-compatible behaviour while making GPU + regressions visible. + - ``'strict'``: re-raise the original exception from either stage + so GPU bugs surface immediately. Useful in tests and CI for the + GPU fast path. Returns ------- @@ -1585,8 +1592,15 @@ def read_geotiff_gpu(source: str, *, compression, predictor, file_dtype, samples, byte_order=header.byte_order, ) - except (ValueError, Exception): - # Unsupported compression -- fall back to CPU then transfer + except Exception as e: + if gpu == 'strict': + raise + warnings.warn( + f"read_geotiff_gpu: GPU decode failed " + f"({type(e).__name__}: {e}); falling back to CPU.", + RuntimeWarning, + stacklevel=2, + ) arr_cpu, _ = read_to_array(source, overview_level=overview_level) arr_gpu = cupy.asarray(arr_cpu) diff --git a/xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py b/xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py index 9b721a9d..cedf9002 100644 --- a/xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py +++ b/xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py @@ -32,32 +32,50 @@ from .conftest import make_minimal_tiff -def _ensure_cupy_stub(): - """Install a numpy-backed ``cupy`` shim if cupy is unavailable. +_CUPY_ORIG_SENTINEL = object() +_cupy_saved = _CUPY_ORIG_SENTINEL +_cupy_cuda_saved = _CUPY_ORIG_SENTINEL - The CPU fallback path inside ``read_geotiff_gpu`` calls - ``cupy.asarray(arr_cpu)`` to upload the CPU result onto the GPU. - On CPU-only CI we replace cupy with a numpy passthrough so the - function still returns a DataArray we can assert on. + +def _cuda_actually_available() -> bool: + """Return True only if cupy + CUDA are usable on this host. + + cupy may be importable on a machine without a working CUDA runtime + (no driver, no device, ROCm-only, etc.). The CPU-fallback branch in + ``read_geotiff_gpu`` calls ``cupy.asarray`` which would then fail at + allocation time. Treat that case the same as cupy-not-installed. """ - if 'cupy' in sys.modules: - return False # real cupy already imported, leave it alone try: - import cupy # noqa: F401 - return False + import cupy except ImportError: - pass + return False + try: + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +def _ensure_cupy_stub() -> bool: + """Install a numpy-backed ``cupy`` shim if real cupy isn't usable. + + Replaces ``sys.modules['cupy']`` whenever cupy is missing OR cupy is + installed but CUDA isn't available. The original module (if any) is + saved so :func:`_restore_cupy` can put it back. + """ + global _cupy_saved, _cupy_cuda_saved + + if _cuda_actually_available(): + return False + + _cupy_saved = sys.modules.get('cupy', _CUPY_ORIG_SENTINEL) + _cupy_cuda_saved = sys.modules.get('cupy.cuda', _CUPY_ORIG_SENTINEL) stub = types.ModuleType('cupy') stub.ndarray = np.ndarray stub.asarray = np.asarray cuda_mod = types.ModuleType('cupy.cuda') - - def _is_available(): - return False - - cuda_mod.is_available = _is_available + cuda_mod.is_available = lambda: False stub.cuda = cuda_mod sys.modules['cupy'] = stub @@ -65,6 +83,22 @@ def _is_available(): return True +def _restore_cupy() -> None: + """Undo :func:`_ensure_cupy_stub`.""" + global _cupy_saved, _cupy_cuda_saved + for name, saved in ( + ('cupy', _cupy_saved), + ('cupy.cuda', _cupy_cuda_saved), + ): + if saved is _CUPY_ORIG_SENTINEL: + sys.modules.pop(name, None) + else: + sys.modules[name] = saved + _cupy_saved = _CUPY_ORIG_SENTINEL + _cupy_cuda_saved = _CUPY_ORIG_SENTINEL + importlib.invalidate_caches() + + @pytest.fixture def tiled_tiff_path(tmp_path): """A small tiled TIFF on disk that exercises the GPU tile path.""" @@ -92,6 +126,21 @@ def _boom(*args, **kwargs): ) +def _patch_both_gpu_stages_to_raise(monkeypatch, exc): + """Make both GPU decode stages raise ``exc`` to exercise the second handler.""" + from xrspatial.geotiff import _gpu_decode + + def _boom(*args, **kwargs): + raise exc + + monkeypatch.setattr( + _gpu_decode, 'gpu_decode_tiles_from_file', _boom, raising=True, + ) + monkeypatch.setattr( + _gpu_decode, 'gpu_decode_tiles', _boom, raising=True, + ) + + def test_default_mode_warns_on_gpu_failure(tiled_tiff_path, monkeypatch): """Default ``gpu='auto'`` warns and falls back to the CPU result.""" inserted_stub = _ensure_cupy_stub() @@ -115,9 +164,7 @@ def test_default_mode_warns_on_gpu_failure(tiled_tiff_path, monkeypatch): np.testing.assert_array_equal(np.asarray(out), expected) finally: if inserted_stub: - sys.modules.pop('cupy', None) - sys.modules.pop('cupy.cuda', None) - importlib.invalidate_caches() + _restore_cupy() def test_strict_mode_reraises(tiled_tiff_path, monkeypatch): @@ -135,9 +182,55 @@ def test_strict_mode_reraises(tiled_tiff_path, monkeypatch): read_geotiff_gpu(path, gpu='strict') finally: if inserted_stub: - sys.modules.pop('cupy', None) - sys.modules.pop('cupy.cuda', None) - importlib.invalidate_caches() + _restore_cupy() + + +def test_strict_mode_reraises_second_stage(tiled_tiff_path, monkeypatch): + """``gpu='strict'`` re-raises if the second-stage GPU decode fails too. + + Regression for the case where ``gpu_decode_tiles_from_file`` and the + follow-up ``gpu_decode_tiles`` both fail. Previously the second + failure was caught by an unconditional ``except (ValueError, Exception)`` + that fell back to CPU regardless of mode. + """ + inserted_stub = _ensure_cupy_stub() + try: + from xrspatial.geotiff import read_geotiff_gpu + + path, _ = tiled_tiff_path + + synthetic = RuntimeError("simulated second-stage GPU failure") + _patch_both_gpu_stages_to_raise(monkeypatch, synthetic) + + with pytest.raises(RuntimeError, + match="simulated second-stage GPU failure"): + read_geotiff_gpu(path, gpu='strict') + finally: + if inserted_stub: + _restore_cupy() + + +def test_default_mode_warns_on_second_stage_failure(tiled_tiff_path, monkeypatch): + """``gpu='auto'`` warns once per stage failure and falls back to CPU.""" + inserted_stub = _ensure_cupy_stub() + try: + from xrspatial.geotiff import read_geotiff_gpu + + path, expected = tiled_tiff_path + + synthetic = RuntimeError("simulated second-stage GPU failure") + _patch_both_gpu_stages_to_raise(monkeypatch, synthetic) + + with pytest.warns(RuntimeWarning, match="GPU decode failed"): + result = read_geotiff_gpu(path) + + out = result.data + if hasattr(out, 'get'): + out = out.get() + np.testing.assert_array_equal(np.asarray(out), expected) + finally: + if inserted_stub: + _restore_cupy() def test_invalid_gpu_kwarg_rejected(tiled_tiff_path): @@ -152,6 +245,4 @@ def test_invalid_gpu_kwarg_rejected(tiled_tiff_path): read_geotiff_gpu(path, gpu='loose') finally: if inserted_stub: - sys.modules.pop('cupy', None) - sys.modules.pop('cupy.cuda', None) - importlib.invalidate_caches() + _restore_cupy() From 61bdeba70a9850f63d4088d18c93926e75d5b25b Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 8 May 2026 10:23:55 -0700 Subject: [PATCH 3/3] Address bug-hunt review on #1523 - Document that stripped layouts and sparse-tile files route directly to the CPU reader before any GPU decode stage runs, so the gpu kwarg does not affect them. cupy.asarray upload failures in those paths propagate unchanged. - Tighten test_default_mode_warns_on_second_stage_failure to assert exactly two RuntimeWarning records (one per stage). The previous pytest.warns context only verified that >=1 matching warning fired, so a regression where one of the two handlers stopped warning would not have been caught. --- xrspatial/geotiff/__init__.py | 5 ++++ .../tests/test_gpu_strict_fallback_1516.py | 23 +++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index c48fe0ff..11e740d0 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -1447,6 +1447,11 @@ def read_geotiff_gpu(source: str, *, so GPU bugs surface immediately. Useful in tests and CI for the GPU fast path. + Stripped layouts and sparse-tile files route directly to the CPU + reader before either GPU decode stage runs, so the ``gpu`` kwarg + does not affect them. A failure inside the subsequent + ``cupy.asarray(...)`` upload propagates unchanged in both modes. + Returns ------- xr.DataArray diff --git a/xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py b/xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py index cedf9002..6b06c4a2 100644 --- a/xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py +++ b/xrspatial/geotiff/tests/test_gpu_strict_fallback_1516.py @@ -211,7 +211,15 @@ def test_strict_mode_reraises_second_stage(tiled_tiff_path, monkeypatch): def test_default_mode_warns_on_second_stage_failure(tiled_tiff_path, monkeypatch): - """``gpu='auto'`` warns once per stage failure and falls back to CPU.""" + """``gpu='auto'`` warns once per stage failure and falls back to CPU. + + Both GPU decode stages are forced to raise, so the user sees two + distinct ``RuntimeWarning`` records (one per stage) before the CPU + fallback fires. Asserting the exact count guards against a + regression where one of the two handlers stops warning. + """ + import warnings as _warnings + inserted_stub = _ensure_cupy_stub() try: from xrspatial.geotiff import read_geotiff_gpu @@ -221,9 +229,20 @@ def test_default_mode_warns_on_second_stage_failure(tiled_tiff_path, monkeypatch synthetic = RuntimeError("simulated second-stage GPU failure") _patch_both_gpu_stages_to_raise(monkeypatch, synthetic) - with pytest.warns(RuntimeWarning, match="GPU decode failed"): + with _warnings.catch_warnings(record=True) as records: + _warnings.simplefilter("always") result = read_geotiff_gpu(path) + gpu_warnings = [ + w for w in records + if issubclass(w.category, RuntimeWarning) + and "GPU decode failed" in str(w.message) + ] + assert len(gpu_warnings) == 2, ( + f"expected one warning per GPU stage; got {len(gpu_warnings)}: " + f"{[str(w.message) for w in gpu_warnings]}" + ) + out = result.data if hasattr(out, 'get'): out = out.get()