diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index f3fef41f..19e5331a 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -13,6 +13,7 @@ """ from __future__ import annotations +import math import warnings import numpy as np @@ -1396,6 +1397,92 @@ def _read(): return _read() +def _gpu_decode_single_band_tiles( + source, lazy_data, offsets, byte_counts, + tw, th, width, height, + compression, predictor, file_dtype, + *, + byte_order: str, + gpu: str, +): + """Decode one band's tile sequence into a 2-D ``(H, W)`` cupy array. + + Helper for the ``PlanarConfiguration=2`` GPU path: the existing + ``gpu_decode_tiles`` / ``gpu_decode_tiles_from_file`` kernels assume + a single chunky tile sequence with ``bytes_per_pixel = itemsize * + samples``. For planar=2 each band has its own list of tiles, so we + invoke those kernels once per band with ``samples=1`` and stack the + resulting 2-D arrays into ``(H, W, samples)`` afterwards. + + Mirrors the two-stage GPU pipeline in ``read_geotiff_gpu`` -- GDS + first, then CPU-extracted-tiles GPU decode. ``lazy_data`` is a + zero-arg callable that returns the full file bytes; it caches its + result so the first band that needs the stage-2 fallback pays the + ``read_all()``, and subsequent bands reuse the same buffer. When + every band's GDS path succeeds the file is never read at all. + Sparse tiles are not expected here; the caller routes sparse files + to the CPU path. + + Auto-mode semantics: a stage-1 GDS failure warns and falls through + to stage 2; a stage-2 failure warns and returns ``None`` so the + caller can trigger a whole-image CPU fallback (a per-band CPU + decode would require a single-band CPU path keyed off planar + config, which doesn't exist). Strict mode re-raises from either + stage. + """ + from ._gpu_decode import gpu_decode_tiles, gpu_decode_tiles_from_file + + arr_gpu = None + try: + arr_gpu = gpu_decode_tiles_from_file( + source, offsets, byte_counts, + tw, th, width, height, + compression, predictor, file_dtype, 1, + byte_order=byte_order, + ) + except Exception as e: + if gpu == 'strict': + raise + warnings.warn( + f"read_geotiff_gpu: GPU decode failed " + f"({type(e).__name__}: {e}); falling back to CPU.", + RuntimeWarning, + stacklevel=3, + ) + arr_gpu = None + + if arr_gpu is None: + shared_data = lazy_data() + compressed_tiles = [ + bytes(shared_data[offsets[i]:offsets[i] + byte_counts[i]]) + for i in range(len(offsets)) + ] + try: + arr_gpu = gpu_decode_tiles( + compressed_tiles, + tw, th, width, height, + compression, predictor, file_dtype, 1, + byte_order=byte_order, + ) + except Exception as e: + if gpu == 'strict': + raise + warnings.warn( + f"read_geotiff_gpu: GPU decode failed " + f"({type(e).__name__}: {e}); falling back to CPU.", + RuntimeWarning, + stacklevel=3, + ) + return None + + if arr_gpu.ndim != 2 or arr_gpu.shape != (height, width): + raise RuntimeError( + f"single-band GPU tile decode produced shape " + f"{arr_gpu.shape}, expected ({height}, {width})" + ) + return arr_gpu + + def read_geotiff_gpu(source: str, *, dtype=None, overview_level: int | None = None, @@ -1519,7 +1606,9 @@ def read_geotiff_gpu(source: str, *, target = np.dtype(dtype) _validate_dtype_cast(np.dtype(str(arr_gpu.dtype)), target) arr_gpu = arr_gpu.astype(target) - # Mirror the tiled branch: 3-D (y, x, band) for multi-band reads. + # Multi-band stripped reads come back as (y, x, band); mirror + # the tiled branch so dims line up with ndim. Single-band stays + # 2-D ('y', 'x'). if arr_gpu.ndim == 3: dims = ['y', 'x', 'band'] coords['band'] = np.arange(arr_gpu.shape[2]) @@ -1533,6 +1622,7 @@ def read_geotiff_gpu(source: str, *, compression = ifd.compression predictor = ifd.predictor samples = ifd.samples_per_pixel + planar = ifd.planar_config tw = ifd.tile_width th = ifd.tile_height width = ifd.width @@ -1558,7 +1648,80 @@ def read_geotiff_gpu(source: str, *, # Sparse tiles (byte_count == 0) are unsupported on the GPU pipeline; # the CPU reader fills them with nodata and copies onto the GPU. has_sparse_tile = any(bc == 0 for bc in byte_counts) - if has_sparse_tile: + + # PlanarConfiguration=2 (separate bands): each band has its own list + # of tiles back-to-back in TileOffsets / TileByteCounts. The GPU + # tile-assembly kernel assumes a single chunky tile sequence with + # bytes_per_pixel = itemsize * samples, so it cannot handle planar=2 + # directly. Decode each band's tile slab as a single-band image, then + # stack into (H, W, samples). For planar=1 (chunky) the existing + # single-pass kernel is correct. Sparse-tile files always route to + # the CPU reader regardless of planar config. + if planar == 2 and samples > 1 and not has_sparse_tile: + tiles_across = math.ceil(width / tw) + tiles_down = math.ceil(height / th) + tiles_per_band = tiles_across * tiles_down + # validate_tile_layout already requires len(offsets) >= the grid; + # accept extra trailing entries (some writers emit padding) and + # only consume the first tiles_per_band * samples. + expected_min = tiles_per_band * samples + if len(offsets) < expected_min: + raise ValueError( + f"PlanarConfiguration=2 expects at least {expected_min} " + f"TileOffsets entries ({tiles_across} x {tiles_down} x " + f"{samples} bands), got {len(offsets)}" + ) + # Lazy shared file read for the per-band stage-2 fallback. When + # every band's GDS path succeeds, _read_once is never called + # and we skip the read_all() entirely; when any band falls + # back, the first call materialises the bytes and subsequent + # bands reuse the same buffer (so N bands cost at most one + # read_all(), not N). + _shared_data_cache: list = [] + + def _read_once(): + if not _shared_data_cache: + src2 = _FileSource(source) + try: + _shared_data_cache.append(src2.read_all()) + finally: + src2.close() + return _shared_data_cache[0] + + band_arrays = [] + cpu_fallback_needed = False + for band_idx in range(samples): + b0 = band_idx * tiles_per_band + b1 = b0 + tiles_per_band + band_offsets = list(offsets[b0:b1]) + band_byte_counts = list(byte_counts[b0:b1]) + band_arr = _gpu_decode_single_band_tiles( + source, _read_once, band_offsets, band_byte_counts, + tw, th, width, height, + compression, predictor, file_dtype, + byte_order=header.byte_order, + gpu=gpu, + ) + if band_arr is None: + # Auto-mode signal: stage-2 GPU decode failed for this + # band. There's no per-band CPU decode path, so fall + # back to a whole-image CPU read + GPU upload, matching + # the chunky path's auto-mode semantics. + cpu_fallback_needed = True + break + band_arrays.append(band_arr) + if cpu_fallback_needed: + arr_cpu, _ = read_to_array(source, overview_level=overview_level) + arr_gpu = cupy.asarray(arr_cpu) + else: + arr_gpu = cupy.stack(band_arrays, axis=2) + if arr_gpu.shape != (height, width, samples): + raise RuntimeError( + f"planar=2 GPU assembly produced shape " + f"{arr_gpu.shape}, expected " + f"({height}, {width}, {samples})" + ) + elif has_sparse_tile: arr_cpu, _ = read_to_array(source, overview_level=overview_level) arr_gpu = cupy.asarray(arr_cpu) else: @@ -1615,6 +1778,19 @@ def read_geotiff_gpu(source: str, *, arr_cpu, _ = read_to_array(source, overview_level=overview_level) arr_gpu = cupy.asarray(arr_cpu) + # Multi-band tiled output must be (H, W, samples) regardless of planar + # config -- catch any shape regression in the kernels before we attach + # dims/coords below. Plain `raise` rather than `assert` so the check + # survives `python -O`. + if samples > 1: + if (arr_gpu.shape[:2] != (height, width) + or arr_gpu.shape[2] != samples): + raise RuntimeError( + f"GPU multi-band tile assembly produced shape " + f"{arr_gpu.shape}, expected " + f"({height}, {width}, {samples})" + ) + if dtype is not None: target = np.dtype(dtype) _validate_dtype_cast(np.dtype(str(arr_gpu.dtype)), target) diff --git a/xrspatial/geotiff/tests/test_planar_multiband.py b/xrspatial/geotiff/tests/test_planar_multiband.py new file mode 100644 index 00000000..7162fb45 --- /dev/null +++ b/xrspatial/geotiff/tests/test_planar_multiband.py @@ -0,0 +1,195 @@ +"""Tests for ``PlanarConfiguration`` handling in CPU and GPU multi-band reads. + +Covers two bugs surfaced by the geotiff accuracy audit: + +A2 (HIGH) + GPU tiled multi-band reads returned wrong pixel values because the + tile-assembly kernel iterated a single ``tile_offsets`` list with + ``bytes_per_pixel = itemsize * samples``. Planar=2 files (one tile + sequence per band, contiguous in TileOffsets) silently produced + garbage because the kernel had no notion of separate band slabs. + +A3 (MEDIUM) + The CPU stripped reader returned planar=2 multi-band shape with + axes mismatched against the assigned dims. Already fixed in the CPU + path; this module pins the contract so future refactors do not + regress it. + +Each combination -- planar config x layout x band count x dtype -- +round-trips through the writer (or tifffile, where we want to control +planar config and layout independently) and asserts that the CPU read, +the GPU read, and the original numpy buffer all agree pixel-for-pixel. + +GPU tests skip cleanly when CUDA is unavailable. +""" +from __future__ import annotations + +import importlib.util +import os +import tempfile + +import numpy as np +import pytest + + +tifffile = pytest.importorskip("tifffile") + + +def _gpu_available() -> bool: + """True if cupy is importable and CUDA is initialised.""" + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif( + not _HAS_GPU, + reason="cupy + CUDA required", +) + + +def _make_data(bands: int, height: int, width: int, dtype) -> np.ndarray: + """Deterministic random multi-band raster shaped (bands, height, width).""" + rng = np.random.RandomState(0xA2A3 + bands * 1000 + height + np.dtype(dtype).itemsize) + info = np.iinfo(dtype) + high = min(int(info.max), 60_000) + 1 + return rng.randint(0, high, size=(bands, height, width)).astype(dtype) + + +def _write_tiff(path: str, data: np.ndarray, *, planar: str, tiled: bool): + """Write *data* (bands, height, width) with the requested planar config + layout. + + tifffile defaults: when bands are first axis, planar='separate'. + Pass planar='contig' to interleave samples (data is transposed to + (height, width, bands) before write). + """ + kwargs = {"photometric": "rgb" if data.shape[0] == 3 else "minisblack"} + if data.shape[0] not in (1, 3): + # tifffile rejects 'rgb' for non-3-band; use a generic photometric + # tag and tell it to skip extra-sample warnings. + kwargs = {"photometric": "minisblack"} + if tiled: + kwargs["tile"] = (32, 32) + if planar == "separate": + kwargs["planarconfig"] = "separate" + tifffile.imwrite(path, data, **kwargs) + elif planar == "contig": + kwargs["planarconfig"] = "contig" + tifffile.imwrite(path, np.transpose(data, (1, 2, 0)), **kwargs) + else: + raise ValueError(f"unknown planar={planar!r}") + + +@pytest.mark.parametrize("planar", ["separate", "contig"]) +@pytest.mark.parametrize("tiled", [True, False]) +@pytest.mark.parametrize("bands", [2, 3, 4]) +@pytest.mark.parametrize("dtype", [np.uint8, np.uint16]) +def test_cpu_planar_multiband(planar, tiled, bands, dtype): + """CPU ``open_geotiff`` returns (y, x, band) for every planar+layout combo.""" + from xrspatial.geotiff import open_geotiff + + height, width = 64, 96 + data = _make_data(bands, height, width, dtype) + expected = np.transpose(data, (1, 2, 0)) + + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "x.tif") + _write_tiff(path, data, planar=planar, tiled=tiled) + out = open_geotiff(path) + + assert out.dims == ("y", "x", "band"), ( + f"got dims {out.dims} for planar={planar} tiled={tiled} " + f"bands={bands} dtype={np.dtype(dtype).name}" + ) + assert out.shape == (height, width, bands) + arr = np.asarray(out.data) + assert arr.dtype == np.dtype(dtype) + np.testing.assert_array_equal(arr, expected) + + +@_gpu_only +@pytest.mark.parametrize("planar", ["separate", "contig"]) +@pytest.mark.parametrize("tiled", [True, False]) +@pytest.mark.parametrize("bands", [2, 3, 4]) +@pytest.mark.parametrize("dtype", [np.uint8, np.uint16]) +def test_gpu_matches_cpu_planar_multiband(planar, tiled, bands, dtype): + """GPU read agrees with CPU read AND with the source array.""" + from xrspatial.geotiff import open_geotiff, read_geotiff_gpu + + height, width = 64, 96 + data = _make_data(bands, height, width, dtype) + expected = np.transpose(data, (1, 2, 0)) + + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "x.tif") + _write_tiff(path, data, planar=planar, tiled=tiled) + cpu = np.asarray(open_geotiff(path).data) + gpu_da = read_geotiff_gpu(path) + + assert gpu_da.dims == ("y", "x", "band") + assert gpu_da.shape == (height, width, bands) + gpu = gpu_da.data.get() + + np.testing.assert_array_equal(cpu, expected) + np.testing.assert_array_equal(gpu, expected) + np.testing.assert_array_equal(gpu, cpu) + + +@pytest.mark.parametrize("tiled", [True, False]) +def test_cpu_singleband_sanity(tiled): + """Single-band reads stay 2-D regardless of layout.""" + from xrspatial.geotiff import open_geotiff + + rng = np.random.RandomState(7) + data = rng.randint(0, 200, size=(48, 80)).astype(np.uint8) + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "s.tif") + kwargs = {"photometric": "minisblack"} + if tiled: + kwargs["tile"] = (32, 32) + tifffile.imwrite(path, data, **kwargs) + out = open_geotiff(path) + assert out.dims == ("y", "x") + assert out.shape == (48, 80) + np.testing.assert_array_equal(np.asarray(out.data), data) + + +@_gpu_only +@pytest.mark.parametrize("tiled", [True, False]) +def test_gpu_singleband_sanity(tiled): + """Single-band GPU reads stay 2-D regardless of layout.""" + from xrspatial.geotiff import read_geotiff_gpu + + rng = np.random.RandomState(7) + data = rng.randint(0, 200, size=(48, 80)).astype(np.uint8) + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "s.tif") + kwargs = {"photometric": "minisblack"} + if tiled: + kwargs["tile"] = (32, 32) + tifffile.imwrite(path, data, **kwargs) + out = read_geotiff_gpu(path) + assert out.dims == ("y", "x") + assert out.shape == (48, 80) + np.testing.assert_array_equal(out.data.get(), data) + + +def test_a3_repro_stripped_planar_separate(): + """Spec-level guard for A3: stripped planar=2 must yield (y, x, band).""" + from xrspatial.geotiff import open_geotiff + + data = _make_data(3, 64, 96, np.uint8) + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "sp.tif") + tifffile.imwrite(path, data, photometric="rgb") # default planar=2 + out = open_geotiff(path) + assert out.dims == ("y", "x", "band") + assert out.shape == (64, 96, 3), ( + f"got shape {out.shape} for dims {out.dims} -- A3 regressed" + )